From 4ed2054c4ca94086585fe32ea48b8885b06a453f Mon Sep 17 00:00:00 2001
From: Felicity Guest
Date: Fri, 18 Mar 2022 14:49:07 +0000
Subject: [PATCH 01/70] add arepo directory

---
 src/amuse/community/arepo/Makefile      | 42 +++++++++++++++++++++++++
 src/amuse/community/arepo/__init__.py   |  1 +
 src/amuse/community/arepo/interface.cc  | 11 +++++++
 src/amuse/community/arepo/interface.py  | 24 ++++++++++++++
 src/amuse/community/arepo/src/Makefile  | 27 ++++++++++++++++
 src/amuse/community/arepo/src/test.cc   |  6 ++++
 src/amuse/community/arepo/test_arepo.py | 14 +++++++++
 7 files changed, 125 insertions(+)
 create mode 100644 src/amuse/community/arepo/Makefile
 create mode 100644 src/amuse/community/arepo/__init__.py
 create mode 100644 src/amuse/community/arepo/interface.cc
 create mode 100644 src/amuse/community/arepo/interface.py
 create mode 100644 src/amuse/community/arepo/src/Makefile
 create mode 100644 src/amuse/community/arepo/src/test.cc
 create mode 100644 src/amuse/community/arepo/test_arepo.py

diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile
new file mode 100644
index 0000000000..7c392db261
--- /dev/null
+++ b/src/amuse/community/arepo/Makefile
@@ -0,0 +1,42 @@
+# standard amuse configuration include
+# config.mk will be made after ./configure has run
+ifeq ($(origin AMUSE_DIR), undefined)
+  AMUSE_DIR := $(shell amusifier --get-amuse-dir)
+endif
+-include $(AMUSE_DIR)/config.mk
+
+MPICXX   ?= mpicxx
+
+CFLAGS   += -Wall -g
+CXXFLAGS += $(CFLAGS)
+LDFLAGS  += -lm $(MUSE_LD_FLAGS)
+
+OBJS = interface.o
+
+CODELIB = src/libarepo.a
+
+all: arepo_worker
+
+clean:
+	$(RM) -rf __pycache__
+	$(RM) -f *.so *.o *.pyc worker_code.cc worker_code.h
+	$(RM) *~ arepo_worker
+	$(MAKE) -C src clean
+
+distclean: clean
+	$(MAKE) -C src distclean
+
+$(CODELIB):
+	$(MAKE) -C src all
+
+worker_code.cc: interface.py
+	$(CODE_GENERATOR) --type=c interface.py arepoInterface -o $@
+
+worker_code.h: interface.py
+	$(CODE_GENERATOR) --type=H interface.py arepoInterface -o $@
+
+arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS)
+	$(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@
+
+%.o: %.cc
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
diff --git a/src/amuse/community/arepo/__init__.py b/src/amuse/community/arepo/__init__.py
new file mode 100644
index 0000000000..abe3ba85b6
--- /dev/null
+++ b/src/amuse/community/arepo/__init__.py
@@ -0,0 +1 @@
+# generated file
\ No newline at end of file
diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
new file mode 100644
index 0000000000..a590e82689
--- /dev/null
+++ b/src/amuse/community/arepo/interface.cc
@@ -0,0 +1,11 @@
+extern int echo(int input);
+
+/*
+ * Interface code: echo_int() stores echo(input) in *output and returns 0.
+ */
+
+int echo_int(int input, int * output){
+    *output = echo(input);
+    return 0;
+}
+
diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
new file mode 100644
index 0000000000..64130dc384
--- /dev/null
+++ b/src/amuse/community/arepo/interface.py
@@ -0,0 +1,24 @@
+from amuse.community import *
+
+class arepoInterface(CodeInterface):
+    """Low-level interface to the ``arepo_worker`` executable."""
+    include_headers = ['worker_code.h']
+
+    def __init__(self, **keyword_arguments):
+        CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments)
+
+    @legacy_function
+    def echo_int():  # specification for echo_int(int input, int *output) in interface.cc
+        function = LegacyFunctionSpecification()
+        function.addParameter('int_in', dtype='int32', direction=function.IN)
+        function.addParameter('int_out', dtype='int32', direction=function.OUT)
+        function.result_type = 'int32'
+        function.can_handle_array = True
+        return function
+
+
+class arepo(InCodeComponentImplementation):
+    """High-level component built on top of ``arepoInterface``."""
+    def __init__(self, **options):
+        InCodeComponentImplementation.__init__(self, arepoInterface(**options), **options)
+
diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
new file mode 100644
index 0000000000..66f18374a1
--- /dev/null
+++ b/src/amuse/community/arepo/src/Makefile
@@ -0,0 +1,27 @@
+CFLAGS   += -Wall -g
+CXXFLAGS += $(CFLAGS)
+LDFLAGS  += -lm $(MUSE_LD_FLAGS)
+
+CODELIB = libarepo.a
+
+CODEOBJS = test.o
+
+AR = ar ruv
+RANLIB = ranlib
+RM = rm
+
+all: $(CODELIB)
+
+
+clean:
+	$(RM) -f *.o *.a
+
+distclean: clean
+
+$(CODELIB): $(CODEOBJS)
+	$(RM) -f $@
+	$(AR) $@ $(CODEOBJS)
+	$(RANLIB) $@
+
+%.o: %.cc
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
diff --git a/src/amuse/community/arepo/src/test.cc b/src/amuse/community/arepo/src/test.cc
new file mode 100644
index 0000000000..c30eeef8cb
--- /dev/null
+++ b/src/amuse/community/arepo/src/test.cc
@@ -0,0 +1,6 @@
+/*
+ * Example function for a code: returns its argument unchanged
+ */
+int echo(int input){
+    return input;
+}
diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py
new file mode 100644
index 0000000000..8cdeabb474
--- /dev/null
+++ b/src/amuse/community/arepo/test_arepo.py
@@ -0,0 +1,14 @@
+from amuse.test.amusetest import TestWithMPI
+
+from .interface import arepoInterface
+from .interface import arepo
+
+class arepoInterfaceTests(TestWithMPI):
+    """Smoke test: the worker must echo an integer back unchanged."""
+    def test1(self):
+        instance = arepoInterface()
+        result, error = instance.echo_int(12)
+        self.assertEqual(error, 0)  # echo_int() in interface.cc always returns 0
+        self.assertEqual(result, 12)
+        instance.stop()
+
From 5507dc490d9f8760e9edb0a9dae7b4141881dd86 Mon Sep 17 00:00:00 2001
From: Felicity Guest
Date: Fri, 18 Mar 2022 14:57:05 +0000
Subject: [PATCH 02/70] add arepo source code

---
 .../arepo/src/add_backgroundgrid/add_bggrid.c | 492 ++
 .../arepo/src/add_backgroundgrid/add_bggrid.h | 58 +
 .../src/add_backgroundgrid/calc_weights.c | 301 +
 .../arepo/src/add_backgroundgrid/distribute.c | 328 ++
 .../community/arepo/src/cooling/cooling.c | 870 +++
 .../arepo/src/cooling/cooling_proto.h | 49 +
 .../arepo/src/cooling/cooling_vars.h | 80 +
 src/amuse/community/arepo/src/debug_md5/Md5.c | 472 ++
 src/amuse/community/arepo/src/debug_md5/Md5.h | 92 +
 .../arepo/src/debug_md5/calc_checksum.c | 121 +
 .../community/arepo/src/domain/bsd_tree.h | 865 +++
 src/amuse/community/arepo/src/domain/domain.c | 633 ++
src/amuse/community/arepo/src/domain/domain.h | 156 + .../arepo/src/domain/domain_DC_update.c | 699 +++ .../arepo/src/domain/domain_balance.c | 1154 ++++ .../community/arepo/src/domain/domain_box.c | 336 ++ .../arepo/src/domain/domain_counttogo.c | 84 + .../arepo/src/domain/domain_exchange.c | 399 ++ .../arepo/src/domain/domain_rearrange.c | 129 + .../arepo/src/domain/domain_sort_kernels.c | 158 + .../arepo/src/domain/domain_toplevel.c | 393 ++ .../community/arepo/src/domain/domain_vars.c | 117 + src/amuse/community/arepo/src/domain/peano.c | 569 ++ src/amuse/community/arepo/src/fof/fof.c | 967 ++++ src/amuse/community/arepo/src/fof/fof.h | 319 + .../community/arepo/src/fof/fof_distribute.c | 420 ++ .../community/arepo/src/fof/fof_findgroups.c | 720 +++ src/amuse/community/arepo/src/fof/fof_io.c | 3151 ++++++++++ .../community/arepo/src/fof/fof_nearest.c | 473 ++ .../arepo/src/fof/fof_sort_kernels.c | 495 ++ src/amuse/community/arepo/src/fof/fof_vars.c | 79 + .../community/arepo/src/gitversion/version | 7 + .../community/arepo/src/gitversion/version.h | 38 + src/amuse/community/arepo/src/gravity/accel.c | 347 ++ .../community/arepo/src/gravity/forcetree.c | 1827 ++++++ .../community/arepo/src/gravity/forcetree.h | 168 + .../arepo/src/gravity/forcetree_ewald.c | 529 ++ .../src/gravity/forcetree_optimizebalance.c | 486 ++ .../arepo/src/gravity/forcetree_walk.c | 709 +++ .../arepo/src/gravity/grav_external.c | 579 ++ .../arepo/src/gravity/grav_softening.c | 215 + .../community/arepo/src/gravity/gravdirect.c | 259 + .../community/arepo/src/gravity/gravtree.c | 749 +++ .../arepo/src/gravity/gravtree_forcetest.c | 1089 ++++ .../community/arepo/src/gravity/longrange.c | 199 + .../arepo/src/gravity/pm/pm_mpi_fft.c | 1771 ++++++ .../arepo/src/gravity/pm/pm_nonperiodic.c | 2087 +++++++ .../arepo/src/gravity/pm/pm_periodic.c | 2034 +++++++ .../arepo/src/gravity/pm/pm_periodic2d.c | 905 +++ .../arepo/src/hydro/finite_volume_solver.c | 1895 ++++++ 
.../community/arepo/src/hydro/gradients.c | 149 + src/amuse/community/arepo/src/hydro/mhd.c | 99 + src/amuse/community/arepo/src/hydro/riemann.c | 955 +++ .../community/arepo/src/hydro/riemann_hllc.c | 213 + .../community/arepo/src/hydro/riemann_hlld.c | 567 ++ src/amuse/community/arepo/src/hydro/scalars.c | 107 + .../src/hydro/update_primitive_variables.c | 343 ++ src/amuse/community/arepo/src/init/begrun.c | 344 ++ src/amuse/community/arepo/src/init/density.c | 635 ++ src/amuse/community/arepo/src/init/init.c | 835 +++ src/amuse/community/arepo/src/io/global.c | 257 + src/amuse/community/arepo/src/io/hdf5_util.c | 881 +++ src/amuse/community/arepo/src/io/io.c | 2226 +++++++ src/amuse/community/arepo/src/io/io_fields.c | 765 +++ src/amuse/community/arepo/src/io/logs.c | 623 ++ src/amuse/community/arepo/src/io/parameters.c | 861 +++ src/amuse/community/arepo/src/io/read_ic.c | 1900 ++++++ src/amuse/community/arepo/src/io/restart.c | 1549 +++++ src/amuse/community/arepo/src/main/allvars.c | 331 ++ src/amuse/community/arepo/src/main/allvars.h | 1924 +++++++ src/amuse/community/arepo/src/main/main.c | 296 + .../community/arepo/src/main/main_original.c | 299 + .../community/arepo/src/main/main_reduced.c | 135 + src/amuse/community/arepo/src/main/proto.h | 665 +++ src/amuse/community/arepo/src/main/run.c | 660 +++ .../arepo/src/mesh/criterion_derefinement.c | 181 + .../arepo/src/mesh/criterion_refinement.c | 267 + src/amuse/community/arepo/src/mesh/mesh.h | 268 + .../community/arepo/src/mesh/refinement.c | 217 + .../arepo/src/mesh/set_vertex_velocities.c | 321 ++ .../arepo/src/mesh/voronoi/voronoi.c | 1163 ++++ .../arepo/src/mesh/voronoi/voronoi.h | 379 ++ .../arepo/src/mesh/voronoi/voronoi_1d.c | 363 ++ .../src/mesh/voronoi/voronoi_1d_spherical.c | 339 ++ .../arepo/src/mesh/voronoi/voronoi_2d.c | 2110 +++++++ .../arepo/src/mesh/voronoi/voronoi_3d.c | 5111 +++++++++++++++++ .../arepo/src/mesh/voronoi/voronoi_check.c | 407 ++ .../src/mesh/voronoi/voronoi_derefinement.c | 
1088 ++++ .../src/mesh/voronoi/voronoi_dynamic_update.c | 1037 ++++ .../arepo/src/mesh/voronoi/voronoi_exchange.c | 531 ++ .../src/mesh/voronoi/voronoi_ghost_search.c | 1773 ++++++ .../src/mesh/voronoi/voronoi_gradients_lsf.c | 944 +++ .../mesh/voronoi/voronoi_gradients_onedims.c | 204 + .../src/mesh/voronoi/voronoi_refinement.c | 425 ++ .../arepo/src/mesh/voronoi/voronoi_utils.c | 501 ++ .../src/mpi_utils/checksummed_sendrecv.c | 321 ++ .../src/mpi_utils/hypercube_allgatherv.c | 94 + .../community/arepo/src/mpi_utils/mpi_util.c | 375 ++ .../arepo/src/mpi_utils/myIBarrier.c | 175 + .../arepo/src/mpi_utils/myIBarrier.h | 51 + .../arepo/src/mpi_utils/myalltoall.c | 122 + .../community/arepo/src/mpi_utils/pinning.c | 292 + .../src/mpi_utils/sizelimited_sendrecv.c | 116 + .../community/arepo/src/ngbtree/ngbtree.c | 1394 +++++ .../arepo/src/ngbtree/ngbtree_search.c | 376 ++ .../arepo/src/ngbtree/ngbtree_walk.c | 225 + .../arepo/src/star_formation/sfr_eEOS.c | 539 ++ .../arepo/src/star_formation/starformation.c | 437 ++ .../community/arepo/src/subfind/subfind.c | 577 ++ .../community/arepo/src/subfind/subfind.h | 213 + .../arepo/src/subfind/subfind_coll_domain.c | 620 ++ .../arepo/src/subfind/subfind_coll_tree.c | 992 ++++ .../arepo/src/subfind/subfind_coll_treewalk.c | 460 ++ .../arepo/src/subfind/subfind_collective.c | 2417 ++++++++ .../arepo/src/subfind/subfind_density.c | 662 +++ .../arepo/src/subfind/subfind_distribute.c | 421 ++ .../arepo/src/subfind/subfind_findlinkngb.c | 539 ++ .../community/arepo/src/subfind/subfind_io.c | 156 + .../arepo/src/subfind/subfind_loctree.c | 930 +++ .../arepo/src/subfind/subfind_nearesttwo.c | 475 ++ .../arepo/src/subfind/subfind_properties.c | 1195 ++++ .../arepo/src/subfind/subfind_reprocess.c | 240 + .../arepo/src/subfind/subfind_serial.c | 807 +++ .../community/arepo/src/subfind/subfind_so.c | 964 ++++ .../arepo/src/subfind/subfind_so_potegy.c | 853 +++ .../arepo/src/subfind/subfind_sort_kernels.c | 442 ++ 
.../arepo/src/subfind/subfind_vars.c | 102 + .../arepo/src/time_integration/darkenergy.c | 74 + .../src/time_integration/do_gravity_hydro.c | 484 ++ .../arepo/src/time_integration/driftfac.c | 307 + .../arepo/src/time_integration/predict.c | 506 ++ .../arepo/src/time_integration/timestep.c | 980 ++++ .../arepo/src/time_integration/timestep.h | 88 + .../src/time_integration/timestep_treebased.c | 494 ++ .../community/arepo/src/utils/allocate.c | 133 + src/amuse/community/arepo/src/utils/debug.c | 148 + src/amuse/community/arepo/src/utils/dtypes.h | 195 + .../arepo/src/utils/generic_comm_helpers2.h | 724 +++ .../community/arepo/src/utils/mpz_extension.c | 119 + .../community/arepo/src/utils/mymalloc.c | 792 +++ .../community/arepo/src/utils/parallel_sort.c | 743 +++ .../community/arepo/src/utils/predicates.c | 4292 ++++++++++++++ src/amuse/community/arepo/src/utils/system.c | 1300 +++++ src/amuse/community/arepo/src/utils/tags.h | 50 + src/amuse/community/arepo/src/utils/timer.h | 251 + 145 files changed, 95582 insertions(+) create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/distribute.c create mode 100644 src/amuse/community/arepo/src/cooling/cooling.c create mode 100644 src/amuse/community/arepo/src/cooling/cooling_proto.h create mode 100644 src/amuse/community/arepo/src/cooling/cooling_vars.h create mode 100644 src/amuse/community/arepo/src/debug_md5/Md5.c create mode 100644 src/amuse/community/arepo/src/debug_md5/Md5.h create mode 100644 src/amuse/community/arepo/src/debug_md5/calc_checksum.c create mode 100644 src/amuse/community/arepo/src/domain/bsd_tree.h create mode 100644 src/amuse/community/arepo/src/domain/domain.c create mode 100644 src/amuse/community/arepo/src/domain/domain.h create mode 100644 
src/amuse/community/arepo/src/domain/domain_DC_update.c create mode 100644 src/amuse/community/arepo/src/domain/domain_balance.c create mode 100644 src/amuse/community/arepo/src/domain/domain_box.c create mode 100644 src/amuse/community/arepo/src/domain/domain_counttogo.c create mode 100644 src/amuse/community/arepo/src/domain/domain_exchange.c create mode 100644 src/amuse/community/arepo/src/domain/domain_rearrange.c create mode 100644 src/amuse/community/arepo/src/domain/domain_sort_kernels.c create mode 100644 src/amuse/community/arepo/src/domain/domain_toplevel.c create mode 100644 src/amuse/community/arepo/src/domain/domain_vars.c create mode 100644 src/amuse/community/arepo/src/domain/peano.c create mode 100644 src/amuse/community/arepo/src/fof/fof.c create mode 100644 src/amuse/community/arepo/src/fof/fof.h create mode 100644 src/amuse/community/arepo/src/fof/fof_distribute.c create mode 100644 src/amuse/community/arepo/src/fof/fof_findgroups.c create mode 100644 src/amuse/community/arepo/src/fof/fof_io.c create mode 100644 src/amuse/community/arepo/src/fof/fof_nearest.c create mode 100644 src/amuse/community/arepo/src/fof/fof_sort_kernels.c create mode 100644 src/amuse/community/arepo/src/fof/fof_vars.c create mode 100644 src/amuse/community/arepo/src/gitversion/version create mode 100644 src/amuse/community/arepo/src/gitversion/version.h create mode 100644 src/amuse/community/arepo/src/gravity/accel.c create mode 100644 src/amuse/community/arepo/src/gravity/forcetree.c create mode 100644 src/amuse/community/arepo/src/gravity/forcetree.h create mode 100644 src/amuse/community/arepo/src/gravity/forcetree_ewald.c create mode 100644 src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c create mode 100644 src/amuse/community/arepo/src/gravity/forcetree_walk.c create mode 100644 src/amuse/community/arepo/src/gravity/grav_external.c create mode 100644 src/amuse/community/arepo/src/gravity/grav_softening.c create mode 100644 
src/amuse/community/arepo/src/gravity/gravdirect.c create mode 100644 src/amuse/community/arepo/src/gravity/gravtree.c create mode 100644 src/amuse/community/arepo/src/gravity/gravtree_forcetest.c create mode 100644 src/amuse/community/arepo/src/gravity/longrange.c create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_periodic.c create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c create mode 100644 src/amuse/community/arepo/src/hydro/finite_volume_solver.c create mode 100644 src/amuse/community/arepo/src/hydro/gradients.c create mode 100644 src/amuse/community/arepo/src/hydro/mhd.c create mode 100644 src/amuse/community/arepo/src/hydro/riemann.c create mode 100644 src/amuse/community/arepo/src/hydro/riemann_hllc.c create mode 100644 src/amuse/community/arepo/src/hydro/riemann_hlld.c create mode 100644 src/amuse/community/arepo/src/hydro/scalars.c create mode 100644 src/amuse/community/arepo/src/hydro/update_primitive_variables.c create mode 100644 src/amuse/community/arepo/src/init/begrun.c create mode 100644 src/amuse/community/arepo/src/init/density.c create mode 100644 src/amuse/community/arepo/src/init/init.c create mode 100644 src/amuse/community/arepo/src/io/global.c create mode 100644 src/amuse/community/arepo/src/io/hdf5_util.c create mode 100644 src/amuse/community/arepo/src/io/io.c create mode 100644 src/amuse/community/arepo/src/io/io_fields.c create mode 100644 src/amuse/community/arepo/src/io/logs.c create mode 100644 src/amuse/community/arepo/src/io/parameters.c create mode 100644 src/amuse/community/arepo/src/io/read_ic.c create mode 100644 src/amuse/community/arepo/src/io/restart.c create mode 100644 src/amuse/community/arepo/src/main/allvars.c create mode 100644 src/amuse/community/arepo/src/main/allvars.h create mode 100644 src/amuse/community/arepo/src/main/main.c 
create mode 100644 src/amuse/community/arepo/src/main/main_original.c create mode 100644 src/amuse/community/arepo/src/main/main_reduced.c create mode 100644 src/amuse/community/arepo/src/main/proto.h create mode 100644 src/amuse/community/arepo/src/main/run.c create mode 100644 src/amuse/community/arepo/src/mesh/criterion_derefinement.c create mode 100644 src/amuse/community/arepo/src/mesh/criterion_refinement.c create mode 100644 src/amuse/community/arepo/src/mesh/mesh.h create mode 100644 src/amuse/community/arepo/src/mesh/refinement.c create mode 100644 src/amuse/community/arepo/src/mesh/set_vertex_velocities.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi.h create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_refinement.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c 
create mode 100644 src/amuse/community/arepo/src/mpi_utils/mpi_util.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/myIBarrier.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/myIBarrier.h create mode 100644 src/amuse/community/arepo/src/mpi_utils/myalltoall.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/pinning.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c create mode 100644 src/amuse/community/arepo/src/ngbtree/ngbtree.c create mode 100644 src/amuse/community/arepo/src/ngbtree/ngbtree_search.c create mode 100644 src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c create mode 100644 src/amuse/community/arepo/src/star_formation/sfr_eEOS.c create mode 100644 src/amuse/community/arepo/src/star_formation/starformation.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind.h create mode 100644 src/amuse/community/arepo/src/subfind/subfind_coll_domain.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_coll_tree.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_collective.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_density.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_distribute.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_io.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_loctree.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_properties.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_reprocess.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_serial.c create mode 100644 
src/amuse/community/arepo/src/subfind/subfind_so.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_so_potegy.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_vars.c create mode 100644 src/amuse/community/arepo/src/time_integration/darkenergy.c create mode 100644 src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c create mode 100644 src/amuse/community/arepo/src/time_integration/driftfac.c create mode 100644 src/amuse/community/arepo/src/time_integration/predict.c create mode 100644 src/amuse/community/arepo/src/time_integration/timestep.c create mode 100644 src/amuse/community/arepo/src/time_integration/timestep.h create mode 100644 src/amuse/community/arepo/src/time_integration/timestep_treebased.c create mode 100644 src/amuse/community/arepo/src/utils/allocate.c create mode 100644 src/amuse/community/arepo/src/utils/debug.c create mode 100644 src/amuse/community/arepo/src/utils/dtypes.h create mode 100644 src/amuse/community/arepo/src/utils/generic_comm_helpers2.h create mode 100644 src/amuse/community/arepo/src/utils/mpz_extension.c create mode 100644 src/amuse/community/arepo/src/utils/mymalloc.c create mode 100644 src/amuse/community/arepo/src/utils/parallel_sort.c create mode 100644 src/amuse/community/arepo/src/utils/predicates.c create mode 100644 src/amuse/community/arepo/src/utils/system.c create mode 100644 src/amuse/community/arepo/src/utils/tags.h create mode 100644 src/amuse/community/arepo/src/utils/timer.h diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c new file mode 100644 index 0000000000..ea94880120 --- /dev/null +++ b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c @@ -0,0 +1,492 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              .
+ *
+ * \file        src/add_backgroundgrid/add_bggrid.c
+ * \date        05/2018
+ * \brief       Re-gridding of ICs to ensure that the entire computational
+ *              domain contains gas cells.
+ * \details     Can be used to convert SPH ICs to Arepo ICs.
+ *              contains functions:
+ *                int add_backgroundgrid(void)
+ *                void modify_boxsize(double new_val)
+ *                void prepare_domain_backgroundgrid(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "add_bggrid.h"
+
+#ifdef ADDBACKGROUNDGRID
+
+static void modify_boxsize(double new_val);
+
+MyIDType IDNew;
+
+/*! \brief Re-gridding of ICs onto oct-tree nodes.
+ *
+ *  Adds a zero-mass gas cell at the centre of every leaf of the gas-only tree whose centre lies inside the box,
+ *  calls calculate_weights()/distribute_particles(), removes the old gas cells and calls savepositions(0, 0).
+ *  If this is active, no simulation is performed.
+ *
+ *  \return 0 (always)
+ */
+int add_backgroundgrid(void)
+{
+  int i, no, numnodes;
+  long long ngas_count_all_old;
+  double vol, voltot, mgas, mtot;
+  int flag_all, flag = 0;
+
+  mpi_printf("\n\nADD BACKGROUND GRID: Adding background grid to IC file\n\n");
+
+  for(i = 0, mgas = 0; i < NumGas; i++)
+    if(P[i].Type == 0)
+      mgas += P[i].Mass;
+
+  MPI_Allreduce(&mgas, &mtot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("ADD BACKGROUND GRID: Total gas mass before remap=%g\n", mtot);
+
+  ngas_count_all_old = All.TotNumGas;
+
+  ngb_treefree();
+
+  domain_free();
+
+  domain_Decomposition(); /* do new domain decomposition, will also make a new chained-list of synchronized particles */
+
+  numnodes = construct_forcetree(1, 1, 0, 0); /* build tree only with gas cells */
+
+  for(i = Tree_MaxPart, vol = 0; i < numnodes + Tree_MaxPart; i++)
+    {
+      if(Nodes[i].u.d.sibling == Nodes[i].u.d.nextnode) /* node is a leaf */
+        {
+          vol += Nodes[i].len * Nodes[i].len * Nodes[i].len;
+        }
+    }
+
+  for(i = 0; i < NumGas; i++)
+    {
+      no = Father[i];
+      vol += Nodes[no].len * Nodes[no].len * Nodes[no].len / 8; /* 1/8 of the volume of node Father[i] per gas cell */
+    }
+
+  MPI_Allreduce(&vol, &voltot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("\nADD BACKGROUND GRID: voltot=%g %g\n", voltot, pow(DomainLen, 3));
+
+  int count_leaves = 0, count_leaves_all;
+
+  for(i = Tree_MaxPart, vol = 0; i < numnodes + Tree_MaxPart; i++)
+    {
+      if(Nodes[i].u.d.sibling == Nodes[i].u.d.nextnode) /* node is a leaf */
+        {
+          if(Nodes[i].center[0] > 0 && Nodes[i].center[0] < All.BoxSize)
+            if(Nodes[i].center[1] > 0 && Nodes[i].center[1] < All.BoxSize)
+              if(Nodes[i].center[2] > 0 && Nodes[i].center[2] < All.BoxSize)
+                count_leaves++;
+        }
+    }
+
+  MPI_Allreduce(&count_leaves, &count_leaves_all, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("ADD BACKGROUND GRID: count_leaves_all=%d\n\n", count_leaves_all);
+
+  if((NumGas + count_leaves >= All.MaxPartSph) || (NumPart + count_leaves >= All.MaxPart))
+    flag = 1;
+
+  MPI_Allreduce(&flag, &flag_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* Increase storage for newly added gas particles (on all tasks if any single task needs it) */
+  if(flag_all)
+    domain_resize_storage(count_leaves, count_leaves, 0);
+
+  /* determine maximum ID */
+  MyIDType maxid, newid, *tmp;
+  int *list;
+
+  for(i = 0, maxid = 0; i < NumPart; i++)
+    if(P[i].ID > maxid)
+      maxid = P[i].ID;
+
+  tmp = mymalloc("tmp", NTask * sizeof(MyIDType));
+
+  MPI_Allgather(&maxid, sizeof(MyIDType), MPI_BYTE, tmp, sizeof(MyIDType), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(i = 0; i < NTask; i++)
+    if(tmp[i] > maxid)
+      maxid = tmp[i];
+
+  myfree(tmp);
+  // maxid is now the total maximum ID number of all particles
+
+  list = mymalloc("list", NTask * sizeof(int));
+
+  MPI_Allgather(&count_leaves, 1, MPI_INT, list, 1, MPI_INT, MPI_COMM_WORLD);
+
+  newid = maxid + 1;
+
+  for(i = 0; i < ThisTask; i++)
+    newid += list[i];
+
+  myfree(list);
+
+  // newid is now the maxid+total of count_leaves over all previous tasks
+
+  IDNew = maxid + 1; /* old gas particles will have IDs below this */
+
+  // move all particle and sph particle data down the arrays by
+  // count_leaves.
+
+  memmove(P + count_leaves, P, sizeof(struct particle_data) * NumPart);
+  memmove(SphP + count_leaves, SphP, sizeof(struct sph_particle_data) * NumGas);
+
+  NumPart += count_leaves;
+  NumGas += count_leaves;
+
+  // this is the same loop as determined count_leaves above, so
+  // it will be applied count_leaves times again.
+  count_leaves = 0;
+  for(i = Tree_MaxPart, vol = 0; i < numnodes + Tree_MaxPart; i++)
+    {
+      if(Nodes[i].u.d.sibling == Nodes[i].u.d.nextnode) /* node is a leaf */
+        {
+          if(Nodes[i].center[0] > 0 && Nodes[i].center[0] < All.BoxSize)
+            if(Nodes[i].center[1] > 0 && Nodes[i].center[1] < All.BoxSize)
+              if(Nodes[i].center[2] > 0 && Nodes[i].center[2] < All.BoxSize)
+                {
+                  P[count_leaves].Pos[0] = Nodes[i].center[0];
+                  P[count_leaves].Pos[1] = Nodes[i].center[1];
+                  P[count_leaves].Pos[2] = Nodes[i].center[2];
+                  P[count_leaves].Vel[0] = 0;
+                  P[count_leaves].Vel[1] = 0;
+                  P[count_leaves].Vel[2] = 0;
+
+                  P[count_leaves].Mass         = 0;
+                  P[count_leaves].TimeBinHydro = 0;
+                  P[count_leaves].TimeBinGrav  = 0;
+
+                  P[count_leaves].Ti_Current = All.Ti_Current;
+
+#ifdef MHD
+                  SphP[count_leaves].B[0] = 0;
+                  SphP[count_leaves].B[1] = 0;
+                  SphP[count_leaves].B[2] = 0;
+                  SphP[count_leaves].DivB = 0;
+#endif /* #ifdef MHD */
+
+                  P[count_leaves].Type          = 0;
+                  P[count_leaves].SofteningType = All.SofteningTypeOfPartType[0];
+
+                  // this puts the new ID at the right spot
+                  P[count_leaves].ID = newid++;
+
+                  SphP[count_leaves].Volume      = Nodes[i].len * Nodes[i].len * Nodes[i].len;
+                  SphP[count_leaves].Utherm      = 0;
+                  SphP[count_leaves].Energy      = 0;
+                  SphP[count_leaves].Momentum[0] = 0;
+                  SphP[count_leaves].Momentum[1] = 0;
+                  SphP[count_leaves].Momentum[2] = 0;
+
+                  count_leaves++;
+                }
+        }
+    }
+
+  /* Delete the force tree */
+  myfree(Father);
+  myfree(Nextnode);
+  myfree(Tree_Points);
+  force_treefree();
+
+  calculate_weights();
+  distribute_particles();
+
+  int count_elim = 0, count_elim_all;
+
+  for(i = 0; i < NumGas; i++)
+    if(P[i].Type == 0)
+      {
+        if(P[i].ID <= maxid)
+          {
+            // remove particle i by swapping in the last sph particle
+            // and then swap the last particle to that spot
+            P[i]          = P[NumGas - 1];
+            P[NumGas - 1] = P[NumPart - 1];
+
+            SphP[i] = SphP[NumGas - 1];
+
+            NumPart--;
+            NumGas--;
+            i--; /* re-examine slot i, which now holds the swapped-in particle */
+
+            count_elim++;
+          }
+        else
+          {
+            if(P[i].Mass > 0)
+              {
+                SphP[i].Utherm = SphP[i].Energy / P[i].Mass;
+                P[i].Vel[0]    = SphP[i].Momentum[0] / P[i].Mass;
+                P[i].Vel[1]    = SphP[i].Momentum[1] / P[i].Mass;
+                P[i].Vel[2]    = SphP[i].Momentum[2] / P[i].Mass;
+              }
+          }
+      }
+
+  MPI_Allreduce(&count_elim, &count_elim_all, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  sumup_large_ints(1, &NumPart, &All.TotNumPart);
+  sumup_large_ints(1, &NumGas, &All.TotNumGas);
+
+  mpi_printf("\nADD BACKGROUND GRID: count_elim_all=%d IDNew=%lld\n", count_elim_all, (long long)IDNew);
+  mpi_printf("ADD BACKGROUND GRID: added particles=%d (task 0: NumGas=%d)\n", count_leaves_all - count_elim_all, NumGas);
+  mpi_printf("ADD BACKGROUND GRID: new particle number=%lld\n", (long long)All.TotNumPart);
+  mpi_printf("ADD BACKGROUND GRID: new gas particle number=%lld\n\n", (long long)All.TotNumGas);
+
+  for(i = 0, mgas = 0; i < NumGas; i++)
+    if(P[i].Type == 0)
+      mgas += P[i].Mass;
+
+  MPI_Allreduce(&mgas, &mtot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("ADD BACKGROUND GRID: Total gas mass after remap=%g\n", mtot);
+
+  savepositions(0, 0);
+
+  mpi_printf("\nADD BACKGROUND GRID: GridSize = %d\n", All.GridSize);
+  mpi_printf(
+      "ADD BACKGROUND GRID: Suggested value for MeanVolume = %g\nADD BACKGROUND GRID: Suggested value for ReferenceGasPartMass = %g\n",
+      pow(All.BoxSize / All.GridSize, 3), mtot / ngas_count_all_old);
+  mpi_printf("ADD BACKGROUND GRID: Suggested value for BoxSize = %g\n", All.BoxSize);
+  mpi_printf("ADD BACKGROUND GRID: Done!\n\n");
+
+  return 0;
+}
+
+/*! \brief Changes the box size to a new value.
+ *
+ *  LONG_X, LONG_Y and LONG_Z are still active as specified in Config file.
+ *
+ *  \param[in] new_val New box size.
+ * + * \return void + */ +void modify_boxsize(double new_val) +{ + All.BoxSize = new_val; + + boxSize = All.BoxSize; + boxHalf = 0.5 * All.BoxSize; +#ifdef LONG_X + boxHalf_X = boxHalf * LONG_X; + boxSize_X = boxSize * LONG_X; +#endif /* #ifdef LONG_X */ +#ifdef LONG_Y + boxHalf_Y = boxHalf * LONG_Y; + boxSize_Y = boxSize * LONG_Y; +#endif /* #ifdef LONG_Y */ +#ifdef LONG_Z + boxHalf_Z = boxHalf * LONG_Z; + boxSize_Z = boxSize * LONG_Z; +#endif /* #ifdef LONG_Z */ +} + +/*! \brief Prepares computational box; makes sure simulation volume is large + * enough. + * + * \return void + */ +void prepare_domain_backgroundgrid(void) +{ + int i, j, shift_half_box = 0, min_topleave_num = 0, set_grid_size_flag = 0; + unsigned int size, bit_num; + double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3]; + double len_gas, xmin_gas[3], xmax_gas[3], xmin_gas_glob[3], xmax_gas_glob[3]; + double min_box_size, max_box_size; + + mpi_printf("\n\nADD BACKGROUND GRID: preparing domain for first domain decomposition\n"); + + /* Checking GridSize limits */ + if(All.GridSize < 0) + terminate("GridSize = %d is less than zero. This is not allowed.", All.GridSize); + + if(All.GridSize > ADDBACKGROUNDGRIDMAX) + terminate("GridSize = %d is exceeding the max grid size = %d", All.GridSize, ADDBACKGROUNDGRIDMAX); + + if(All.GridSize > 0) + set_grid_size_flag = 1; + + /* Now checking it is a power of two. If not assign the closest value (is this required?) 
*/ + bit_num = 0; + size = ADDBACKGROUNDGRIDMAX; + while(((size & 1) == 0) && size > 1) + { + size >>= 1; + bit_num++; + } + + for(j = 1; j < bit_num; j++) + { + size = All.GridSize; + size >>= (bit_num - j); + if((size & 1) == 1) + break; + } + + mpi_printf("ADD BACKGROUND GRID: original value of GridSize = %d\n", All.GridSize); + + All.GridSize = (size << (bit_num - j - 1)); + + if(All.GridSize < 1) + All.GridSize = 1; + + mpi_printf("ADD BACKGROUND GRID: closest power of two corresponding to GridSize = %d is taken as initial guess\n", 2 * All.GridSize); + + /* determine local extension */ + for(j = 0; j < 3; j++) + { + xmin[j] = MAX_REAL_NUMBER; + xmax[j] = -MAX_REAL_NUMBER; + xmin_gas[j] = MAX_REAL_NUMBER; + xmax_gas[j] = -MAX_REAL_NUMBER; + } + + for(i = 0; i < NumPart; i++) + { + for(j = 0; j < 3; j++) + { + if(xmin[j] > P[i].Pos[j]) + xmin[j] = P[i].Pos[j]; + + if(xmax[j] < P[i].Pos[j]) + xmax[j] = P[i].Pos[j]; + } + } + + for(i = 0; i < NumGas; i++) + { + for(j = 0; j < 3; j++) + { + if(xmin_gas[j] > P[i].Pos[j]) + xmin_gas[j] = P[i].Pos[j]; + + if(xmax_gas[j] < P[i].Pos[j]) + xmax_gas[j] = P[i].Pos[j]; + } + } + + MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(xmin_gas, xmin_gas_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(xmax_gas, xmax_gas_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + + mpi_printf("ADD BACKGROUND GRID: Min and max coordinates.\n"); + mpi_printf("ADD BACKGROUND GRID: xmin|ymin|zmin=% g|% g|% g.\n", xmin_glob[0], xmin_glob[1], xmin_glob[2]); + mpi_printf("ADD BACKGROUND GRID: xmax|ymax|zmax=% g|% g|% g.\n", xmax_glob[0], xmax_glob[1], xmax_glob[2]); + mpi_printf("ADD BACKGROUND GRID: xmin_gas|ymin_gas|zmin_gas=% g|% g|% g.\n", xmin_gas_glob[0], xmin_gas_glob[1], xmin_gas_glob[2]); + mpi_printf("ADD BACKGROUND GRID: xmax_gas|ymax_gas|zmax_gas=% g|% g|% g.\n", xmax_gas_glob[0], xmax_gas_glob[1], 
xmax_gas_glob[2]); + + len = 0; + len_gas = 0; + for(j = 0; j < 3; j++) + { + if(xmax_glob[j] - xmin_glob[j] > len) + len = xmax_glob[j] - xmin_glob[j]; + + if(xmax_gas_glob[j] - xmin_gas_glob[j] > len_gas) + len_gas = xmax_gas_glob[j] - xmin_gas_glob[j]; + + if(xmin_glob[j] < 0) + shift_half_box = 1; + } + + max_box_size = FACTOR_MAX_BOX_SIZE * len_gas; + min_box_size = FACTOR_MIN_BOX_SIZE * len_gas; + + if(All.BoxSize < min_box_size) + { + mpi_printf("ADD BACKGROUND GRID: Need to increase the BoxSize. Old value = %g, new value = %g\n", All.BoxSize, min_box_size); + modify_boxsize(min_box_size); + } + if(All.BoxSize > max_box_size) + { + mpi_printf("ADD BACKGROUND GRID: Need to decrease the BoxSize. Old value = %g, new value = %g\n", All.BoxSize, max_box_size); + modify_boxsize(max_box_size); + } + + mpi_printf("ADD BACKGROUND GRID: Domain extent %g, BoxSize = %g, ratio = %g\n", len, All.BoxSize, len / All.BoxSize); + mpi_printf("ADD BACKGROUND GRID: Gas extent %g, BoxSize = %g, ratio = %g\n", len_gas, All.BoxSize, len_gas / All.BoxSize); + + /* the terminate condition must be checked properly */ + if(!set_grid_size_flag) + { + while(min_topleave_num < NTask && (All.BoxSize / len_gas) > All.GridSize && All.GridSize < ADDBACKGROUNDGRIDMAX) + { + All.GridSize <<= 1; + min_topleave_num = (int)pow(len_gas * All.GridSize / All.BoxSize, 3.0); + mpi_printf("ADD BACKGROUND GRID: GridSize=%3d, min_topleave_num=%6d, NTask=%6d, BoxSize/GridSize=%g, len_gas/GridSize=%g\n", + All.GridSize, min_topleave_num, NTask, All.BoxSize / All.GridSize, len_gas / All.BoxSize); + } + } + else + { + All.GridSize <<= 1; + min_topleave_num = (int)pow(len_gas * All.GridSize / All.BoxSize, 3.0); + mpi_printf("ADD BACKGROUND GRID: GridSize=%3d, min_topleave_num=%6d, NTask=%6d, BoxSize/GridSize=%g, len_gas/GridSize=%g\n", + All.GridSize, min_topleave_num, NTask, All.BoxSize / All.GridSize, len_gas / All.BoxSize); + } + + if(min_topleave_num < NTask) + { + char buf[500]; + sprintf(buf, + 
"min_topleave_num=%d < NTask=%d, MaxGridSize=%d. Try either to run with less task or to set the BoxSize to a smaller " + "value\n", + min_topleave_num, NTask, ADDBACKGROUNDGRIDMAX); + terminate(buf); + } + + if(len_gas / All.BoxSize > All.GridSize) + { + char buf[500]; + sprintf(buf, "len_gas/BoxSize=%g > GridSize=%d, MaxGridSize=%d. GridSize should be increased if possible\n", + len_gas / All.BoxSize, All.GridSize, ADDBACKGROUNDGRIDMAX); + terminate(buf); + } + + if(shift_half_box) + { + mpi_printf("ADD BACKGROUND GRID: Need to shift particles by half box size\n\n"); + for(i = 0; i < NumPart; i++) + { + P[i].Pos[0] += 0.5 * All.BoxSize; + P[i].Pos[1] += 0.5 * All.BoxSize; + P[i].Pos[2] += 0.5 * All.BoxSize; + } + } +} + +#endif /* #ifdef ADDBACKGROUNDGRID */ diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h new file mode 100644 index 0000000000..47c81c199b --- /dev/null +++ b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h @@ -0,0 +1,58 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/add_backgroundgrid/add_bggrid.h + * \date 05/2018 + * \brief Re-gridding of ICs to ensure that the entire computational + * domain contains gas cells. + * \details Can be used to convert SPH ICs to Arepo ICs. + * Interface functions: + * int add_backgroundgrid(void); + * void prepare_domain_backgroundgrid(void); + * Functions of this module called in: + * init() (init.c) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef ADD_BGGRID_H +#define ADD_BGGRID_H + +#include "../main/allvars.h" + +#ifdef ADDBACKGROUNDGRID + +#define ADDBACKGROUNDGRIDMAX 256 +#define FACTOR_MAX_BOX_SIZE 15.0 +#define FACTOR_MIN_BOX_SIZE 2.0 + +extern MyIDType IDNew; + +int add_backgroundgrid(void); +void prepare_domain_backgroundgrid(void); +void calculate_weights(); +void distribute_particles(); + +#endif /* #ifdef ADDBACKGROUNDGRID */ + +#endif /* ADD_BGGRID_H */ diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c b/src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c new file mode 100644 index 0000000000..8e0f2ea04e --- /dev/null +++ b/src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c @@ -0,0 +1,301 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <https://www.gnu.org/licenses/>.
+ *
+ * \file src/add_backgroundgrid/calc_weights.c
+ * \date 05/2018
+ * \brief Routine that calculates the cumulative weights of neighboring
+ * cells.
+ * \details contains functions:
+ * static void particle2in(data_in * in, int i, int firstnode)
+ * static void out2particle(data_out * out, int i, int mode)
+ * static void kernel_local(void)
+ * static void kernel_imported(void)
+ * void calculate_weights()
+ * int find_cells_evaluate(int target, int mode, int thread_id)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "add_bggrid.h"
+
+#ifdef ADDBACKGROUNDGRID
+
+static int find_cells_evaluate(int target, int mode, int thread_id);
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ * to other processors if needed. Type called data_in and static
+ * pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* position, copied from P[i].Pos by particle2in() */
+  MyFloat Hsml;    /* search radius, copied from SphP[i].Hsml by particle2in() */
+
+  int Firstnode; /* set from the firstnode argument of particle2in() */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ * structure defined above. Needed by generic_comm_helpers2.
+ *
+ * \param[out] in Data structure to fill.
+ * \param[in] i Index of particle in P and SphP arrays.
+ * \param[in] firstnode First node of communication.
+ * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + + in->Hsml = SphP[i].Hsml; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyFloat Weight; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? + * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + SphP[i].Weight = out->Weight; + } + else /* combine */ + { + SphP[i].Weight += out->Weight; + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int idx; + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + idx = NextParticle++; + + if(idx >= TimeBinsGravity.NActiveParticles) + break; + + int i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + find_cells_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. 
+ *
+ * \return void
+ */
+static void kernel_imported(void)
+{
+  int tid = get_thread_num();
+  int slot;
+
+  /* now do the particles that were sent to us */
+  for(slot = 0; slot < Nimport; slot++)
+    find_cells_evaluate(slot, MODE_IMPORTED_PARTICLES, tid);
+}
+
+/*! \brief Calculates SPH weights of each cell.
+ *
+ * Redoes the domain decomposition, builds the neighbour tree over the NumGas
+ * gas cells, marks every gas cell with positive mass as active (the
+ * TimeBinsGravity active list is reused for this) and runs the generic
+ * communication pattern with the two kernels of this file.
+ *
+ * \return void
+ */
+void calculate_weights()
+{
+  int cell;
+
+  domain_free();
+  domain_Decomposition(); /* do new domain decomposition, will also make a new chained-list of synchronized particles */
+
+  ngb_treeallocate();
+  ngb_treebuild(NumGas);
+
+  mpi_printf("ADD BACKGROUND GRID: distribution of fluid quantities in a SPH-like fashion\n");
+  mpi_printf("ADD BACKGROUND GRID: finding the normalization factors\n");
+
+  TimeBinsGravity.NActiveParticles = 0;
+
+  for(cell = 0; cell < NumGas; cell++)
+    {
+      /* only cells that carry mass have something to distribute */
+      if(!(P[cell].Mass > 0))
+        continue;
+
+      TimeBinsGravity.ActiveParticleList[TimeBinsGravity.NActiveParticles++] = cell;
+    }
+
+  generic_set_MaxNexport();
+
+  generic_comm_pattern(TimeBinsGravity.NActiveParticles, kernel_local, kernel_imported);
+
+  mpi_printf("ADD BACKGROUND GRID: done\n");
+}
+
+/*!
\brief finds cells and adds up weights in an SPH fashion
+ *
+ * For the target particle, every neighbour returned by the tree search that
+ * has ID >= IDNew (presumably the newly created background-grid cells) and
+ * lies closer than the target's Hsml contributes wk * Volume to the weight,
+ * where wk is the kernel value at its distance. For local targets the sum
+ * is stored via out2particle(); for imported targets it is written to
+ * DataResult[target].
+ *
+ * \param[in] target Index of particle/cell
+ * \param[in] mode Flag if it operates on local or imported data
+ * \param[in] thread_id ID of thread
+ *
+ * \return 0
+ */
+int find_cells_evaluate(int target, int mode, int thread_id)
+{
+  int j, n, numnodes, *firstnode;
+  double h, h2, hinv, hinv3;
+  MyDouble dx, dy, dz, r;
+  MyDouble *pos;
+  double xtmp, ytmp, ztmp; /* presumably scratch variables for the NGB_PERIODIC_LONG_* macros -- TODO confirm */
+
+  double weight = 0;
+
+  data_in local, *target_data;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      target_data = &DataGet[target]; /* target indexes the imported buffer in this mode */
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos  = target_data->Pos;
+  h    = target_data->Hsml;
+  h2   = h * h;
+  hinv = 1.0 / h;
+#ifndef TWODIMS
+  hinv3 = hinv * hinv * hinv;
+#else  /* #ifndef TWODIMS */
+  hinv3 = hinv * hinv / boxSize_Z;
+#endif /* #ifndef TWODIMS #else */
+
+  int nfound = ngb_treefind_variable_threads(pos, h, target, mode, thread_id, numnodes, firstnode);
+
+  for(n = 0; n < nfound; n++)
+    {
+      j = Thread[thread_id].Ngblist[n];
+
+      if(P[j].ID >= IDNew) /* cells with smaller IDs are ignored */
+        {
+          dx = NGB_PERIODIC_LONG_X(pos[0] - P[j].Pos[0]);
+          dy = NGB_PERIODIC_LONG_Y(pos[1] - P[j].Pos[1]);
+          dz = NGB_PERIODIC_LONG_Z(pos[2] - P[j].Pos[2]);
+
+          double r2 = dx * dx + dy * dy + dz * dz;
+
+          if(r2 < h2)
+            {
+              r = sqrt(r2);
+
+              double u = r * hinv; /* distance in units of the search radius, 0 <= u < 1 */
+              double wk;
+              if(u < 0.5) /* the kernel is defined piecewise in u */
+                wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
+              else
+                wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
+
+              weight += wk * SphP[j].Volume;
+            }
+        }
+    }
+
+  out.Weight = weight;
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#endif /* #ifdef ADDBACKGROUNDGRID */
diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/distribute.c
b/src/amuse/community/arepo/src/add_backgroundgrid/distribute.c new file mode 100644 index 0000000000..aad7d150c5 --- /dev/null +++ b/src/amuse/community/arepo/src/add_backgroundgrid/distribute.c @@ -0,0 +1,328 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/add_backgroundgrid/distribute.c + * \date 05/2018 + * \brief Distributes the cell properties in an SPH kernel weighted + * fashion to neighboring cells. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void distribute_particles(void) + * int find_cells_evaluate(int target, int mode, int thread_id) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "add_bggrid.h" + +#ifdef ADDBACKGROUNDGRID + +static int find_cells_evaluate(int target, int mode, int thread_id); + +/*! 
\brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Hsml; + MyFloat Weight; + MyFloat Mass; + MyFloat InternalEnergy; + MyFloat Momentum[3]; +#ifdef MHD + MyFloat B[3]; +#endif /* #ifdef MHD */ + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + + in->Hsml = SphP[i].Hsml; + + in->Weight = SphP[i].Weight; + in->Mass = P[i].Mass; + in->InternalEnergy = SphP[i].Utherm * P[i].Mass; + + int k; + for(k = 0; k < 3; k++) + in->Momentum[k] = P[i].Vel[k] * P[i].Mass; + +#ifdef MHD + for(k = 0; k < 3; k++) + in->B[k] = SphP[i].B[k]; +#endif /* #ifdef MHD */ + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + char nothing; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) { return; } + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int idx; + { + int j, threadid = get_thread_num(); + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + idx = NextParticle++; + + if(idx >= TimeBinsGravity.NActiveParticles) + break; + + int i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + find_cells_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + find_cells_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Main function to distribute hydro quantities over a kernel average. 
+ * + * \return void + */ +void distribute_particles(void) +{ + mpi_printf("ADD BACKGROUND GRID: distributing the fluid quantities\n"); + + generic_set_MaxNexport(); + + generic_comm_pattern(TimeBinsGravity.NActiveParticles, kernel_local, kernel_imported); + +#ifdef MHD + /* now divide the B field in each cell by the weight (sum of the wk's, + which we stored in SphP.divB */ + for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + int i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].ID >= IDNew) + { + int j; + if(SphP[i].DivB > 0) + for(j = 0; j < 3; j++) + SphP[i].B[j] /= SphP[i].DivB; + } + } +#endif /* #ifdef MHD */ + + mpi_printf("ADD BACKGROUND GRID: done\n"); +} + +/*! \brief Distributes imported properties on neighbouring cells. + * + * \param[in] target Index of particle/cell. + * \param[in] mode Flag if it operates on local or imported data. + * \param[in] threadid ID of thread. + * + * \return 0 + */ +int find_cells_evaluate(int target, int mode, int thread_id) +{ + int j, n, numnodes, *firstnode; + double h, h2, hinv, hinv3; + MyDouble dx, dy, dz, r; + MyDouble *pos; + double xtmp, ytmp, ztmp; + + data_in local, *target_data; + data_out out; + out.nothing = 0; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->Pos; + h = target_data->Hsml; + h2 = h * h; + hinv = 1.0 / h; +#ifndef TWODIMS + hinv3 = hinv * hinv * hinv; +#else /* #ifndef TWODIMS */ + hinv3 = hinv * hinv / boxSize_Z; +#endif /* #ifndef TWODIMS #else */ + + int nfound = ngb_treefind_variable_threads(pos, h, target, mode, thread_id, numnodes, firstnode); + + double wsum = 0; + + for(n = 0; n < nfound; n++) + { + j = Thread[thread_id].Ngblist[n]; + + if(P[j].ID < IDNew) + continue; + + dx = NGB_PERIODIC_LONG_X(pos[0] - P[j].Pos[0]); + dy 
= NGB_PERIODIC_LONG_Y(pos[1] - P[j].Pos[1]); + dz = NGB_PERIODIC_LONG_Z(pos[2] - P[j].Pos[2]); + + double r2 = dx * dx + dy * dy + dz * dz; + + if(r2 < h2) + { + r = sqrt(r2); + + double u = r * hinv; + double wk; + if(u < 0.5) + wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u); + else + wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u); + + double weight = SphP[j].Volume * wk / target_data->Weight; + + wsum += weight; + + P[j].Mass += target_data->Mass * weight; + SphP[j].Energy += target_data->InternalEnergy * weight; + + int k; + for(k = 0; k < 3; k++) + SphP[j].Momentum[k] += target_data->Momentum[k] * weight; + +#ifdef MHD + for(k = 0; k < 3; k++) + SphP[j].B[k] += target_data->B[k] * weight; + SphP[j].DivB += wk; +#endif /* #ifdef MHD */ + } + } + + if(wsum > 1.01) + { + printf("wsum=%g, Weight=%g, target=%d\n", wsum, target_data->Weight, target); + terminate("bla"); + } + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +#endif /* #ifdef ADDBACKGROUNDGRID */ diff --git a/src/amuse/community/arepo/src/cooling/cooling.c b/src/amuse/community/arepo/src/cooling/cooling.c new file mode 100644 index 0000000000..7e7cebbc98 --- /dev/null +++ b/src/amuse/community/arepo/src/cooling/cooling.c @@ -0,0 +1,870 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/cooling/cooling.c + * \date 05/2018 + * \brief Module for gas radiative cooling + * \details contains functions: + * double DoCooling(double u_old, double rho, double dt, double + * *ne_guess) + * double GetCoolingTime(double u_old, double rho, double + * *ne_guess) + * double convert_u_to_temp(double u, double rho, double + * *ne_guess) + * void find_abundances_and_rates(double logT, double rho, + * double *ne_guess) + * double CoolingRateFromU(double u, double rho, double + * *ne_guess) + * void SetOutputGasState(int i, double *ne_guess, double *nH0, + * double *coolrate) + * double CoolingRate(double logT, double rho, double *nelec) + * void MakeRateTable(void) + * void ReadIonizeParams(char *fname, int which) + * void IonizeParamsUVB(void) + * void SetZeroIonization(void) + * void IonizeParams(void) + * void InitCool(void) + * void cooling_only(void) + * void cool_cell(int i) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef COOLING + +static double Tmin = 0.0; /*!< min temperature in log10 */ +static double Tmax = 9.0; /*!< max temperature in log10 */ +static double deltaT; /*!< log10 of temperature spacing in the interpolation tables */ +static GasState gs; /*!< gas state */ +static RateTable *RateT; /*!< tabulated rates */ +static PhotoTable *PhotoTUVB; /*!< photo-ionization/heating rate table for UV background */ +static PhotoCurrent pc; /*!< 
current interpolated photo rates */
+static int NheattabUVB; /*!< length of UVB photo table */
+static DoCoolData DoCool; /*!< cooling data */
+
+/*! \brief Computes the new internal energy per unit mass.
+ *
+ * The function solves for the new internal energy per unit mass of the gas
+ * by integrating the equation for the internal energy with an implicit
+ * Euler scheme. The root of the resulting non-linear equation,
+ * which gives the new internal energy, is found with the bisection method.
+ * Arguments are passed in code units.
+ *
+ * The function whose root is sought is
+ * f(u) = u - u_old - ratefact * LambdaNet(u) * dt.
+ * The inputs are also recorded in DoCool so that they can be printed if an
+ * iteration fails to converge.
+ *
+ * \param[in] u_old the initial (before cooling is applied) internal energy
+ * per unit mass of the gas cell.
+ * \param[in] rho the proper density of the gas cell.
+ * \param[in] dt the duration of the time step.
+ * \param[in] ne_guess electron number density relative to hydrogen number
+ * density (for molecular weight computation).
+ *
+ * \return The new internal energy per unit mass of the gas cell.
+ */
+double DoCooling(double u_old, double rho, double dt, double *ne_guess)
+{
+  double u, du;
+  double u_lower, u_upper;
+  double ratefact;
+  double LambdaNet;
+
+  int iter = 0;
+
+  DoCool.u_old_input    = u_old;
+  DoCool.rho_input      = rho;
+  DoCool.dt_input       = dt;
+  DoCool.ne_guess_input = *ne_guess;
+
+  if(!gsl_finite(u_old))
+    terminate("invalid input: u_old=%g\n", u_old);
+
+  if(u_old < 0 || rho < 0)
+    terminate("invalid input: task=%d u_old=%g rho=%g dt=%g All.MinEgySpec=%g\n", ThisTask, u_old, rho, dt, All.MinEgySpec);
+
+  rho *= All.UnitDensity_in_cgs * All.HubbleParam * All.HubbleParam; /* convert to physical cgs units */
+  u_old *= All.UnitPressure_in_cgs / All.UnitDensity_in_cgs;
+  dt *= All.UnitTime_in_s / All.HubbleParam;
+
+  gs.nHcgs = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */
+  ratefact = gs.nHcgs * gs.nHcgs / rho;
+
+  u       = u_old;
+  u_lower = u;
+  u_upper = u;
+
+  LambdaNet = CoolingRateFromU(u, rho, ne_guess);
+
+  /* bracketing: widen [u_lower, u_upper] around u_old in steps of a factor
+     1.1 until f changes sign at the end that is being moved */
+  if(u - u_old - ratefact * LambdaNet * dt < 0) /* heating */
+    {
+      u_upper *= sqrt(1.1);
+      u_lower /= sqrt(1.1);
+      while(u_upper - u_old - ratefact * CoolingRateFromU(u_upper, rho, ne_guess) * dt < 0)
+        {
+          u_upper *= 1.1;
+          u_lower *= 1.1;
+        }
+    }
+
+  if(u - u_old - ratefact * LambdaNet * dt > 0) /* f > 0 at u_old: the bracket is moved downwards */
+    {
+      u_lower /= sqrt(1.1);
+      u_upper *= sqrt(1.1);
+      while(u_lower - u_old - ratefact * CoolingRateFromU(u_lower, rho, ne_guess) * dt > 0)
+        {
+          u_upper /= 1.1;
+          u_lower /= 1.1;
+        }
+    }
+
+  do /* bisection on [u_lower, u_upper] */
+    {
+      u = 0.5 * (u_lower + u_upper);
+
+      LambdaNet = CoolingRateFromU(u, rho, ne_guess);
+
+      if(u - u_old - ratefact * LambdaNet * dt > 0)
+        {
+          u_upper = u;
+        }
+      else
+        {
+          u_lower = u;
+        }
+
+      du = u_upper - u_lower;
+
+      iter++;
+
+      if(iter >= (MAXITER - 10)) /* trace the last iterations before giving up */
+        printf("u= %g\n", u);
+    }
+  while(fabs(du / u) > 1.0e-6 && iter < MAXITER); /* stop at a relative bracket width of 1e-6 */
+
+  if(iter >= MAXITER)
+    terminate(
+        "failed to converge in DoCooling(): DoCool.u_old_input=%g\nDoCool.rho_input= %g\nDoCool.dt_input= %g\nDoCool.ne_guess_input= "
+        "%g\n",
+        DoCool.u_old_input, DoCool.rho_input, DoCool.dt_input, DoCool.ne_guess_input);
+
+  u *= All.UnitDensity_in_cgs / All.UnitPressure_in_cgs; /* to internal units */
+
+  return u;
+}
+
+/*! \brief Returns the cooling time.
+ *
+ * If we actually have heating, a cooling time of 0 is returned.
+ *
+ * \param[in] u_old The initial (before cooling is applied) internal energy
+ * per unit mass of the gas cell.
+ * \param[in] rho The proper density of the gas cell.
+ * \param[in] ne_guess Electron number density relative to hydrogen number
+ * density (for molecular weight computation).
+ *
+ * \return Cooling time; 0 if heating.
+ */
+double GetCoolingTime(double u_old, double rho, double *ne_guess)
+{
+  double rate_factor, lambda_net, t_cool;
+
+  /* remember the inputs (still in code units) for diagnostics */
+  DoCool.u_old_input    = u_old;
+  DoCool.rho_input      = rho;
+  DoCool.ne_guess_input = *ne_guess;
+
+  /* convert to physical cgs units */
+  rho *= All.UnitDensity_in_cgs * All.HubbleParam * All.HubbleParam;
+  u_old *= All.UnitPressure_in_cgs / All.UnitDensity_in_cgs;
+
+  gs.nHcgs    = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */
+  rate_factor = gs.nHcgs * gs.nHcgs / rho;
+
+  lambda_net = CoolingRateFromU(u_old, rho, ne_guess);
+
+  /* a non-negative net rate means heating: no cooling time is defined */
+  if(lambda_net >= 0)
+    return 0;
+
+  t_cool = u_old / (-rate_factor * lambda_net);
+
+  /* back to internal time units */
+  return t_cool * (All.HubbleParam / All.UnitTime_in_s);
+}
+
+/*! \brief Compute gas temperature from internal energy per unit mass.
+ *
+ * This function determines the electron fraction, and hence the mean
+ * molecular weight. With it arrives at a self-consistent temperature.
+ * Element abundances and the rates for the emission are also computed.
+ *
+ * \param[in] u internal energy per unit mass.
+ * \param[in] rho gas density.
+ * \param[in, out] ne_guess electron number density relative to hydrogen
+ * number density
+ *
+ * \return The gas temperature.
+ */ +double convert_u_to_temp(double u, double rho, double *ne_guess) +{ + double temp, temp_old, temp_new, max = 0, ne_old; + double mu; + int iter = 0; + + double u_input, rho_input, ne_input; + + u_input = u; + rho_input = rho; + ne_input = *ne_guess; + + mu = (1 + 4 * gs.yhelium) / (1 + gs.yhelium + *ne_guess); + temp = GAMMA_MINUS1 / BOLTZMANN * u * PROTONMASS * mu; + + do + { + ne_old = *ne_guess; + + find_abundances_and_rates(log10(temp), rho, ne_guess); + temp_old = temp; + + mu = (1 + 4 * gs.yhelium) / (1 + gs.yhelium + *ne_guess); + + temp_new = GAMMA_MINUS1 / BOLTZMANN * u * PROTONMASS * mu; + + max = dmax(max, temp_new / (1 + gs.yhelium + *ne_guess) * fabs((*ne_guess - ne_old) / (temp_new - temp_old + 1.0))); + + temp = temp_old + (temp_new - temp_old) / (1 + max); + iter++; + + if(iter > (MAXITER - 10)) + printf("-> temp= %g ne=%g\n", temp, *ne_guess); + } + while(fabs(temp - temp_old) > 1.0e-3 * temp && iter < MAXITER); + + if(iter >= MAXITER) + { + printf("failed to converge in convert_u_to_temp()\n"); + printf("u_input= %g\nrho_input=%g\n ne_input=%g\n", u_input, rho_input, ne_input); + printf("DoCool.u_old_input=%g\nDoCool.rho_input= %g\nDoCool.dt_input= %g\nDoCool.ne_guess_input= %g\n", DoCool.u_old_input, + DoCool.rho_input, DoCool.dt_input, DoCool.ne_guess_input); + terminate("convergence failure"); + } + + gs.mu = mu; + + return temp; +} + +/*! \brief Computes the actual abundance ratios. + * + * The chemical composition of the gas is primordial (no metals are present). + * + * \param[in] logT log10 of gas temperature. + * \param[in] rho Gas density. + * \param[in, out] ne_guess Electron number density relative to hydrogen + * number density. 
+ * + * \return void + */ +void find_abundances_and_rates(double logT, double rho, double *ne_guess) +{ + double neold, nenew; + int j, niter; + double flow, fhi, t; + + double logT_input, rho_input, ne_input; + + logT_input = logT; + rho_input = rho; + ne_input = *ne_guess; + + if(!gsl_finite(logT)) + terminate("logT=%g\n", logT); + + if(logT <= Tmin) /* everything neutral */ + { + gs.nH0 = 1.0; + gs.nHe0 = gs.yhelium; + gs.nHp = 0; + gs.nHep = 0; + gs.nHepp = 0; + gs.ne = 0; + *ne_guess = 0; + return; + } + + if(logT >= Tmax) /* everything is ionized */ + { + gs.nH0 = 0; + gs.nHe0 = 0; + gs.nHp = 1.0; + gs.nHep = 0; + gs.nHepp = gs.yhelium; + gs.ne = gs.nHp + 2.0 * gs.nHepp; + *ne_guess = gs.ne; /* note: in units of the hydrogen number density */ + return; + } + + t = (logT - Tmin) / deltaT; + j = (int)t; + fhi = t - j; + flow = 1 - fhi; + + if(*ne_guess == 0) + *ne_guess = 1.0; + + gs.nHcgs = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */ + + gs.ne = *ne_guess; + neold = gs.ne; + niter = 0; + gs.necgs = gs.ne * gs.nHcgs; + + /* evaluate number densities iteratively (cf KWH eqns 33-38) in units of nH */ + do + { + niter++; + + gs.aHp = flow * RateT[j].AlphaHp + fhi * RateT[j + 1].AlphaHp; + gs.aHep = flow * RateT[j].AlphaHep + fhi * RateT[j + 1].AlphaHep; + gs.aHepp = flow * RateT[j].AlphaHepp + fhi * RateT[j + 1].AlphaHepp; + gs.ad = flow * RateT[j].Alphad + fhi * RateT[j + 1].Alphad; + gs.geH0 = flow * RateT[j].GammaeH0 + fhi * RateT[j + 1].GammaeH0; + gs.geHe0 = flow * RateT[j].GammaeHe0 + fhi * RateT[j + 1].GammaeHe0; + gs.geHep = flow * RateT[j].GammaeHep + fhi * RateT[j + 1].GammaeHep; + + if(gs.necgs <= 1.e-25 || pc.J_UV == 0) + { + gs.gJH0ne = gs.gJHe0ne = gs.gJHepne = 0; + } + else + { + gs.gJH0ne = pc.gJH0 / gs.necgs; + gs.gJHe0ne = pc.gJHe0 / gs.necgs; + gs.gJHepne = pc.gJHep / gs.necgs; + } + + gs.nH0 = gs.aHp / (gs.aHp + gs.geH0 + gs.gJH0ne); /* eqn (33) */ + gs.nHp = 1.0 - gs.nH0; /* eqn (34) */ + + if((gs.gJHe0ne + gs.geHe0) <= 
SMALLNUM) /* no ionization at all */ + { + gs.nHep = 0.0; + gs.nHepp = 0.0; + gs.nHe0 = gs.yhelium; + } + else + { + gs.nHep = + gs.yhelium / (1.0 + (gs.aHep + gs.ad) / (gs.geHe0 + gs.gJHe0ne) + (gs.geHep + gs.gJHepne) / gs.aHepp); /* eqn (35) */ + gs.nHe0 = gs.nHep * (gs.aHep + gs.ad) / (gs.geHe0 + gs.gJHe0ne); /* eqn (36) */ + gs.nHepp = gs.nHep * (gs.geHep + gs.gJHepne) / gs.aHepp; /* eqn (37) */ + } + + neold = gs.ne; + + gs.ne = gs.nHp + gs.nHep + 2 * gs.nHepp; /* eqn (38) */ + gs.necgs = gs.ne * gs.nHcgs; + + if(pc.J_UV == 0) + break; + + nenew = 0.5 * (gs.ne + neold); + gs.ne = nenew; + gs.necgs = gs.ne * gs.nHcgs; + + if(fabs(gs.ne - neold) < 1.0e-4) + break; + + if(niter > (MAXITER - 10)) + printf("ne= %g niter=%d\n", gs.ne, niter); + } + while(niter < MAXITER); + + if(niter >= MAXITER) + { + printf("gs.aHp = %le\n", gs.aHp); + char buff[1000]; + sprintf(buff, "%s/cooling_task%d.dat", All.OutputDir, ThisTask); + FILE *fp = fopen(buff, "w"); + fwrite(&All.Time, sizeof(double), 1, fp); + fwrite(&logT_input, sizeof(double), 1, fp); + fwrite(&rho_input, sizeof(double), 1, fp); + fwrite(&ne_input, sizeof(double), 1, fp); + fclose(fp); + terminate( + "no convergence reached in find_abundances_and_rates(): logT_input= %g rho_input= %g ne_input= %g " + "DoCool.u_old_input=%g\nDoCool.rho_input= %g\nDoCool.dt_input= %g\nDoCool.ne_guess_input= %g\n", + logT_input, rho_input, ne_input, DoCool.u_old_input, DoCool.rho_input, DoCool.dt_input, DoCool.ne_guess_input); + } + gs.bH0 = flow * RateT[j].BetaH0 + fhi * RateT[j + 1].BetaH0; + gs.bHep = flow * RateT[j].BetaHep + fhi * RateT[j + 1].BetaHep; + gs.bff = flow * RateT[j].Betaff + fhi * RateT[j + 1].Betaff; + + *ne_guess = gs.ne; +} + +/*! \brief Get cooling rate from gas internal energy. + * + * This function first computes the self-consistent temperature + * and abundance ratios, and then it calculates + * (heating rate-cooling rate)/n_h^2 in cgs units. + * + * \param[in] u Gas internal energy per unit mass. 
+ * \param[in] rho Gas density. + * \param[in, out] ne_guess Electron number density relative to hydrogen + * number density. + * + * \return Cooling rate. + */ +double CoolingRateFromU(double u, double rho, double *ne_guess) +{ + double temp; + + temp = convert_u_to_temp(u, rho, ne_guess); + + return CoolingRate(log10(temp), rho, ne_guess); +} + +/*! \brief This function computes the self-consistent temperature and + * abundance ratios. + * + * Used only in io_fields.c for calculating output fields. + * + * \param[in] i index into SphP for gas cell to consider. + * \param[in, out] ne_guess pointer to electron number density relative to + * hydrogen number density (modified). + * \param[out] nH0 Pointer to the neutral hydrogen fraction (set to value in + * the GasState struct). + * \param[out] coolrate Pointer to cooling rate (set to value from + * CoolingRateFromU). + * + * \return void + */ +void SetOutputGasState(int i, double *ne_guess, double *nH0, double *coolrate) +{ + double sfr = 0; + double rho = SphP[i].Density * All.cf_a3inv; + double u = dmax(All.MinEgySpec, SphP[i].Utherm); + + /* update GasState as appropriate given compile-time options and cell properties */ +#if defined(USE_SFR) + sfr = get_starformation_rate(i); +#endif /* #if defined(USE_SFR) */ + + /* update DoCool */ + DoCool.u_old_input = u; + DoCool.rho_input = rho; + DoCool.ne_guess_input = *ne_guess; + + /* convert to physical cgs units */ + rho *= All.UnitDensity_in_cgs * All.HubbleParam * All.HubbleParam; + u *= All.UnitPressure_in_cgs / All.UnitDensity_in_cgs; + + /* calculate cooling rate (and so ne_guess and all of gs including nH0, nHeII) */ + *coolrate = CoolingRateFromU(u, rho, ne_guess); + + *nH0 = gs.nH0; +} + +/*! \brief Calculate (heating rate-cooling rate)/n_h^2 in cgs units. + * + * \param[in] logT log10 of gas temperature. + * \param[in] rho Gas density. + * \param[in, out] nelec Electron number density relative to hydrogen number + * density. 
+ * + * \return (heating rate-cooling rate)/n_h^2. + */ +double CoolingRate(double logT, double rho, double *nelec) +{ + double Lambda, Heat; + double LambdaExc, LambdaIon, LambdaRec, LambdaFF, LambdaCmptn = 0.0; + double LambdaExcH0, LambdaExcHep, LambdaIonH0, LambdaIonHe0, LambdaIonHep; + double LambdaRecHp, LambdaRecHep, LambdaRecHepp, LambdaRecHepd; + double redshift; + double T; + double LambdaPrim = 0.0, LambdaMet = 0.0, LambdaDust = 0.0, LambdaMol = 0.0; + + if(logT <= Tmin) + logT = Tmin + 0.5 * deltaT; /* floor at Tmin */ + + gs.nHcgs = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */ + + if(logT < Tmax) + { + find_abundances_and_rates(logT, rho, nelec); + + /* Compute cooling and heating rate (cf KWH Table 1) in units of nH**2 */ + T = pow(10.0, logT); + + LambdaExcH0 = gs.bH0 * gs.ne * gs.nH0; + LambdaExcHep = gs.bHep * gs.ne * gs.nHep; + LambdaExc = LambdaExcH0 + LambdaExcHep; /* excitation */ + LambdaIonH0 = 2.18e-11 * gs.geH0 * gs.ne * gs.nH0; + LambdaIonHe0 = 3.94e-11 * gs.geHe0 * gs.ne * gs.nHe0; + LambdaIonHep = 8.72e-11 * gs.geHep * gs.ne * gs.nHep; + LambdaIon = LambdaIonH0 + LambdaIonHe0 + LambdaIonHep; /* ionization */ + LambdaRecHp = 1.036e-16 * T * gs.ne * (gs.aHp * gs.nHp); + LambdaRecHep = 1.036e-16 * T * gs.ne * (gs.aHep * gs.nHep); + LambdaRecHepp = 1.036e-16 * T * gs.ne * (gs.aHepp * gs.nHepp); + LambdaRecHepd = 6.526e-11 * gs.ad * gs.ne * gs.nHep; + LambdaRec = LambdaRecHp + LambdaRecHep + LambdaRecHepp + LambdaRecHepd; + LambdaFF = gs.bff * (gs.nHp + gs.nHep + 4 * gs.nHepp) * gs.ne; + LambdaPrim = LambdaExc + LambdaIon + LambdaRec + LambdaFF; + + if(All.ComovingIntegrationOn) + { + redshift = 1 / All.Time - 1; + LambdaCmptn = 5.65e-36 * gs.ne * (T - 2.73 * (1. + redshift)) * pow(1. + redshift, 4.) 
/ gs.nHcgs; + } + else + LambdaCmptn = 0; + + Lambda = LambdaPrim + LambdaMet + LambdaDust + LambdaCmptn + LambdaMol; + + Heat = 0; + if(pc.J_UV != 0) + Heat += (gs.nH0 * pc.epsH0 + gs.nHe0 * pc.epsHe0 + gs.nHep * pc.epsHep) / gs.nHcgs; + } + else /* here we're outside of tabulated rates, T>Tmax K */ + { + /* at high T (fully ionized); only free-free and Compton cooling are present. Assumes no heating. */ + Heat = 0; + + LambdaExcH0 = LambdaExcHep = LambdaIonH0 = LambdaIonHe0 = LambdaIonHep = LambdaRecHp = LambdaRecHep = LambdaRecHepp = + LambdaRecHepd = 0; + + /* very hot: H and He both fully ionized */ + gs.nHp = 1.0; + gs.nHep = 0; + gs.nHepp = gs.yhelium; + gs.ne = gs.nHp + 2.0 * gs.nHepp; + *nelec = gs.ne; /* note: in units of the hydrogen number density */ + + T = pow(10.0, logT); + LambdaFF = 1.42e-27 * sqrt(T) * (1.1 + 0.34 * exp(-(5.5 - logT) * (5.5 - logT) / 3)) * (gs.nHp + 4 * gs.nHepp) * gs.ne; + + if(All.ComovingIntegrationOn) + { + redshift = 1 / All.Time - 1; + /* add inverse Compton cooling off the microwave background */ + LambdaCmptn = 5.65e-36 * gs.ne * (T - 2.73 * (1. + redshift)) * pow(1. + redshift, 4.) / gs.nHcgs; + } + else + LambdaCmptn = 0; + + Lambda = LambdaFF + LambdaCmptn; + } + + return (Heat - Lambda); +} + +/*! \brief Make cooling rates interpolation table. + * + * Set up interpolation tables in T for cooling rates given in + * KWH, ApJS, 105, 19. + * + * \return void + */ +void MakeRateTable(void) +{ + int i; + double T; + double Tfact; + + gs.yhelium = (1 - gs.XH) / (4 * gs.XH); + gs.mhboltz = PROTONMASS / BOLTZMANN; + if(All.MinGasTemp > 0.0) + Tmin = log10(0.1 * All.MinGasTemp); + else + Tmin = 1.0; + deltaT = (Tmax - Tmin) / NCOOLTAB; + gs.ethmin = pow(10.0, Tmin) * (1. + gs.yhelium) / ((1. + 4. 
* gs.yhelium) * gs.mhboltz * GAMMA_MINUS1); + /* minimum internal energy for neutral gas */ + + for(i = 0; i <= NCOOLTAB; i++) + { + RateT[i].BetaH0 = RateT[i].BetaHep = RateT[i].Betaff = RateT[i].AlphaHp = RateT[i].AlphaHep = RateT[i].AlphaHepp = + RateT[i].Alphad = RateT[i].GammaeH0 = RateT[i].GammaeHe0 = RateT[i].GammaeHep = 0; + + T = pow(10.0, Tmin + deltaT * i); + Tfact = 1.0 / (1 + sqrt(T / 1.0e5)); + + /* collisional excitation */ + /* Cen 1992 */ + if(118348 / T < 70) + RateT[i].BetaH0 = 7.5e-19 * exp(-118348 / T) * Tfact; + if(473638 / T < 70) + RateT[i].BetaHep = 5.54e-17 * pow(T, -0.397) * exp(-473638 / T) * Tfact; + + /* free-free */ + RateT[i].Betaff = 1.43e-27 * sqrt(T) * (1.1 + 0.34 * exp(-(5.5 - log10(T)) * (5.5 - log10(T)) / 3)); + + /* recombination */ + /* Cen 1992 */ + /* Hydrogen II */ + RateT[i].AlphaHp = 8.4e-11 * pow(T / 1000, -0.2) / (1. + pow(T / 1.0e6, 0.7)) / sqrt(T); + /* Helium II */ + RateT[i].AlphaHep = 1.5e-10 * pow(T, -0.6353); + /* Helium III */ + RateT[i].AlphaHepp = 4. * RateT[i].AlphaHp; + + /* Cen 1992 */ + /* dielectric recombination */ + if(470000 / T < 70) + RateT[i].Alphad = 1.9e-3 * pow(T, -1.5) * exp(-470000 / T) * (1. + 0.3 * exp(-94000 / T)); + + /* collisional ionization */ + /* Cen 1992 */ + /* Hydrogen */ + if(157809.1 / T < 70) + RateT[i].GammaeH0 = 5.85e-11 * sqrt(T) * exp(-157809.1 / T) * Tfact; + /* Helium */ + if(285335.4 / T < 70) + RateT[i].GammaeHe0 = 2.38e-11 * sqrt(T) * exp(-285335.4 / T) * Tfact; + /* Hellium II */ + if(631515.0 / T < 70) + RateT[i].GammaeHep = 5.68e-12 * sqrt(T) * exp(-631515.0 / T) * Tfact; + } +} + +/*! \brief Read table input for ionizing parameters. + * + * \param[in] fname Name of file that contains the tabulated parameters. + * \param[in] which Flag used to identify the type of the ionizing background + * (0 = UV background, 1 = AGN background, 2=RADCOOL). 
+ * + * \return void + */ +void ReadIonizeParams(char *fname, int which) +{ + int iter, i; + FILE *fdcool; + float dummy; + + if(which == 0) + { + NheattabUVB = 0; + + for(iter = 0, i = 0; iter < 2; iter++) + { + if(!(fdcool = fopen(fname, "r"))) + terminate("COOLING: cannot read ionization table in file `%s'\n", fname); + if(iter == 0) + while(fscanf(fdcool, "%g %g %g %g %g %g %g", &dummy, &dummy, &dummy, &dummy, &dummy, &dummy, &dummy) != EOF) + NheattabUVB++; + if(iter == 1) + while(fscanf(fdcool, "%g %g %g %g %g %g %g", &PhotoTUVB[i].variable, &PhotoTUVB[i].gH0, &PhotoTUVB[i].gHe, + &PhotoTUVB[i].gHep, &PhotoTUVB[i].eH0, &PhotoTUVB[i].eHe, &PhotoTUVB[i].eHep) != EOF) + i++; + fclose(fdcool); + + if(iter == 0) + { + PhotoTUVB = (PhotoTable *)mymalloc("PhotoT", NheattabUVB * sizeof(PhotoTable)); + mpi_printf("COOLING: read ionization table with %d entries in file `%s'.\n", NheattabUVB, fname); + } + } + /* ignore zeros at end of treecool file */ + for(i = 0; i < NheattabUVB; ++i) + if(PhotoTUVB[i].gH0 == 0.0) + break; + + NheattabUVB = i; + mpi_printf("COOLING: using %d ionization table entries from file `%s'.\n", NheattabUVB, fname); + } +} + +/*! \brief Set the ionization parameters for the UV background. 
+ * + * \return void + */ +void IonizeParamsUVB(void) +{ + int i, ilow; + double logz, dzlow, dzhi; + double redshift; + + if(All.ComovingIntegrationOn) + redshift = 1 / All.Time - 1; + else + { + redshift = 0.0; + } + + logz = log10(redshift + 1.0); + ilow = 0; + for(i = 0; i < NheattabUVB; i++) + { + if(PhotoTUVB[i].variable < logz) + ilow = i; + else + break; + } + + dzlow = logz - PhotoTUVB[ilow].variable; + dzhi = PhotoTUVB[ilow + 1].variable - logz; + + if(NheattabUVB == 0 || logz > PhotoTUVB[NheattabUVB - 1].variable || PhotoTUVB[ilow].gH0 == 0 || PhotoTUVB[ilow + 1].gH0 == 0) + { + SetZeroIonization(); + return; + } + else + pc.J_UV = 1; + + pc.gJH0 = pow(10., (dzhi * log10(PhotoTUVB[ilow].gH0) + dzlow * log10(PhotoTUVB[ilow + 1].gH0)) / (dzlow + dzhi)); + pc.gJHe0 = pow(10., (dzhi * log10(PhotoTUVB[ilow].gHe) + dzlow * log10(PhotoTUVB[ilow + 1].gHe)) / (dzlow + dzhi)); + pc.gJHep = pow(10., (dzhi * log10(PhotoTUVB[ilow].gHep) + dzlow * log10(PhotoTUVB[ilow + 1].gHep)) / (dzlow + dzhi)); + pc.epsH0 = pow(10., (dzhi * log10(PhotoTUVB[ilow].eH0) + dzlow * log10(PhotoTUVB[ilow + 1].eH0)) / (dzlow + dzhi)); + pc.epsHe0 = pow(10., (dzhi * log10(PhotoTUVB[ilow].eHe) + dzlow * log10(PhotoTUVB[ilow + 1].eHe)) / (dzlow + dzhi)); + pc.epsHep = pow(10., (dzhi * log10(PhotoTUVB[ilow].eHep) + dzlow * log10(PhotoTUVB[ilow + 1].eHep)) / (dzlow + dzhi)); + + return; +} + +/*! \brief Reset the ionization parameters. + * + * \return void + */ +void SetZeroIonization(void) { memset(&pc, 0, sizeof(PhotoCurrent)); } + +/*! \brief Wrapper function to set the ionizing background. + * + * \return void + */ +void IonizeParams(void) { IonizeParamsUVB(); } + +/*! \brief Initialize the cooling module. + * + * This function initializes the cooling module. In particular, + * it allocates the memory for the cooling rate and ionization tables + * and initializes them. 
+ * + * \return void + */ +void InitCool(void) +{ + /* set default hydrogen mass fraction */ + gs.XH = HYDROGEN_MASSFRAC; + + /* zero photo-ionization/heating rates */ + SetZeroIonization(); + + /* allocate and construct rate table */ + RateT = (RateTable *)mymalloc("RateT", (NCOOLTAB + 1) * sizeof(RateTable)); + ; + MakeRateTable(); + + /* read photo tables */ + ReadIonizeParams(All.TreecoolFile, 0); + + mpi_printf("GFM_COOLING: time, time begin = %le\t%le\n", All.Time, All.TimeBegin); + All.Time = All.TimeBegin; + set_cosmo_factors_for_current_time(); + + IonizeParams(); +} + +/*! \brief Apply the isochoric cooling to all the active gas cells. + * + * \return void + */ +void cooling_only(void) /* normal cooling routine when star formation is disabled */ +{ + int idx, i; + + CPU_Step[CPU_MISC] += measure_time(); + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i >= 0) + { + if(P[i].Mass == 0 && P[i].ID == 0) + continue; /* skip cells that have been swallowed or eliminated */ + + cool_cell(i); + } + } + CPU_Step[CPU_COOLINGSFR] += measure_time(); +} + +/*! \brief Apply the isochoric cooling to a given gas cell. + * + * This function applies the normal isochoric cooling to a single gas cell. + * Once the cooling has been applied according to one of the cooling models + * implemented, the internal energy per unit mass, the total energy and the + * pressure of the cell are updated. + * + * \param[in] i Index of the gas cell to which cooling is applied. + * + * \return void + */ +void cool_cell(int i) +{ + double dt, dtime, ne = 1; + double unew, dens, dtcool; + + dens = SphP[i].Density; + + dt = (P[i].TimeBinHydro ? 
(((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval; + + dtime = All.cf_atime * dt / All.cf_time_hubble_a; + + dtcool = dtime; + + ne = SphP[i].Ne; /* electron abundance (gives ionization state and mean molecular weight) */ + unew = DoCooling(dmax(All.MinEgySpec, SphP[i].Utherm), dens * All.cf_a3inv, dtcool, &ne); + SphP[i].Ne = ne; + + if(unew < 0) + terminate("invalid temperature: Thistask=%d i=%d unew=%g\n", ThisTask, i, unew); + + double du = unew - SphP[i].Utherm; + + if(unew < All.MinEgySpec) + du = All.MinEgySpec - SphP[i].Utherm; + + SphP[i].Utherm += du; + SphP[i].Energy += All.cf_atime * All.cf_atime * du * P[i].Mass; + +#ifdef OUTPUT_COOLHEAT + if(dtime > 0) + SphP[i].CoolHeat = du * P[i].Mass / dtime; +#endif /* #ifdef OUTPUT_COOLHEAT */ + + set_pressure_of_cell(i); +} + +#endif /* #ifdef COOLING */ diff --git a/src/amuse/community/arepo/src/cooling/cooling_proto.h b/src/amuse/community/arepo/src/cooling/cooling_proto.h new file mode 100644 index 0000000000..cbd304a838 --- /dev/null +++ b/src/amuse/community/arepo/src/cooling/cooling_proto.h @@ -0,0 +1,49 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
/* INLINE_FUNC expands to nothing unless it has been defined before this
 * header is included. */
#ifndef INLINE_FUNC
#define INLINE_FUNC
#endif /* #ifndef INLINE_FUNC */

/* Computes cooling rate and neutral hydrogen fraction of gas cell i for
 * output; results are written through nH0 and coolrate, ne_guess is
 * updated. */
void SetOutputGasState(int i, double *ne_guess, double *nH0, double *coolrate);

/* Iterates to the self-consistent temperature for internal energy u and
 * density rho; updates *ne_guess. */
double convert_u_to_temp(double u, double rho, double *ne_guess);
/* Returns (heating rate - cooling rate)/n_h^2 in cgs units at log10
 * temperature logT; updates *nelec. */
double CoolingRate(double logT, double rho, double *nelec);
/* Same net rate, but starting from the internal energy per unit mass:
 * convert_u_to_temp() followed by CoolingRate(). */
double CoolingRateFromU(double u, double rho, double *ne_guess);
/* Returns the internal energy per unit mass (internal units) after cooling
 * over the time dt. */
double DoCooling(double u_old, double rho, double dt, double *ne_guess);
/* Returns the cooling time, or 0 if the gas is being heated. */
double GetCoolingTime(double u_old, double rho, double *ne_guess);

/* Computes the abundance ratios and interpolated rate coefficients at logT
 * and stores them in the global gas state; updates *ne_guess. */
void find_abundances_and_rates(double logT, double rho, double *ne_guess);
/* Initializes the cooling module (rate table, photo-ionization table,
 * ionizing background at the start time). */
void InitCool(void);
/* Interpolates the UV background table to the current redshift. */
void IonizeParamsUVB(void);
/* Wrapper that sets the ionizing background; calls IonizeParamsUVB(). */
void IonizeParams(void);
/* Reads the photo-ionization/heating table from file fname; which == 0
 * selects the UV background. */
void ReadIonizeParams(char *fname, int which);
/* Zeroes the current photo-ionization/heating rates. */
void SetZeroIonization(void);
#define NCOOLTAB 2000 /* number of intervals of the rate table (the table holds NCOOLTAB + 1 entries) */
#define SMALLNUM 1.0e-60 /* helium ionization rates at or below this count as "no ionization" */
#define COOLLIM 0.1  /* NOTE(review): not referenced in the cooling routines visible here -- confirm use */
#define HEATLIM 20.0 /* NOTE(review): not referenced in the cooling routines visible here -- confirm use */
#define eV_to_K 11606.0 /* presumably the temperature equivalent of 1 eV in Kelvin -- confirm */
#define eV_to_erg 1.60184e-12 /* presumably 1 eV in erg -- confirm value against the intended constant */
#define MAX_TABLESIZE 250 /* Max # of lines in TREECOOL */

/* data for gas state */
typedef struct
{
  /* electron abundance in units of the hydrogen number density, electron
   * number density in cgs units (ne * nHcgs), hydrogen number density in
   * cgs units */
  double ne, necgs, nHcgs;
  /* rate coefficients interpolated from the RateTable at the current
   * temperature (see find_abundances_and_rates) */
  double bH0, bHep, bff, aHp, aHep, aHepp, ad, geH0, geHe0, geHep;
  /* photo-ionization rates divided by the electron number density */
  double gJH0ne, gJHe0ne, gJHepne;
  /* abundances of H0, H+, He+, He0 and He++ in units of the hydrogen
   * number density */
  double nH0, nHp, nHep, nHe0, nHepp;
  /* hydrogen mass fraction and helium abundance (1 - XH) / (4 XH) */
  double XH, yhelium;
  double mhboltz; /* PROTONMASS / BOLTZMANN */
  double ethmin; /* minimum internal energy for neutral gas */
  double mu;     /* mean molecular weight, set by convert_u_to_temp */
} GasState;

/* tabulated rates */
typedef struct
{
  double BetaH0, BetaHep, Betaff;            /* collisional excitation (H0, He+) and free-free */
  double AlphaHp, AlphaHep, Alphad, AlphaHepp; /* recombination (H+, He+, dielectric, He++) */
  double GammaeH0, GammaeHe0, GammaeHep;     /* collisional ionization (H0, He0, He+) */
} RateTable;

/* photo-ionization/heating rate table */
typedef struct
{
  float variable;       /* logz for UVB */
  float gH0, gHe, gHep; /* photo-ionization rates */
  float eH0, eHe, eHep; /* photo-heating rates */
} PhotoTable;

/* current interpolated photo-ionization/heating rates */
typedef struct
{
  char J_UV; /* non-zero while a UV background is active; zeroed by SetZeroIonization */
  double gJH0, gJHep, gJHe0, epsH0, epsHep, epsHe0;
} PhotoCurrent;

/* cooling data */
/* inputs of the most recent cooling call, kept for error reports */
typedef struct
{
  double u_old_input, rho_input, dt_input, ne_guess_input;
} DoCoolData;
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/debug_md5/Md5.c + * \date MM/YYYY + * \brief + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + */ + +#include "../main/allvars.h" +#include "../main/proto.h" + +/* + ********************************************************************** + ** md5.c ** + ** RSA Data Security, Inc. MD5 Message Digest Algorithm ** + ** Created: 2/17/90 RLR ** + ** Revised: 1/91 SRD,AJ,BSK,JT Reference C Version ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** + ** ** + ** License to copy and use this software is granted provided that ** + ** it is identified as the "RSA Data Security, Inc. MD5 Message ** + ** Digest Algorithm" in all material mentioning or referencing this ** + ** software or this function. ** + ** ** + ** License is also granted to make and use derivative works ** + ** provided that such works are identified as "derived from the RSA ** + ** Data Security, Inc. 
MD5 Message Digest Algorithm" in all ** + ** material mentioning or referencing the derived work. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. ** + ********************************************************************** + */ + +/* -- include the following line if the md5.h header file is separate -- */ +#include "Md5.h" + +/* forward declaration */ +static void Transform(); +static void MD5Update(MD5_CTX *mdContext, unsigned char *inBuf, unsigned int inLen); + +static unsigned char PADDING[64] = {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +/* F, G and H are basic MD5 functions: selection, majority, parity */ +#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) +#define G(x, y, z) (((x) & (z)) | ((y) & (~z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define I(x, y, z) ((y) ^ ((x) | (~z))) + +/* ROTATE_LEFT rotates x left n bits */ +#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + +/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */ +/* Rotation is separate from addition to prevent recomputation */ +#define FF(a, b, c, d, x, s, ac) \ + { \ + (a) += F((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT((a), (s)); \ + (a) += (b); \ + } +#define GG(a, b, c, d, x, s, ac) \ + { \ + (a) += G((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = 
ROTATE_LEFT((a), (s)); \ + (a) += (b); \ + } +#define HH(a, b, c, d, x, s, ac) \ + { \ + (a) += H((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT((a), (s)); \ + (a) += (b); \ + } +#define II(a, b, c, d, x, s, ac) \ + { \ + (a) += I((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT((a), (s)); \ + (a) += (b); \ + } + +void MD5Init(MD5_CTX *mdContext) +{ + mdContext->i[0] = mdContext->i[1] = (UINT4)0; + + /* Load magic initialization constants. + */ + mdContext->buf[0] = (UINT4)0x67452301; + mdContext->buf[1] = (UINT4)0xefcdab89; + mdContext->buf[2] = (UINT4)0x98badcfe; + mdContext->buf[3] = (UINT4)0x10325476; +} + +void MD5UpdateLong(MD5_CTX *mdContext, unsigned char *inBuf, unsigned long long inLenLong) +{ + while(inLenLong > 0) + { + unsigned int inLen = 0x10000000; + if(inLen > inLenLong) + inLen = inLenLong; + MD5Update(mdContext, inBuf, inLen); + inBuf += inLen; + inLenLong -= inLen; + } +} + +void MD5Update(MD5_CTX *mdContext, unsigned char *inBuf, unsigned int inLen) +{ + UINT4 in[16]; + int mdi; + unsigned int i, ii; + + /* compute number of bytes mod 64 */ + mdi = (int)((mdContext->i[0] >> 3) & 0x3F); + + /* update number of bits */ + if((mdContext->i[0] + ((UINT4)inLen << 3)) < mdContext->i[0]) + mdContext->i[1]++; + mdContext->i[0] += ((UINT4)inLen << 3); + mdContext->i[1] += ((UINT4)inLen >> 29); + + while(inLen--) + { + /* add new character to buffer, increment mdi */ + mdContext->in[mdi++] = *inBuf++; + + /* transform if necessary */ + if(mdi == 0x40) + { + for(i = 0, ii = 0; i < 16; i++, ii += 4) + in[i] = (((UINT4)mdContext->in[ii + 3]) << 24) | (((UINT4)mdContext->in[ii + 2]) << 16) | + (((UINT4)mdContext->in[ii + 1]) << 8) | ((UINT4)mdContext->in[ii]); + Transform(mdContext->buf, in); + mdi = 0; + } + } +} + +void MD5Final(MD5_CTX *mdContext) +{ + UINT4 in[16]; + int mdi; + unsigned int i, ii; + unsigned int padLen; + + /* save number of bits */ + in[14] = mdContext->i[0]; + in[15] = mdContext->i[1]; + + /* compute number of bytes 
mod 64 */ + mdi = (int)((mdContext->i[0] >> 3) & 0x3F); + + /* pad out to 56 mod 64 */ + padLen = (mdi < 56) ? (56 - mdi) : (120 - mdi); + MD5Update(mdContext, PADDING, padLen); + + /* append length in bits and transform */ + for(i = 0, ii = 0; i < 14; i++, ii += 4) + in[i] = (((UINT4)mdContext->in[ii + 3]) << 24) | (((UINT4)mdContext->in[ii + 2]) << 16) | (((UINT4)mdContext->in[ii + 1]) << 8) | + ((UINT4)mdContext->in[ii]); + Transform(mdContext->buf, in); + + /* store buffer in digest */ + for(i = 0, ii = 0; i < 4; i++, ii += 4) + { + mdContext->digest[ii] = (unsigned char)(mdContext->buf[i] & 0xFF); + mdContext->digest[ii + 1] = (unsigned char)((mdContext->buf[i] >> 8) & 0xFF); + mdContext->digest[ii + 2] = (unsigned char)((mdContext->buf[i] >> 16) & 0xFF); + mdContext->digest[ii + 3] = (unsigned char)((mdContext->buf[i] >> 24) & 0xFF); + } +} + +/* Basic MD5 step. Transform buf based on in. + */ +static void Transform(buf, in) UINT4 *buf; +UINT4 *in; +{ + UINT4 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; + + /* Round 1 */ +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 + FF(a, b, c, d, in[0], S11, 3614090360); /* 1 */ + FF(d, a, b, c, in[1], S12, 3905402710); /* 2 */ + FF(c, d, a, b, in[2], S13, 606105819); /* 3 */ + FF(b, c, d, a, in[3], S14, 3250441966); /* 4 */ + FF(a, b, c, d, in[4], S11, 4118548399); /* 5 */ + FF(d, a, b, c, in[5], S12, 1200080426); /* 6 */ + FF(c, d, a, b, in[6], S13, 2821735955); /* 7 */ + FF(b, c, d, a, in[7], S14, 4249261313); /* 8 */ + FF(a, b, c, d, in[8], S11, 1770035416); /* 9 */ + FF(d, a, b, c, in[9], S12, 2336552879); /* 10 */ + FF(c, d, a, b, in[10], S13, 4294925233); /* 11 */ + FF(b, c, d, a, in[11], S14, 2304563134); /* 12 */ + FF(a, b, c, d, in[12], S11, 1804603682); /* 13 */ + FF(d, a, b, c, in[13], S12, 4254626195); /* 14 */ + FF(c, d, a, b, in[14], S13, 2792965006); /* 15 */ + FF(b, c, d, a, in[15], S14, 1236535329); /* 16 */ + + /* Round 2 */ +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 
20 + GG(a, b, c, d, in[1], S21, 4129170786); /* 17 */ + GG(d, a, b, c, in[6], S22, 3225465664); /* 18 */ + GG(c, d, a, b, in[11], S23, 643717713); /* 19 */ + GG(b, c, d, a, in[0], S24, 3921069994); /* 20 */ + GG(a, b, c, d, in[5], S21, 3593408605); /* 21 */ + GG(d, a, b, c, in[10], S22, 38016083); /* 22 */ + GG(c, d, a, b, in[15], S23, 3634488961); /* 23 */ + GG(b, c, d, a, in[4], S24, 3889429448); /* 24 */ + GG(a, b, c, d, in[9], S21, 568446438); /* 25 */ + GG(d, a, b, c, in[14], S22, 3275163606); /* 26 */ + GG(c, d, a, b, in[3], S23, 4107603335); /* 27 */ + GG(b, c, d, a, in[8], S24, 1163531501); /* 28 */ + GG(a, b, c, d, in[13], S21, 2850285829); /* 29 */ + GG(d, a, b, c, in[2], S22, 4243563512); /* 30 */ + GG(c, d, a, b, in[7], S23, 1735328473); /* 31 */ + GG(b, c, d, a, in[12], S24, 2368359562); /* 32 */ + + /* Round 3 */ +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 + HH(a, b, c, d, in[5], S31, 4294588738); /* 33 */ + HH(d, a, b, c, in[8], S32, 2272392833); /* 34 */ + HH(c, d, a, b, in[11], S33, 1839030562); /* 35 */ + HH(b, c, d, a, in[14], S34, 4259657740); /* 36 */ + HH(a, b, c, d, in[1], S31, 2763975236); /* 37 */ + HH(d, a, b, c, in[4], S32, 1272893353); /* 38 */ + HH(c, d, a, b, in[7], S33, 4139469664); /* 39 */ + HH(b, c, d, a, in[10], S34, 3200236656); /* 40 */ + HH(a, b, c, d, in[13], S31, 681279174); /* 41 */ + HH(d, a, b, c, in[0], S32, 3936430074); /* 42 */ + HH(c, d, a, b, in[3], S33, 3572445317); /* 43 */ + HH(b, c, d, a, in[6], S34, 76029189); /* 44 */ + HH(a, b, c, d, in[9], S31, 3654602809); /* 45 */ + HH(d, a, b, c, in[12], S32, 3873151461); /* 46 */ + HH(c, d, a, b, in[15], S33, 530742520); /* 47 */ + HH(b, c, d, a, in[2], S34, 3299628645); /* 48 */ + + /* Round 4 */ +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + II(a, b, c, d, in[0], S41, 4096336452); /* 49 */ + II(d, a, b, c, in[7], S42, 1126891415); /* 50 */ + II(c, d, a, b, in[14], S43, 2878612391); /* 51 */ + II(b, c, d, a, in[5], S44, 4237533241); /* 
52 */ + II(a, b, c, d, in[12], S41, 1700485571); /* 53 */ + II(d, a, b, c, in[3], S42, 2399980690); /* 54 */ + II(c, d, a, b, in[10], S43, 4293915773); /* 55 */ + II(b, c, d, a, in[1], S44, 2240044497); /* 56 */ + II(a, b, c, d, in[8], S41, 1873313359); /* 57 */ + II(d, a, b, c, in[15], S42, 4264355552); /* 58 */ + II(c, d, a, b, in[6], S43, 2734768916); /* 59 */ + II(b, c, d, a, in[13], S44, 1309151649); /* 60 */ + II(a, b, c, d, in[4], S41, 4149444226); /* 61 */ + II(d, a, b, c, in[11], S42, 3174756917); /* 62 */ + II(c, d, a, b, in[2], S43, 718787259); /* 63 */ + II(b, c, d, a, in[9], S44, 3951481745); /* 64 */ + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +/* + ********************************************************************** + ** End of md5.c ** + ******************************* (cut) ******************************** + */ + +/* + ********************************************************************** + ** md5driver.c -- sample routines to test ** + ** RSA Data Security, Inc. MD5 message digest algorithm. ** + ** Created: 2/16/90 RLR ** + ** Updated: 1/91 SRD ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. 
** + ********************************************************************** + */ + +#include +#include +#include +#include +/* -- include the following file if the file md5.h is separate -- */ +/* #include "md5.h" */ + +/* Prints message digest buffer in mdContext as 32 hexadecimal digits. + Order is from low-order byte to high-order byte of digest. + Each byte is printed with high-order hexadecimal digit first. + */ +static void MDPrint(mdContext) MD5_CTX *mdContext; +{ + int i; + + for(i = 0; i < 16; i++) + printf("%02x", mdContext->digest[i]); +} + +/* size of test block */ +#define TEST_BLOCK_SIZE 1000 + +/* number of blocks to process */ +#define TEST_BLOCKS 10000 + +/* number of test bytes = TEST_BLOCK_SIZE * TEST_BLOCKS */ +static long TEST_BYTES = (long)TEST_BLOCK_SIZE * (long)TEST_BLOCKS; + +/* A time trial routine, to measure the speed of MD5. + Measures wall time required to digest TEST_BLOCKS * TEST_BLOCK_SIZE + characters. + */ +static void MDTimeTrial() +{ + MD5_CTX mdContext; + time_t endTime, startTime; + unsigned char data[TEST_BLOCK_SIZE]; + unsigned int i; + + /* initialize test data */ + for(i = 0; i < TEST_BLOCK_SIZE; i++) + data[i] = (unsigned char)(i & 0xFF); + + /* start timer */ + printf("MD5 time trial. Processing %ld characters...\n", TEST_BYTES); + time(&startTime); + + /* digest data in TEST_BLOCK_SIZE byte blocks */ + MD5Init(&mdContext); + for(i = TEST_BLOCKS; i > 0; i--) + MD5Update(&mdContext, data, TEST_BLOCK_SIZE); + MD5Final(&mdContext); + + /* stop timer, get time difference */ + time(&endTime); + MDPrint(&mdContext); + printf(" is digest of test input.\n"); + printf("Seconds to process test input: %ld\n", (long)(endTime - startTime)); + printf("Characters processed per second: %ld\n", TEST_BYTES / (endTime - startTime)); +} + +/* Computes the message digest for string inString. + Prints out message digest, a space, the string (in quotes) and a + carriage return. 
+ */ +static void MDString(inString) char *inString; +{ + MD5_CTX mdContext; + unsigned int len = strlen(inString); + + MD5Init(&mdContext); + MD5Update(&mdContext, (unsigned char *)inString, len); + MD5Final(&mdContext); + MDPrint(&mdContext); + printf(" \"%s\"\n\n", inString); +} + +/* Computes the message digest for a specified file. + Prints out message digest, a space, the file name, and a carriage + return. + */ +static void MDFile(filename) char *filename; +{ + FILE *inFile = fopen(filename, "rb"); + MD5_CTX mdContext; + int bytes; + unsigned char data[1024]; + + if(inFile == NULL) + { + printf("%s can't be opened.\n", filename); + return; + } + + MD5Init(&mdContext); + while((bytes = fread(data, 1, 1024, inFile)) != 0) + MD5Update(&mdContext, data, bytes); + MD5Final(&mdContext); + MDPrint(&mdContext); + printf(" %s\n", filename); + fclose(inFile); +} + +/* Writes the message digest of the data from stdin onto stdout, + followed by a carriage return. + */ +static void MDFilter() +{ + MD5_CTX mdContext; + int bytes; + unsigned char data[16]; + + MD5Init(&mdContext); + while((bytes = fread(data, 1, 16, stdin)) != 0) + MD5Update(&mdContext, data, bytes); + MD5Final(&mdContext); + MDPrint(&mdContext); + printf("\n"); +} + +/* Runs a standard suite of test data. + */ +static void MDTestSuite() +{ + printf("MD5 test suite results:\n\n"); + MDString(""); + MDString("a"); + MDString("abc"); + MDString("message digest"); + MDString("abcdefghijklmnopqrstuvwxyz"); + MDString("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"); + MDString( + "1234567890123456789012345678901234567890\ +1234567890123456789012345678901234567890"); + /* Contents of file foo are "abc" */ + MDFile("foo"); +} diff --git a/src/amuse/community/arepo/src/debug_md5/Md5.h b/src/amuse/community/arepo/src/debug_md5/Md5.h new file mode 100644 index 0000000000..df809ed5d2 --- /dev/null +++ b/src/amuse/community/arepo/src/debug_md5/Md5.h @@ -0,0 +1,92 @@ +/*! 
+ * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/debug_md5/Md5.h + * \date 05/2018 + * \brief Header for implementation of MD5 checksums. + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 27.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +/* + ********************************************************************** + ** md5.h -- Header file for implementation of MD5 ** + ** RSA Data Security, Inc. MD5 Message Digest Algorithm ** + ** Created: 2/17/90 RLR ** + ** Revised: 12/27/90 SRD,AJ,BSK,JT Reference C version ** + ** Revised (for MD5): RLR 4/27/91 ** + ** -- G modified to have y&~z instead of y&z ** + ** -- FF, GG, HH modified to add in last register done ** + ** -- Access pattern: round 2 works mod 5, round 3 works mod 3 ** + ** -- distinct additive constant for each step ** + ** -- round 4 added, working mod 7 ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. 
** + ** ** + ** License to copy and use this software is granted provided that ** + ** it is identified as the "RSA Data Security, Inc. MD5 Message ** + ** Digest Algorithm" in all material mentioning or referencing this ** + ** software or this function. ** + ** ** + ** License is also granted to make and use derivative works ** + ** provided that such works are identified as "derived from the RSA ** + ** Data Security, Inc. MD5 Message Digest Algorithm" in all ** + ** material mentioning or referencing the derived work. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. ** + ********************************************************************** + */ + +/* typedef a 32 bit type */ +typedef unsigned long int UINT4; + +/* Data structure for MD5 (Message Digest) computation */ +typedef struct +{ + UINT4 i[2]; /* number of _bits_ handled mod 2^64 */ + UINT4 buf[4]; /* scratch buffer */ + unsigned char in[64]; /* input buffer */ + unsigned char digest[16]; /* actual digest after MD5Final call */ +} MD5_CTX; + +void MD5Final(MD5_CTX *mdContext); +// void MD5Update(MD5_CTX * mdContext, unsigned char *inBuf, unsigned int inLen); +void MD5UpdateLong(MD5_CTX *mdContext, unsigned char *inBuf, unsigned long long inLenLong); +void MD5Init(MD5_CTX *mdContext); + +/* + ********************************************************************** + ** End of md5.h ** + ******************************* (cut) ******************************** + */ diff --git a/src/amuse/community/arepo/src/debug_md5/calc_checksum.c b/src/amuse/community/arepo/src/debug_md5/calc_checksum.c new file mode 100644 index 0000000000..3f710ef036 --- /dev/null +++ 
b/src/amuse/community/arepo/src/debug_md5/calc_checksum.c @@ -0,0 +1,121 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/debug_md5/calc_checksum.c + * \date 05/2018 + * \brief Functions to calculate an MD5 checksum from a dataset. + * \details contains functions: + * void calc_memory_checksum(void *base, size_t bytes) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "Md5.h" + +/*! \brief Calculates a md5 checksum (on all MPI tasks) and prints it. + * + * \param[in] base Pointer to start of data. + * \param[in] bytes Number of bytes to be checked. 
+ * + * \return void + */ +void calc_memory_checksum(void *base, size_t bytes) +{ + MD5_CTX sum; + union + { + unsigned char digest[16]; + int val[4]; + } u, uglob; + + MD5Init(&sum); + MD5UpdateLong(&sum, base, bytes); + MD5Final(&sum); + + int i; + + for(i = 0; i < 16; i++) + u.digest[i] = sum.digest[i]; + + MPI_Allreduce(u.val, uglob.val, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + printf("Step=%d MD5=", All.NumCurrentTiStep); + for(i = 0; i < 16; i++) + printf("%02x", uglob.digest[i]); + printf("\n"); + } +} + +#ifdef RESTART_DEBUG +/*! \brief Calculates md5 checksums of main data structures of a restart file. + * + * \return void + */ +void log_restart_debug(void) +{ + MD5_CTX sum; + union + { + unsigned char digest[16]; + int val[4]; + } u, uglob_P, uglob_SphP; + int i; + + MD5Init(&sum); + MD5UpdateLong(&sum, (void *)P, NumPart * sizeof(struct particle_data)); + MD5Final(&sum); + + for(i = 0; i < 16; i++) + u.digest[i] = sum.digest[i]; + + MPI_Allreduce(u.val, uglob_P.val, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + MD5Init(&sum); + MD5UpdateLong(&sum, (void *)SphP, NumGas * sizeof(struct sph_particle_data)); + MD5Final(&sum); + + for(i = 0; i < 16; i++) + u.digest[i] = sum.digest[i]; + + MPI_Allreduce(u.val, uglob_SphP.val, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + fprintf(FdRestartTest, "\n"); + fprintf(FdRestartTest, "Step=%8d P[] ", All.NumCurrentTiStep); + for(i = 0; i < 16; i++) + fprintf(FdRestartTest, "%02x", uglob_P.digest[i]); + fprintf(FdRestartTest, "\n"); + fprintf(FdRestartTest, " SphP[] "); + for(i = 0; i < 16; i++) + fprintf(FdRestartTest, "%02x", uglob_SphP.digest[i]); + fprintf(FdRestartTest, "\n"); + fflush(FdRestartTest); + } +} +#endif diff --git a/src/amuse/community/arepo/src/domain/bsd_tree.h b/src/amuse/community/arepo/src/domain/bsd_tree.h new file mode 100644 index 0000000000..c8f763abdf --- /dev/null +++ b/src/amuse/community/arepo/src/domain/bsd_tree.h @@ -0,0 +1,865 @@ +/*! 
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/bsd_tree.h
+ * \date        05/2018
+ * \brief       BSD tree.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 29.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+/* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */
+/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */
+/* $FreeBSD: src/sys/sys/tree.h,v 1.9.4.2 2012/11/17 11:37:26 svnexp Exp $ */
+
+/*-
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TREE_H_
+#define _SYS_TREE_H_
+
+#include <sys/cdefs.h>
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure. Every operation
+ * on the tree causes a splay to happen. The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree. On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n). The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n).
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute. It fulfills a set of conditions:
+ * - every search path from the root to a leaf consists of the
+ *   same number of black nodes,
+ * - each red node (except for the root) has a black parent,
+ * - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type) /* declares 'struct name', the head of a splay tree of 'struct type' nodes */ \
+  struct name \
+  { \
+    struct type *sph_root; /* root of the tree */ \
+  }
+
+#define SPLAY_INITIALIZER(root) /* static initializer for an empty tree head */ \
+  { \
+    NULL \
+  }
+
+#define SPLAY_INIT(root) /* run-time equivalent: resets the head to an empty tree */ \
+  do \
+    { \
+      (root)->sph_root = NULL; \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+#define SPLAY_ENTRY(type) /* link fields to embed as a member (the 'field' argument below) in each node */ \
+  struct \
+  { \
+    struct type *spe_left; /* left element */ \
+    struct type *spe_right; /* right element */ \
+  }
+
+#define SPLAY_LEFT(elm, field) (elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right
+#define SPLAY_ROOT(head) (head)->sph_root
+#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp holds SPLAY_{RIGHT,LEFT} of the current root; tmp becomes the new root */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) \
+  do \
+    { \
+      SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
+      SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+      (head)->sph_root = tmp; \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+#define SPLAY_ROTATE_LEFT(head, tmp, field) \
+  do \
+    { \
+      SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
+      SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+      (head)->sph_root = tmp; \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKLEFT(head, tmp, field) /* hangs the root below tmp as its left child, then descends to the root's left child */ \
+  do \
+    { \
+      SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+      tmp = (head)->sph_root; \
+      (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) /* mirror image of SPLAY_LINKLEFT */ \
+  do \
+    { \
+      SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+      tmp = (head)->sph_root; \
+      (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) /* re-attaches the side trees collected under 'node' to the final root */ \
+  do \
+    { \
+      SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \
+      SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field); \
+      SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \
+      SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions for the tree type 'name'; cmp(a, b) is used as a three-way comparison */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp) \
+  void name##_SPLAY(struct name *, struct type *); \
+  void name##_SPLAY_MINMAX(struct name *, int); \
+  struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
+  struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
+  \
+  /* Finds the node with the same key as elm; returns NULL if the tree is empty or holds no such key */ \
+  static __inline struct type *name##_SPLAY_FIND(struct name *head, struct type *elm) \
+  { \
+    if(SPLAY_EMPTY(head)) \
+      return (NULL); \
+    name##_SPLAY(head, elm); \
+    if((cmp)(elm, (head)->sph_root) == 0) \
+      return (head->sph_root); \
+    return (NULL); \
+  } \
+  \
+  static __inline struct type *name##_SPLAY_NEXT(struct name *head, struct type *elm) /* leftmost node of elm's right subtree after splaying, or NULL */ \
+  { \
+    name##_SPLAY(head, elm); \
+    if(SPLAY_RIGHT(elm, field) != NULL) \
+      { \
+        elm = SPLAY_RIGHT(elm, field); \
+        while(SPLAY_LEFT(elm, field) != NULL) \
+          { \
+            elm = SPLAY_LEFT(elm, field); \
+          } \
+      } \
+    else \
+      elm = NULL; \
+    return (elm); \
+  } \
+  \
+  static __inline struct type *name##_SPLAY_MIN_MAX(struct name *head, int val) /* val < 0: minimum, val > 0: maximum */ \
+  { \
+    name##_SPLAY_MINMAX(head, val); \
+    return (SPLAY_ROOT(head)); \
+  }
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp) \
+  struct type *name##_SPLAY_INSERT(struct name *head, struct type *elm) /* returns NULL after inserting, or the existing node with an equal key */ \
+  { \
+    if(SPLAY_EMPTY(head)) \
+      { \
+        SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \
+      } \
+    else \
+      { \
+        int __comp; \
+        name##_SPLAY(head, elm); \
+        __comp = (cmp)(elm, (head)->sph_root); \
+        if(__comp < 0) \
+          { \
+            SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field); \
+            SPLAY_RIGHT(elm, field) = (head)->sph_root; \
+            SPLAY_LEFT((head)->sph_root, field) = NULL; \
+          } \
+        else if(__comp > 0) \
+          { \
+            SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field); \
+            SPLAY_LEFT(elm, field) = (head)->sph_root; \
+            SPLAY_RIGHT((head)->sph_root, field) = NULL; \
+          } \
+        else \
+          return ((head)->sph_root); \
+      } \
+    (head)->sph_root = (elm); \
+    return (NULL); \
+  } \
+  \
+  struct type *name##_SPLAY_REMOVE(struct name *head, struct type *elm) /* returns elm if a node with an equal key was unlinked, else NULL */ \
+  { \
+    struct type *__tmp; \
+    if(SPLAY_EMPTY(head)) \
+      return (NULL); \
+    name##_SPLAY(head, elm); \
+    if((cmp)(elm, (head)->sph_root) == 0) \
+      { \
+        if(SPLAY_LEFT((head)->sph_root, field) == NULL) \
+          { \
+            (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
+          } \
+        else \
+          { \
+            __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+            (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
+            name##_SPLAY(head, elm); /* splay the left subtree so its new root has no right child */ \
+            SPLAY_RIGHT((head)->sph_root, field) = __tmp; \
+          } \
+        return (elm); \
+      } \
+    return (NULL); \
+  } \
+  \
+  void name##_SPLAY(struct name *head, struct type *elm) /* dereferences the root unconditionally: callers above check SPLAY_EMPTY first */ \
+  { \
+    struct type __node, *__left, *__right, *__tmp; \
+    int __comp; \
+    \
+    SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL; \
+    __left = __right = &__node; \
+    \
+    while((__comp = (cmp)(elm, (head)->sph_root)) != 0) \
+      { \
+        if(__comp < 0) \
+          { \
+            __tmp = SPLAY_LEFT((head)->sph_root, field); \
+            if(__tmp == NULL) \
+              break; \
+            if((cmp)(elm, __tmp) < 0) \
+              { \
+                SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+                if(SPLAY_LEFT((head)->sph_root, field) == NULL) \
+                  break; \
+              } \
+            SPLAY_LINKLEFT(head, __right, field); \
+          } \
+        else if(__comp > 0) \
+          { \
+            __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+            if(__tmp == NULL) \
+              break; \
+            if((cmp)(elm, __tmp) > 0) \
+              { \
+                SPLAY_ROTATE_LEFT(head, __tmp, field); \
+                if(SPLAY_RIGHT((head)->sph_root, field) == NULL) \
+                  break; \
+              } \
+            SPLAY_LINKRIGHT(head, __left, field); \
+          } \
+      } \
+    SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+  } \
+  \
+  /* Splay with either the minimum or the maximum element \
+   * Used to find minimum or maximum element in tree. \
+   */ \
+  void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+  { \
+    struct type __node, *__left, *__right, *__tmp; \
+    \
+    SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL; \
+    __left = __right = &__node; \
+    \
+    while(1) /* left only via the break statements below, so __comp must be non-zero */ \
+      { \
+        if(__comp < 0) \
+          { \
+            __tmp = SPLAY_LEFT((head)->sph_root, field); \
+            if(__tmp == NULL) \
+              break; \
+            if(__comp < 0) \
+              { \
+                SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+                if(SPLAY_LEFT((head)->sph_root, field) == NULL) \
+                  break; \
+              } \
+            SPLAY_LINKLEFT(head, __right, field); \
+          } \
+        else if(__comp > 0) \
+          { \
+            __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+            if(__tmp == NULL) \
+              break; \
+            if(__comp > 0) \
+              { \
+                SPLAY_ROTATE_LEFT(head, __tmp, field); \
+                if(SPLAY_RIGHT((head)->sph_root, field) == NULL) \
+                  break; \
+              } \
+            SPLAY_LINKRIGHT(head, __left, field); \
+          } \
+      } \
+    SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+  }
+
+#define SPLAY_NEGINF -1 /* direction argument used by SPLAY_MIN */
+#define SPLAY_INF 1 /* direction argument used by SPLAY_MAX */
+
+#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL : name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head) for((x) = SPLAY_MIN(name, head); (x) != NULL; (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type) /* declares 'struct name', the head of a red-black tree of 'struct type' nodes */ \
+  struct name \
+  { \
+    struct type *rbh_root; /* root of the tree */ \
+  }
+
+#define RB_INITIALIZER(root) /* static initializer for an empty tree head */ \
+  { \
+    NULL \
+  }
+
+#define RB_INIT(root) /* run-time equivalent: resets the head to an empty tree */ \
+  do \
+    { \
+      (root)->rbh_root = NULL; \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+#define RB_BLACK 0
+#define RB_RED 1
+#define RB_ENTRY(type) /* link fields to embed as a member (the 'field' argument below) in each node */ \
+  struct \
+  { \
+    struct type *rbe_left; /* left element */ \
+    struct type *rbe_right; /* right element */ \
+    struct type *rbe_parent; /* parent element */ \
+    int rbe_color; /* node color */ \
+  }
+
+#define RB_LEFT(elm, field) (elm)->field.rbe_left
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent
+#define RB_COLOR(elm, field) (elm)->field.rbe_color
+#define RB_ROOT(head) (head)->rbh_root
+#define RB_EMPTY(head) (RB_ROOT(head) == NULL)
+
+#define RB_SET(elm, parent, field) /* initializes elm as a red node with no children below 'parent' */ \
+  do \
+    { \
+      RB_PARENT(elm, field) = parent; \
+      RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \
+      RB_COLOR(elm, field) = RB_RED; \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+#define RB_SET_BLACKRED(black, red, field) /* colors the first node black and the second red */ \
+  do \
+    { \
+      RB_COLOR(black, field) = RB_BLACK; \
+      RB_COLOR(red, field) = RB_RED; \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x) /* hook invoked on nodes whose links changed; empty unless defined before this header is included */ \
+  do \
+    { \
+    } \
+  while(0)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) /* elm's right child (stored in tmp) takes elm's place; elm becomes its left child */ \
+  do \
+    { \
+      (tmp) = RB_RIGHT(elm, field); \
+      if((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) \
+        { \
+          RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
+        } \
+      RB_AUGMENT(elm); \
+      if((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) \
+        { \
+          if((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+            RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+          else \
+            RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+        } \
+      else \
+        (head)->rbh_root = (tmp); \
+      RB_LEFT(tmp, field) = (elm); \
+      RB_PARENT(elm, field) = (tmp); \
+      RB_AUGMENT(tmp); \
+      if((RB_PARENT(tmp, field))) \
+        RB_AUGMENT(RB_PARENT(tmp, field)); \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) /* mirror image: elm's left child (stored in tmp) takes elm's place */ \
+  do \
+    { \
+      (tmp) = RB_LEFT(elm, field); \
+      if((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) \
+        { \
+          RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \
+        } \
+      RB_AUGMENT(elm); \
+      if((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) \
+        { \
+          if((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+            RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+          else \
+            RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+        } \
+      else \
+        (head)->rbh_root = (tmp); \
+      RB_RIGHT(tmp, field) = (elm); \
+      RB_PARENT(elm, field) = (tmp); \
+      RB_AUGMENT(tmp); \
+      if((RB_PARENT(tmp, field))) \
+        RB_AUGMENT(RB_PARENT(tmp, field)); \
+    } \
+  while(/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions; the _STATIC variant passes 'static' as attr to every declaration */
+#define RB_PROTOTYPE(name, type, field, cmp) RB_PROTOTYPE_INTERNAL(name, type, field, cmp, )
+#define RB_PROTOTYPE_STATIC(name, type, field, cmp) RB_PROTOTYPE_INTERNAL(name, type, field, cmp, static)
+#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \
+  attr void name##_RB_INSERT_COLOR(struct name *, struct type *); \
+  attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *); \
+  attr struct type *name##_RB_REMOVE(struct name *, struct type *); \
+  attr struct type *name##_RB_INSERT(struct name *, struct type *); \
+  attr struct type *name##_RB_FIND(struct name *, struct type *); \
+  attr struct type *name##_RB_NFIND(struct name *, struct type *); \
+  attr struct type *name##_RB_NEXT(struct type *); \
+  attr struct type *name##_RB_PREV(struct type *); \
+  attr struct type *name##_RB_MINMAX(struct name *, int);
+
+/* Main rb operation.
+ * Moves node close to the key of elm to top + */ +#define RB_GENERATE(name, type, field, cmp) RB_GENERATE_INTERNAL(name, type, field, cmp, ) +#define RB_GENERATE_STATIC(name, type, field, cmp) RB_GENERATE_INTERNAL(name, type, field, cmp, static) +#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \ + attr void name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \ + { \ + struct type *parent, *gparent, *tmp; \ + while((parent = RB_PARENT(elm, field)) != NULL && RB_COLOR(parent, field) == RB_RED) \ + { \ + gparent = RB_PARENT(parent, field); \ + if(parent == RB_LEFT(gparent, field)) \ + { \ + tmp = RB_RIGHT(gparent, field); \ + if(tmp && RB_COLOR(tmp, field) == RB_RED) \ + { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field); \ + elm = gparent; \ + continue; \ + } \ + if(RB_RIGHT(parent, field) == elm) \ + { \ + RB_ROTATE_LEFT(head, parent, tmp, field); \ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_RIGHT(head, gparent, tmp, field); \ + } \ + else \ + { \ + tmp = RB_LEFT(gparent, field); \ + if(tmp && RB_COLOR(tmp, field) == RB_RED) \ + { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field); \ + elm = gparent; \ + continue; \ + } \ + if(RB_LEFT(parent, field) == elm) \ + { \ + RB_ROTATE_RIGHT(head, parent, tmp, field); \ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_LEFT(head, gparent, tmp, field); \ + } \ + } \ + RB_COLOR(head->rbh_root, field) = RB_BLACK; \ + } \ + \ + attr void name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \ + { \ + struct type *tmp; \ + while((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && elm != RB_ROOT(head)) \ + { \ + if(RB_LEFT(parent, field) == elm) \ + { \ + tmp = RB_RIGHT(parent, field); \ + if(RB_COLOR(tmp, field) == RB_RED) \ + { \ + RB_SET_BLACKRED(tmp, parent, field); \ + 
RB_ROTATE_LEFT(head, parent, tmp, field); \ + tmp = RB_RIGHT(parent, field); \ + } \ + if((RB_LEFT(tmp, field) == NULL || RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) && \ + (RB_RIGHT(tmp, field) == NULL || RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) \ + { \ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } \ + else \ + { \ + if(RB_RIGHT(tmp, field) == NULL || RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) \ + { \ + struct type *oleft; \ + if((oleft = RB_LEFT(tmp, field)) != NULL) \ + RB_COLOR(oleft, field) = RB_BLACK; \ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_RIGHT(head, tmp, oleft, field); \ + tmp = RB_RIGHT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field); \ + RB_COLOR(parent, field) = RB_BLACK; \ + if(RB_RIGHT(tmp, field)) \ + RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK; \ + RB_ROTATE_LEFT(head, parent, tmp, field); \ + elm = RB_ROOT(head); \ + break; \ + } \ + } \ + else \ + { \ + tmp = RB_LEFT(parent, field); \ + if(RB_COLOR(tmp, field) == RB_RED) \ + { \ + RB_SET_BLACKRED(tmp, parent, field); \ + RB_ROTATE_RIGHT(head, parent, tmp, field); \ + tmp = RB_LEFT(parent, field); \ + } \ + if((RB_LEFT(tmp, field) == NULL || RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) && \ + (RB_RIGHT(tmp, field) == NULL || RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) \ + { \ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } \ + else \ + { \ + if(RB_LEFT(tmp, field) == NULL || RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) \ + { \ + struct type *oright; \ + if((oright = RB_RIGHT(tmp, field)) != NULL) \ + RB_COLOR(oright, field) = RB_BLACK; \ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_LEFT(head, tmp, oright, field); \ + tmp = RB_LEFT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field); \ + RB_COLOR(parent, field) = RB_BLACK; \ + if(RB_LEFT(tmp, field)) \ + RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK; \ + 
RB_ROTATE_RIGHT(head, parent, tmp, field); \ + elm = RB_ROOT(head); \ + break; \ + } \ + } \ + } \ + if(elm) \ + RB_COLOR(elm, field) = RB_BLACK; \ + } \ + \ + attr struct type *name##_RB_REMOVE(struct name *head, struct type *elm) \ + { \ + struct type *child, *parent, *old = elm; \ + int color; \ + if(RB_LEFT(elm, field) == NULL) \ + child = RB_RIGHT(elm, field); \ + else if(RB_RIGHT(elm, field) == NULL) \ + child = RB_LEFT(elm, field); \ + else \ + { \ + struct type *left; \ + elm = RB_RIGHT(elm, field); \ + while((left = RB_LEFT(elm, field)) != NULL) \ + elm = left; \ + child = RB_RIGHT(elm, field); \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if(child) \ + RB_PARENT(child, field) = parent; \ + if(parent) \ + { \ + if(RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } \ + else \ + RB_ROOT(head) = child; \ + if(RB_PARENT(elm, field) == old) \ + parent = elm; \ + (elm)->field = (old)->field; \ + if(RB_PARENT(old, field)) \ + { \ + if(RB_LEFT(RB_PARENT(old, field), field) == old) \ + RB_LEFT(RB_PARENT(old, field), field) = elm; \ + else \ + RB_RIGHT(RB_PARENT(old, field), field) = elm; \ + RB_AUGMENT(RB_PARENT(old, field)); \ + } \ + else \ + RB_ROOT(head) = elm; \ + RB_PARENT(RB_LEFT(old, field), field) = elm; \ + if(RB_RIGHT(old, field)) \ + RB_PARENT(RB_RIGHT(old, field), field) = elm; \ + if(parent) \ + { \ + left = parent; \ + do \ + { \ + RB_AUGMENT(left); \ + } \ + while((left = RB_PARENT(left, field)) != NULL); \ + } \ + goto color; \ + } \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if(child) \ + RB_PARENT(child, field) = parent; \ + if(parent) \ + { \ + if(RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } \ + else \ + RB_ROOT(head) = child; \ + color: \ + if(color == RB_BLACK) \ + name##_RB_REMOVE_COLOR(head, parent, 
child); \ + return (old); \ + } \ + \ + /* Inserts a node into the RB tree */ \ + attr struct type *name##_RB_INSERT(struct name *head, struct type *elm) \ + { \ + struct type *tmp; \ + struct type *parent = NULL; \ + int comp = 0; \ + tmp = RB_ROOT(head); \ + while(tmp) \ + { \ + parent = tmp; \ + comp = (cmp)(elm, parent); \ + if(comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if(comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + RB_SET(elm, parent, field); \ + if(parent != NULL) \ + { \ + if(comp < 0) \ + RB_LEFT(parent, field) = elm; \ + else \ + RB_RIGHT(parent, field) = elm; \ + RB_AUGMENT(parent); \ + } \ + else \ + RB_ROOT(head) = elm; \ + name##_RB_INSERT_COLOR(head, elm); \ + return (NULL); \ + } \ + \ + /* Finds the node with the same key as elm */ \ + attr struct type *name##_RB_FIND(struct name *head, struct type *elm) \ + { \ + struct type *tmp = RB_ROOT(head); \ + int comp; \ + while(tmp) \ + { \ + comp = cmp(elm, tmp); \ + if(comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if(comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (NULL); \ + } \ + \ + /* Finds the first node greater than or equal to the search key */ \ + attr struct type *name##_RB_NFIND(struct name *head, struct type *elm) \ + { \ + struct type *tmp = RB_ROOT(head); \ + struct type *res = NULL; \ + int comp; \ + while(tmp) \ + { \ + comp = cmp(elm, tmp); \ + if(comp < 0) \ + { \ + res = tmp; \ + tmp = RB_LEFT(tmp, field); \ + } \ + else if(comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (res); \ + } \ + \ + /* ARGSUSED */ \ + attr struct type *name##_RB_NEXT(struct type *elm) \ + { \ + if(RB_RIGHT(elm, field)) \ + { \ + elm = RB_RIGHT(elm, field); \ + while(RB_LEFT(elm, field)) \ + elm = RB_LEFT(elm, field); \ + } \ + else \ + { \ + if(RB_PARENT(elm, field) && (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + else \ + { \ + while(RB_PARENT(elm, 
field) && (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + elm = RB_PARENT(elm, field); \ + } \ + } \ + return (elm); \ + } \ + \ + /* ARGSUSED */ \ + attr struct type *name##_RB_PREV(struct type *elm) \ + { \ + if(RB_LEFT(elm, field)) \ + { \ + elm = RB_LEFT(elm, field); \ + while(RB_RIGHT(elm, field)) \ + elm = RB_RIGHT(elm, field); \ + } \ + else \ + { \ + if(RB_PARENT(elm, field) && (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + else \ + { \ + while(RB_PARENT(elm, field) && (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + elm = RB_PARENT(elm, field); \ + } \ + } \ + return (elm); \ + } \ + \ + attr struct type *name##_RB_MINMAX(struct name *head, int val) \ + { \ + struct type *tmp = RB_ROOT(head); \ + struct type *parent = NULL; \ + while(tmp) \ + { \ + parent = tmp; \ + if(val < 0) \ + tmp = RB_LEFT(tmp, field); \ + else \ + tmp = RB_RIGHT(tmp, field); \ + } \ + return (parent); \ + } + +#define RB_NEGINF -1 +#define RB_INF 1 + +#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) +#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) +#define RB_FIND(name, x, y) name##_RB_FIND(x, y) +#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y) +#define RB_NEXT(name, x, y) name##_RB_NEXT(y) +#define RB_PREV(name, x, y) name##_RB_PREV(y) +#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) +#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF) + +#define RB_FOREACH(x, name, head) for((x) = RB_MIN(name, head); (x) != NULL; (x) = name##_RB_NEXT(x)) + +#define RB_FOREACH_FROM(x, name, y) for((x) = (y); ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); (x) = (y)) + +#define RB_FOREACH_SAFE(x, name, head, y) \ + for((x) = RB_MIN(name, head); ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); (x) = (y)) + +#define RB_FOREACH_REVERSE(x, name, head) for((x) = RB_MAX(name, head); (x) != NULL; (x) = name##_RB_PREV(x)) + +#define RB_FOREACH_REVERSE_FROM(x, 
name, y) for((x) = (y); ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); (x) = (y)) + +#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \ + for((x) = RB_MAX(name, head); ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); (x) = (y)) + +#endif /* _SYS_TREE_H_ */ diff --git a/src/amuse/community/arepo/src/domain/domain.c b/src/amuse/community/arepo/src/domain/domain.c new file mode 100644 index 0000000000..4557c25ff5 --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain.c @@ -0,0 +1,633 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain.c + * \date 05/2018 + * \brief Code for domain decomposition. + * \details This file contains the code for the domain decomposition of the + * simulation volume. The domains are constructed from disjoint + * subsets of the leaves of a fiducial top-level tree that covers + * the full simulation volume. Domain boundaries hence run along + * tree-node divisions of a fiducial global Barnes-Hut tree. As a + * result of this method, the tree force are in principle strictly + * independent of the way the domains are cut. 
The domain + * decomposition can be carried out for an arbitrary number of + * CPUs. Individual domains are not cubical, but spatially + * coherent since the leaves are traversed in a Peano-Hilbert + * order and individual domains form segments along this order. + * This also ensures that each domain has a small surface to + * volume ratio, which minimizes communication. + * contains functions: + * void domain_Decomposition(void) + * void domain_prepare_voronoi_dynamic_update(void) + * void domain_voronoi_dynamic_flag_particles(void) + * void domain_voronoi_dynamic_update_execute(void) + * void domain_preserve_relevant_topnode_data(void) + * void domain_find_total_cost(void) + * peano1D domain_double_to_int(double d) + * void domain_allocate(void) + * void domain_free(void) + * void domain_printf(char *buf) + * void domain_report_balance(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 16.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +/*! \brief The main routine for the domain decomposition. + * + * It acts as a driver routine that allocates various temporary buffers, + * maps the particles back onto the periodic box if needed, and then does the + * domain decomposition, and a final Peano-Hilbert order of all particles + * as a tuning measure. 
+ *
+ * The steps are, in order: prepare the Voronoi translation table, wrap the
+ * particles into the box, allocate the domain arrays and the temporary
+ * topNodes buffer, build the top-level tree, combine its leaves into
+ * domains and map those to tasks, exchange the particles, and copy the
+ * top-node data that is still needed into TopNodes. After that the
+ * particles are brought into Peano-Hilbert order, TopNodes and DomainTask
+ * are resized to NTopnodes and NTopleaves entries, the Voronoi connections
+ * are updated, and the per-task lists of local top-leaves are rebuilt.
+ *
+ * \return void
+ */
+void domain_Decomposition(void)
+{
+  TIMER_START(CPU_DOMAIN);
+
+  double t0 = second(); /* start of the interval reported as total time further down */
+
+  mpi_printf("DOMAIN: Begin domain decomposition (sync-point %d).\n", All.NumCurrentTiStep);
+
+  /* allocate the translation table and determine Largest_Nvc */
+  domain_prepare_voronoi_dynamic_update();
+
+  /* map the particles back onto the box */
+  do_box_wrapping();
+
+  /* Initialize and allocate; topNodes is a temporary buffer with MaxTopNodes entries that is
+   * released again further down, once the data still needed has been copied into TopNodes */
+  domain_init_sum_cost();
+  domain_allocate();
+  domain_allocate_lists();
+
+  topNodes = (struct local_topnode_data *)mymalloc_movable(&topNodes, "topNodes", (MaxTopNodes * sizeof(struct local_topnode_data)));
+  /* find total cost factors */
+  domain_find_total_cost();
+  /* determine global dimensions of domain grid */
+  domain_findExtent();
+
+  /* determine top-level tree */
+  domain_determineTopTree();
+
+  /* find the split of the top-level tree */
+  domain_combine_topleaves_to_domains(All.MultipleDomains * NTask, NTopleaves);
+
+  /* combine on each MPI task several of the domains (namely the number All.MultipleDomains) */
+  domain_combine_multipledomains();
+
+  /* permute the task assignment such that the smallest number of particles needs to be moved */
+  domain_optimize_domain_to_task_mapping();
+
+  double ta = second();
+  /* in case we retain the neighbor connectivity, do some preparatory flagging */
+  domain_voronoi_dynamic_flag_particles();
+  /* remove cells that have been eliminated or were turned into stars from the particle sequence */
+  domain_rearrange_particle_sequence();
+  /* determine for each cpu how many particles have to be shifted to other cpus */
+  domain_countToGo();
+  double tb = second();
+  mpi_printf("DOMAIN: particle rearrangement work took %g sec\n", timediff(ta, tb));
+
+  /* finally, carry out the actual particle exchange */
+  domain_exchange();
+
+  /* copy what we need for the topnodes */
+  domain_preserve_relevant_topnode_data();
+  myfree(topNodes);
+  domain_free_lists();
+  TimeOfLastDomainConstruction = All.Time;
+
+  double t1 = second();
+  mpi_printf("DOMAIN: domain decomposition done. (took in total %g sec)\n", timediff(t0, t1));
+
+  TIMER_STOP(CPU_DOMAIN);
+  TIMER_START(CPU_PEANO); /* the Peano-Hilbert ordering is timed separately under CPU_PEANO */
+
+  peano_hilbert_order();
+  myfree(Key); /* Key is not allocated in this function - presumably in domain_allocate_lists(); TODO confirm */
+
+  TIMER_STOPSTART(CPU_PEANO, CPU_DOMAIN);
+
+  myfree(DomainListOfLocalTopleaves); /* allocated with MaxTopNodes entries in domain_allocate(); re-created with NTopleaves entries below */
+
+#ifdef ONEDIMS
+  voronoi_1D_order();
+#endif /* #ifdef ONEDIMS */
+
+  TopNodes   = (struct topnode_data *)myrealloc_movable(TopNodes, NTopnodes * sizeof(struct topnode_data)); /* from MaxTopNodes to NTopnodes entries */
+  DomainTask = (int *)myrealloc_movable(DomainTask, NTopleaves * sizeof(int));                              /* from MaxTopNodes to NTopleaves entries */
+
+  domain_voronoi_dynamic_update_execute();
+
+  DomainListOfLocalTopleaves =
+      (int *)mymalloc_movable(&DomainListOfLocalTopleaves, "DomainListOfLocalTopleaves", (NTopleaves * sizeof(int)));
+
+  memset(DomainNLocalTopleave, 0, NTask * sizeof(int)); /* pass 1: count the top-leaves assigned to each task */
+
+  for(int i = 0; i < NTopleaves; i++)
+    DomainNLocalTopleave[DomainTask[i]]++;
+
+  DomainFirstLocTopleave[0] = 0; /* exclusive prefix sum: position of each task's first leaf in the list */
+  for(int i = 1; i < NTask; i++)
+    DomainFirstLocTopleave[i] = DomainFirstLocTopleave[i - 1] + DomainNLocalTopleave[i - 1];
+
+  memset(DomainNLocalTopleave, 0, NTask * sizeof(int)); /* pass 2: the counters are reused as per-task fill positions */
+
+  for(int i = 0; i < NTopleaves; i++)
+    {
+      int task = DomainTask[i];
+      int off  = DomainFirstLocTopleave[task] + DomainNLocalTopleave[task]++; /* next free slot of this task; the counter ends at its pass-1 value */
+      DomainListOfLocalTopleaves[off] = i;
+    }
+
+  reconstruct_timebins();
+
+  for(int i = 0; i < GRAVCOSTLEVELS; i++)
+    All.LevelHasBeenMeasured[i] = 0; /* clear the flag of every gravity cost level */
+
+  domain_report_balance();
+
+  TIMER_STOP(CPU_DOMAIN);
+}
+
+/*! \brief Prepares for voronoi dynamic update.
+ *
+ * Allocates required arrays and communicates required information.
+ *
+ * Sets N_trans to the current NumGas, allocates trans_table with N_trans
+ * entries, and reduces Nvc with MPI_MAX over all tasks into Largest_Nvc.
+ * trans_table is released again in domain_voronoi_dynamic_update_execute().
+ *
+ * \return void
+ */
+void domain_prepare_voronoi_dynamic_update(void)
+{
+  /* prepare storage for translation table */
+  N_trans     = NumGas; /* length of translation table */
+  trans_table = mymalloc_movable(&trans_table, "trans_table", N_trans * sizeof(struct trans_data));
+  MPI_Allreduce(&Nvc, &Largest_Nvc, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); /* collective: every task has to reach this call */
+}
+
+/*! \brief Flag particles that need to be exported.
+ *
+ * Goes through all local particles, looks up the task that owns each
+ * particle's top-level leaf in the new decomposition, and passes the pair
+ * to domain_mark_in_trans_table(). The call is made for every particle,
+ * irrespective of whether the destination differs from the current task.
+ *
+ * The leaf is found by descending the temporary topNodes tree: the eight
+ * daughters of a node are stored consecutively starting at Daughter, and
+ * each of them covers Size >> 3 consecutive Peano-Hilbert keys.
+ *
+ * \return void
+ */
+void domain_voronoi_dynamic_flag_particles(void)
+{
+  /* determine the destination task of every local particle */
+  for(int i = 0; i < NumPart; i++)
+    {
+      int no = 0; /* the descent starts at node 0 */
+
+      while(topNodes[no].Daughter >= 0) /* the descent stops at the first node with a negative Daughter */
+        no = topNodes[no].Daughter + (Key[i] - topNodes[no].StartKey) / (topNodes[no].Size >> 3);
+
+      no = topNodes[no].Leaf; /* from here on 'no' is a leaf number rather than a node index */
+
+      int task = DomainTask[no];
+      domain_mark_in_trans_table(i, task);
+    }
+}
+
+/*! \brief Execute voronoi_dynamic_update
+ *
+ * Calls domain_exchange_and_update_DC() if needed, i.e. if Largest_Nvc > 0,
+ * and frees trans_table in either case. The result of measure_time() is
+ * added to CPU_Step[CPU_DOMAIN] on entry and to CPU_Step[CPU_MESH_DYNAMIC]
+ * on exit (this presumably books the time spent here under
+ * CPU_MESH_DYNAMIC, assuming measure_time() returns the time elapsed since
+ * its previous call - TODO confirm).
+ *
+ * \return void
+ */
+void domain_voronoi_dynamic_update_execute(void)
+{
+  CPU_Step[CPU_DOMAIN] += measure_time();
+  if(Largest_Nvc > 0)
+    domain_exchange_and_update_DC();
+
+  myfree_movable(trans_table); /* allocated in domain_prepare_voronoi_dynamic_update() */
+
+  CPU_Step[CPU_MESH_DYNAMIC] += measure_time();
+}
+
+/*! \brief Save the new top-level tree data into global arrays.
+ *
+ * Copies StartKey, Size, Daughter and Leaf of the first NTopnodes entries of
+ * the temporary topNodes array into TopNodes and fills the
+ * MortonToPeanoSubnode table of every node: for each of the eight
+ * sub-blocks j (taken in Peano-Hilbert key order) the first key of the
+ * sub-block is inverted to integer coordinates, and one bit of each
+ * coordinate forms the index idx = x | y << 1 | z << 2 under which j is
+ * stored.
+ *
+ * \return void
+ */
+void domain_preserve_relevant_topnode_data(void)
+{
+  for(int i = 0; i < NTopnodes; i++)
+    {
+      TopNodes[i].StartKey = topNodes[i].StartKey;
+      TopNodes[i].Size     = topNodes[i].Size;
+      TopNodes[i].Daughter = topNodes[i].Daughter;
+      TopNodes[i].Leaf     = topNodes[i].Leaf;
+
+      int bits   = my_ffsll(TopNodes[i].Size); /* presumably the 1-based position of the lowest set bit, as with ffsll() - TODO confirm */
+      int blocks = (bits - 1) / 3 - 1;         /* NOTE(review): looks like the shift that moves the bit separating the sub-blocks to bit 0 - confirm */
+
+      for(int j = 0; j < 8; j++)
+        {
+          peano1D xb, yb, zb;
+          peano_hilbert_key_inverse(TopNodes[i].StartKey + j * (TopNodes[i].Size >> 3), BITS_PER_DIMENSION, &xb, &yb, &zb);
+          xb >>= blocks;
+          yb >>= blocks;
+          zb >>= blocks;
+          int idx = (xb & 1) | ((yb & 1) << 1) | ((zb & 1) << 2);
+          if(idx < 0 || idx > 7) /* defensive only: idx is built from three single bits and hence always lies in 0..7 */
+            terminate("j=%d idx=%d", j, idx);
+
+          TopNodes[i].MortonToPeanoSubnode[idx] = j;
+        }
+    }
+}
+
+/*! \brief Calculates the total cost of different operations.
+ *
+ * This function gathers information about the cost of gravity and
+ * hydrodynamics calculation as well as the particle load.
+ *
+ * The local sums are written to the globals gravcost and sphcost, the
+ * sums over all tasks (MPI_Allreduce) to totgravcost, totsphcost and
+ * totpartcount. From these the weights normsum_work, normsum_load and
+ * normsum_worksph and the factors fac_work, fac_load and fac_worksph are
+ * set. The run is terminated if All.MultipleDomains lies outside [1, 512]
+ * or if neither the total gravity cost nor the total hydro cost is
+ * positive.
+ *
+ * \return void
+ */
+void domain_find_total_cost(void)
+{
+  if(All.MultipleDomains < 1 || All.MultipleDomains > 512)
+    terminate("All.MultipleDomains < 1 || All.MultipleDomains > 512");
+
+  gravcost = sphcost  = 0;
+  double partcount    = 0;
+  double sphpartcount = 0; /* number of particles with a positive hydro cost factor */
+
+  for(int i = 0; i < NumPart; i++)
+    {
+#ifdef ADDBACKGROUNDGRID
+      if(P[i].Type != 0)
+        continue; /* with this option only particles of type 0 enter the sums */
+#endif /* #ifdef ADDBACKGROUNDGRID */
+      partcount += 1.0;
+
+      gravcost += domain_grav_tot_costfactor(i);
+
+      double hydrocost = domain_hydro_tot_costfactor(i);
+      sphcost += hydrocost;
+
+      if(hydrocost > 0)
+        sphpartcount += 1.0;
+    }
+
+  double loc[4] = {gravcost, sphcost, partcount, sphpartcount}, sum[4]; /* packed so that a single reduction suffices */
+
+  MPI_Allreduce(loc, sum, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  totgravcost            = sum[0];
+  totsphcost             = sum[1];
+  totpartcount           = sum[2];
+  double totsphpartcount = sum[3];
+
+  if(totsphcost > 0 && totgravcost > 0 && totsphpartcount > (All.TopNodeFactor * All.MultipleDomains * NTask))
+    {
+      /* in this case we give equal weight to gravitational work-load, hydro work load, and particle load.
+       * It requires both total costs to be positive and more particles with hydro cost than
+       * All.TopNodeFactor * All.MultipleDomains * NTask.
+       */
+      normsum_work    = 0.333333;
+      normsum_load    = 0.333333;
+      normsum_worksph = 0.333333;
+      fac_work        = normsum_work / totgravcost;
+      fac_load        = normsum_load / totpartcount;
+      fac_worksph     = normsum_worksph / totsphcost;
+    }
+  else if(totgravcost > 0)
+    {
+      /* in this case we give equal weight to gravitational work-load and particle load.
+       * The final pieces should have at most imbalance 2.0 in either of the two
+       */
+      normsum_work    = 0.5;
+      normsum_load    = 0.5;
+      normsum_worksph = 0;
+      fac_work        = normsum_work / totgravcost;
+      fac_load        = normsum_load / totpartcount;
+      fac_worksph     = 0.0;
+    }
+  else if(totsphcost > 0)
+    {
+      /* here we only appear to do hydrodynamics. We hence give equal weight to SPH cost and
+       * particle load.
+       */
+      normsum_work    = 0;
+      normsum_load    = 0.5;
+      normsum_worksph = 0.5;
+      fac_work        = 0.0;
+      fac_load        = normsum_load / totpartcount;
+      fac_worksph     = normsum_worksph / totsphcost;
+    }
+  else
+    terminate("strange: totsphcost=%g totgravcost=%g\n", totsphcost, totgravcost);
+}
+
+/*! \brief Coordinate conversion to integer.
+ *
+ * Reinterprets the bit pattern of d through a union, keeps its lowest 52
+ * bits and returns the uppermost BITS_PER_DIMENSION of those. The bits
+ * above bit 51 of the pattern are discarded.
+ *
+ * NOTE(review): this relies on double being an IEEE-754 binary64 value
+ * (52-bit mantissa) and presumably on callers passing values scaled into
+ * [1, 2), so that the mantissa acts as a fixed-point fraction - confirm
+ * against the callers.
+ *
+ * \param[in] d coordinate in double precision.
+ *
+ * \return coordinate in integer of type peano1D.
+ */
+peano1D domain_double_to_int(double d)
+{
+  union
+  {
+    double d;
+    unsigned long long ull;
+  } u;
+  u.d = d;
+  return (peano1D)((u.ull & 0xFFFFFFFFFFFFFllu) >> (52 - BITS_PER_DIMENSION)); /* the mask has 13 hex digits, i.e. 52 bits */
+}
+
+/*! \brief Allocates memory
+ *
+ * This function allocates all the stuff that will be required for the
+ * tree-construction/walk later on.
+ *
+ * MaxTopNodes is derived from All.TopNodeAllocFactor and All.MaxPart.
+ * TopNodes, DomainTask and DomainListOfLocalTopleaves get MaxTopNodes
+ * entries, the start/end lists NTask * All.MultipleDomains entries, and the
+ * two top-leaf bookkeeping arrays NTask entries. Terminates if
+ * DomainStartList is already set.
+ *
+ * \return void
+ */
+void domain_allocate(void)
+{
+  MaxTopNodes = (int)(All.TopNodeAllocFactor * All.MaxPart + 1);
+
+  if(DomainStartList)
+    terminate("domain storage already allocated");
+
+  DomainStartList        = (int *)mymalloc_movable(&DomainStartList, "DomainStartList", (NTask * All.MultipleDomains * sizeof(int)));
+  DomainEndList          = (int *)mymalloc_movable(&DomainEndList, "DomainEndList", (NTask * All.MultipleDomains * sizeof(int)));
+  DomainFirstLocTopleave = (int *)mymalloc_movable(&DomainFirstLocTopleave, "DomainFirstLocTopleave", NTask * sizeof(int));
+  DomainNLocalTopleave   = (int *)mymalloc_movable(&DomainNLocalTopleave, "DomainNLocalTopleave", NTask * sizeof(int));
+  TopNodes               = (struct topnode_data *)mymalloc_movable(&TopNodes, "TopNodes", (MaxTopNodes * sizeof(struct topnode_data)));
+  DomainTask             = (int *)mymalloc_movable(&DomainTask, "DomainTask", (MaxTopNodes * sizeof(int)));
+  DomainListOfLocalTopleaves =
+      (int *)mymalloc_movable(&DomainListOfLocalTopleaves, "DomainListOfLocalTopleaves", (MaxTopNodes * sizeof(int)));
+}
+
+/*! \brief Free arrays needed in domain decomposition.
+ *
+ * This is the counterpart to domain_allocate; need to free arrays in reverse
+ * allocation order.
+ * + * \return void + */ +void domain_free(void) +{ + if(!DomainStartList) + terminate("domain storage not allocated"); + + myfree_movable(DomainListOfLocalTopleaves); + myfree_movable(DomainTask); + myfree_movable(TopNodes); + myfree_movable(DomainNLocalTopleave); + myfree_movable(DomainFirstLocTopleave); + myfree_movable(DomainEndList); + myfree_movable(DomainStartList); + + DomainTask = NULL; + TopNodes = NULL; + DomainNLocalTopleave = NULL; + DomainFirstLocTopleave = NULL; + DomainEndList = NULL; + DomainStartList = NULL; +} + +/*! \brief Print message in domain.txt logfile. + * + * \param[in] buf String to be printed to domain.txt. + * + * \return void + */ +void domain_printf(char *buf) +{ + if(RestartFlag <= 2) + fprintf(FdDomain, "%s", buf); +} + +/*! \brief Function that reports load-balancing + * + * Function calculates load-balancing of the simulation and prints + * it to domain.txt + * + * \return void + */ +void domain_report_balance(void) +{ + /* get total particle counts */ + long long loc_count[2 * TIMEBINS], glob_count[2 * TIMEBINS]; + + for(int i = 0; i < TIMEBINS; i++) + { + loc_count[i] = TimeBinsGravity.TimeBinCount[i]; + loc_count[TIMEBINS + i] = TimeBinsHydro.TimeBinCount[i]; + } + + MPI_Reduce(loc_count, glob_count, 2 * TIMEBINS, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + + double loc_max_data[2 * TIMEBINS + 3], glob_max_data[2 * TIMEBINS + 3]; + + loc_max_data[2 * TIMEBINS + 0] = NumPart; + loc_max_data[2 * TIMEBINS + 1] = NumGas; + loc_max_data[2 * TIMEBINS + 2] = NumPart - NumGas; + + double glob_sum_data[2 * TIMEBINS]; + + double *loc_HydroCost = &loc_max_data[0]; + double *loc_GravCost = &loc_max_data[TIMEBINS]; + double *max_HydroCost = &glob_max_data[0]; + double *max_GravCost = &glob_max_data[TIMEBINS]; + double *glob_HydroCost = &glob_sum_data[0]; + double *glob_GravCost = &glob_sum_data[TIMEBINS]; + + for(int i = 0; i < TIMEBINS; i++) + { + loc_GravCost[i] = 0; + loc_HydroCost[i] = 0; + } + +#ifdef SELFGRAVITY + for(int i = 
0; i < NumPart; i++) + { + for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++) + { +#ifdef HIERARCHICAL_GRAVITY + if(bin >= P[i].TimeBinGrav) +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + { + if(domain_bintolevel[bin] >= 0) + loc_GravCost[bin] += MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[bin]]; + else + { + if(domain_refbin[bin] >= 0) + loc_GravCost[bin] += + MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[domain_refbin[bin]]]; + else + loc_GravCost[bin] += 1.0; + } + } + } + } +#endif /* #ifdef SELFGRAVITY */ + + for(int i = 0; i < NumPart; i++) + if(P[i].Type == 0) + loc_HydroCost[P[i].TimeBinHydro] += 1.0; + + /* now determine the cumulative cost for the hydrodynamics */ + for(int i = 1; i <= All.HighestOccupiedTimeBin; i++) + loc_HydroCost[i] += loc_HydroCost[i - 1]; + + MPI_Reduce(loc_max_data, glob_sum_data, 2 * TIMEBINS, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(loc_max_data, glob_max_data, 2 * TIMEBINS + 3, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + double max_tot = glob_max_data[2 * TIMEBINS + 0]; + double max_sph = glob_max_data[2 * TIMEBINS + 1]; + double max_dm = glob_max_data[2 * TIMEBINS + 2]; + + long long *tot_count = &glob_count[0]; + long long *tot_count_sph = &glob_count[TIMEBINS]; + + long long tot_cumulative[TIMEBINS]; + tot_cumulative[0] = tot_count[0]; + + for(int i = 1; i < TIMEBINS; i++) + tot_cumulative[i] = tot_count[i] + tot_cumulative[i - 1]; + + double tot_gravcost = 0, max_gravcost = 0, tot_hydrocost = 0, max_hydrocost = 0; + + All.TotGravCost = 0; + + for(int i = 0; i < TIMEBINS; i++) + { + All.TotGravCost += domain_to_be_balanced[i] * glob_GravCost[i] / NTask; + + tot_gravcost += domain_to_be_balanced[i] * glob_GravCost[i] / NTask; + max_gravcost += domain_to_be_balanced[i] * max_GravCost[i]; + + tot_hydrocost += domain_to_be_balanced[i] * glob_HydroCost[i] / NTask; + max_hydrocost += 
domain_to_be_balanced[i] * max_HydroCost[i]; + } + + double bal_grav_bin[TIMEBINS], bal_grav_bin_rel[TIMEBINS]; + double bal_hydro_bin[TIMEBINS], bal_hydro_bin_rel[TIMEBINS]; + + for(int i = 0; i < TIMEBINS; i++) + { + if(tot_count[i] > 0) + { + bal_grav_bin[i] = max_GravCost[i] / (glob_GravCost[i] / NTask + 1.0e-60); + bal_grav_bin_rel[i] = + (tot_gravcost + domain_to_be_balanced[i] * (max_GravCost[i] - glob_GravCost[i] / NTask)) / (tot_gravcost + 1.0e-60); + } + else + { + bal_grav_bin[i] = 0.0; + bal_grav_bin_rel[i] = 0.0; + } + + if(tot_count_sph[i] > 0) + { + bal_hydro_bin[i] = max_HydroCost[i] / (glob_HydroCost[i] / NTask + 1.0e-60); + bal_hydro_bin_rel[i] = (tot_hydrocost + domain_to_be_balanced[i] * (max_HydroCost[i] - glob_HydroCost[i] / NTask)) / + (tot_hydrocost + 1.0e-60); + } + else + { + bal_hydro_bin[i] = 0.0; + bal_hydro_bin_rel[i] = 0.0; + } + } + + char buf[1000]; + + sprintf(buf, "\nDOMAIN BALANCE, Sync-Point %d, Time: %g\n", All.NumCurrentTiStep, All.Time); + + domain_printf(buf); + + sprintf(buf, "Timebins: Gravity Hydro cumulative grav-balance hydro-balance\n"); + + domain_printf(buf); + + long long tot = 0, tot_sph = 0; + + for(int i = TIMEBINS - 1; i >= 0; i--) + { +#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) + if(tot_count_sph[i] > 0 || tot_count[i] > 0) +#else /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) */ + if(tot_count[i] > 0) + tot += tot_count[i]; + + if(tot_count_sph[i] > 0) +#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \ + #else */ + { + char buf[1000]; + + sprintf(buf, "%c%cbin=%2d %10llu %10llu %10llu %c %6.3f |%6.3f %c %6.3f |%6.3f\n", + i == All.HighestActiveTimeBin ? '>' : ' ', i >= All.SmallestTimeBinWithDomainDecomposition ? 
'|' : ' ', i, + tot_count[i], tot_count_sph[i], tot_cumulative[i], domain_bintolevel[i] >= 0 ? 'm' : ' ', bal_grav_bin[i], + bal_grav_bin_rel[i], domain_to_be_balanced[i] > 0 ? '*' : ' ', bal_hydro_bin[i], bal_hydro_bin_rel[i]); + + domain_printf(buf); + + tot += tot_count[i]; + tot_sph += tot_count_sph[i]; + } + } + + sprintf(buf, "-------------------------------------------------------------------------------------\n"); + + domain_printf(buf); + + sprintf(buf, "BALANCE, LOAD: %6.3f %6.3f %6.3f WORK: %6.3f %6.3f\n", + max_dm / (tot - tot_sph + 1.0e-60) * NTask, max_sph / (tot_sph + 1.0e-60) * NTask, max_tot / (tot + 1.0e-60) * NTask, + max_gravcost / (tot_gravcost + 1.0e-60), max_hydrocost / (tot_hydrocost + 1.0e-60)); + + domain_printf(buf); + + sprintf(buf, "-------------------------------------------------------------------------------------\n"); + + domain_printf(buf); + + sprintf(buf, "\n"); + + domain_printf(buf); + + myflush(FdDomain); + } + + /* the following needs to be known by all the tasks */ + MPI_Bcast(&All.TotGravCost, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); +} diff --git a/src/amuse/community/arepo/src/domain/domain.h b/src/amuse/community/arepo/src/domain/domain.h new file mode 100644 index 0000000000..f52781918a --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain.h @@ -0,0 +1,156 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain/domain.h
+ * \date        05/2018
+ * \brief       Header for domain decomposition.
+ * \details     Declares the data structures, global variables and functions
+ *              that are shared by the domain decomposition sources.
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 28.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef ALLVARS_H
+#include "../main/allvars.h"
+#endif /* #ifndef ALLVARS_H */
+
+#ifndef DOMAIN_H
+#define DOMAIN_H
+
+#define MASK_ACTIVE_FLAG_IN_TYPE 127 /* 0x7f, the lowest seven bits - presumably selects the plain type of a particle; TODO confirm */
+#define SET_ACTIVE_FLAG_IN_TYPE 128  /* 0x80, the eighth bit - presumably marks a particle as active; TODO confirm */
+
+enum domain_displace_mode /* second argument of domain_displacePosition() */
+{
+  DISPLACE_POSITION_FORWARD,
+  DISPLACE_POSITION_BACKWARD
+};
+
+extern struct local_topnode_data
+{
+  peanokey Size;     /*!< number of Peano-Hilbert mesh-cells represented by top-level node */
+  peanokey StartKey; /*!< first Peano-Hilbert key in top-level node */
+  long long Count;   /*!< counts the number of particles in this top-level node */
+  double Cost;       /*!< presumably the gravity cost attributed to this node - TODO confirm */
+  double SphCost;    /*!< presumably the hydro cost attributed to this node - TODO confirm */
+  int Daughter;      /*!< index of first daughter cell (out of 8) of top-level node */
+  int Leaf;          /*!< if the node is a leaf, this gives its number when all leaves are traversed in Peano-Hilbert order */
+  int Parent;        /*!< presumably the index of the parent node - TODO confirm */
+  int PIndex;        /*!< first particle in node */
+
+} * topNodes, *branchNodes; /*!< points to the root node of the top-level tree */
+
+struct domain_count_data
+{
+  int task;
+  int count;
+  int origintask;
+};
+
+extern struct domain_peano_hilbert_data
+{
+  peanokey key;
+  int index;
+} * mp;
+
+extern struct trans_data /* one entry of the translation table used for the Voronoi dynamic update */
+{
+  MyIDType ID;
+  int new_task; /* task the cell is assigned to; -1 marks an entry that is skipped when the table is sent */
+  int new_index;
+  int wrapped;
+} * trans_table;
+
+extern int N_trans; /* length of the translation table */
+
+extern int Nbranch;
+
+extern double fac_work, fac_load, fac_worksph;             /* set in domain_find_total_cost(): weight divided by the matching total */
+extern double normsum_work, normsum_load, normsum_worksph; /* set in domain_find_total_cost(): weights of gravity work, particle load and hydro work */
+
+extern double totgravcost, totpartcount, gravcost, totsphcost, sphcost; /* local (gravcost, sphcost) and global (tot...) sums from domain_find_total_cost() */
+
+extern struct domain_cost_data
+{
+  int no;
+  float Work;    /*!< total "work" due to the particles stored by a leaf node */
+  float WorkSph; /*!< presumably the hydro (SPH) counterpart of Work - TODO confirm */
+  int Count;     /*!< a table that gives the total number of particles held by each processor (NOTE(review): this is a scalar
+                    member of a leaf-node record, so the wording looks copied from elsewhere - confirm the meaning) */
+  int CountSph;  /*!< a table that gives the total number of SPH particles held by each processor (NOTE(review): see the
+                    remark on Count) */
+} * DomainLeaveNode;
+
+/* toGo[partner] gives the number of particles on the current task that have to go to task 'partner'
+ */
+extern int *toGo, *toGoSph;
+extern int *toGet, *toGetSph;
+extern int *list_NumPart;
+extern int *list_NumGas;
+extern int *list_load;
+extern int *list_loadsph;
+extern double *list_work;
+extern double *list_worksph;
+
+/* functions for domain decomposition */
+peano1D domain_double_to_int(double d);
+double domain_grav_tot_costfactor(int i);
+double domain_hydro_tot_costfactor(int i);
+void domain_init_sum_cost(void);
+void domain_printf(char *buf);
+void domain_report_balance(void);
+int domain_sort_load(const void *a, const void *b);
+int domain_compare_count(const void *a, const void *b);
+int domain_sort_task(const void *a, const void *b);
+int domain_compare_count(const void *a, const void *b); /* repeated declaration of the same prototype */
+void domain_rearrange_particle_sequence(void);
+void domain_combine_topleaves_to_domains(int ncpu, int ndomain);
+void domain_combine_multipledomains(void);
+void domain_allocate(void);
+void domain_Decomposition(void);
+int domain_compare_key(const void *a, const void *b);
+int domain_countToGo(void);
+int domain_determineTopTree(void);
+void domain_exchange(void);
+void domain_findExtent(void);
+void domain_free(void);
+void domain_sumCost(void);
+void domain_walktoptree(int no);
+void domain_optimize_domain_to_task_mapping(void);
+int domain_compare_count(const void *a, const void *b); /* repeated declaration of the same prototype */
+void domain_allocate_lists(void);
+void domain_free_lists(void);
+int domain_unpack_tree_branch(int no, int parent);
+void domain_do_local_refine(int n, int *list);
+void domain_preserve_relevant_topnode_data(void);
+void domain_find_total_cost(void);
+void domain_voronoi_dynamic_update_execute(void);
+void domain_prepare_voronoi_dynamic_update(void);
+void domain_voronoi_dynamic_flag_particles(void);
+void domain_mark_in_trans_table(int i, int task);
+void domain_exchange_and_update_DC(void);
+int domain_compare_connection_ID(const void *a, const void *b);
+int domain_compare_local_trans_data_ID(const void *a, const void *b);
+int domain_compare_recv_trans_data_ID(const void *a, const void *b);
+int domain_compare_recv_trans_data_oldtask(const void *a, const void *b);
+void mysort_domain(void *b, size_t n, size_t s);
+void domain_displacePosition(MyDouble *pos, enum domain_displace_mode mode);
+
+#endif /* #ifndef DOMAIN_H */
diff --git a/src/amuse/community/arepo/src/domain/domain_DC_update.c b/src/amuse/community/arepo/src/domain/domain_DC_update.c
new file mode 100644
index 0000000000..bf960ebfc1
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_DC_update.c
@@ -0,0 +1,699 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_DC_update.c + * \date 05/2018 + * \brief Algorithms for voronoi dynamic update + * \details contains functions: + * void domain_mark_in_trans_table(int i, int task) + * void domain_exchange_and_update_DC(void) + * int domain_compare_connection_ID(const void *a, + * const void *b) + * int domain_compare_local_trans_data_ID(const void *a, + * const void *b) + * int domain_compare_recv_trans_data_ID(const void *a, + * const void *b) + * int domain_compare_recv_trans_data_oldtask(const void *a, + * const void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 17.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +struct trans_data *trans_table; +int N_trans; + +/*! \brief Data structure for local auxiliary translation table. + */ +static struct local_aux_trans_data +{ + MyIDType ID; + int new_index; +} * local_trans_data; + +/*! \brief Data structure for communicating the translation table. + */ +static struct aux_trans_data +{ + MyIDType ID; + int old_task; + int old_index; + int new_index; +} * send_trans_data, *recv_trans_data; + +/*! \brief Data structure for transcribing data. + */ +static struct aux_transscribe_data +{ + int old_index; + int new_task; + int new_index; + int image_flags; +} * send_transscribe_data, *recv_transscribe_data; + +/*! \brief Fill translation table. + * + * Mark where cells are moved to and mark in DC accordingly to make sure + * they get communicated to the same task. + * + * \param[in] i Index in P and SphP arrays. + * \param[in] task Task to which particle i is exported. 
+ * + * \return void + */ +void domain_mark_in_trans_table(int i, int task) +{ + if(Largest_Nvc > 0) + { + if(i < NumGas) + { + trans_table[i].ID = P[i].ID; + trans_table[i].new_task = task; + + int q = SphP[i].first_connection; + + while(q >= 0) + { + int qq = DC[q].next; + if(q == qq) + terminate("preventing getting stuck in a loop due to q == DC[q].next : i=%d q=%d last_connection=%d", i, q, + SphP[i].last_connection); + + if((P[i].Mass == 0 && P[i].ID == 0) || P[i].Type != 0) /* this cell has been deleted or turned into a star */ + DC[q].next = -1; + else + DC[q].next = task; /* we will temporarily use the next variable to store the new task */ + + if(q == SphP[i].last_connection) + break; + + q = qq; + } + } + else if(i < N_trans) + trans_table[i].new_task = -1; /* this one has been removed by rerrange_particle_sequence() */ + } +} + +/*! \brief Communicates connections. + * + * This algorithms communicates Delauny connections and updates them on the + * new task. + * + * \return void + */ +void domain_exchange_and_update_DC(void) +{ + double t0 = second(); + +#if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) + /* remove all image flags, after our box movement stunt they are all incorrect anyway */ + for(int i = 0; i < MaxNvc; i++) + { + DC[i].image_flags = 1; + } +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) */ + + /* first, we need to complete the translation table */ + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(int i = 0; i < N_trans; i++) + if(trans_table[i].new_task >= 0) + Send_count[trans_table[i].new_task]++; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + int nimport = 0, nexport = 0; + Recv_offset[0] = Send_offset[0] = 0; + + for(int j = 0; j < NTask; j++) + { + nexport += Send_count[j]; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + 
Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + send_trans_data = mymalloc("send_trans_data", nexport * sizeof(struct aux_trans_data)); + recv_trans_data = mymalloc("recv_trans_data", nimport * sizeof(struct aux_trans_data)); + + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(int i = 0; i < N_trans; i++) + { + int task = trans_table[i].new_task; + if(task >= 0) + { + send_trans_data[Send_offset[task] + Send_count[task]].ID = trans_table[i].ID; + send_trans_data[Send_offset[task] + Send_count[task]].old_index = i; + send_trans_data[Send_offset[task] + Send_count[task]].old_task = ThisTask; + Send_count[task]++; + } + } + + /* exchange the data */ + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&send_trans_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE, + recvTask, TAG_DENS_B, &recv_trans_data[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + /* let's now sort the incoming list according to ID */ + mysort(recv_trans_data, nimport, sizeof(struct aux_trans_data), domain_compare_recv_trans_data_ID); + + /* make an auxiliary list for the local particles that we will also sort according to ID */ + local_trans_data = mymalloc("local_trans_data", NumGas * sizeof(struct local_aux_trans_data)); + for(int i = 0; i < NumGas; i++) + { + local_trans_data[i].ID = P[i].ID; + local_trans_data[i].new_index = i; + } + mysort(local_trans_data, NumGas, sizeof(struct local_aux_trans_data), domain_compare_local_trans_data_ID); + + int i, j; + /* now we go through and put in the new index for matching IDs */ + for(i = 0, j = 0; i < nimport && j < NumGas;) + { + if(recv_trans_data[i].ID < local_trans_data[j].ID) + { + recv_trans_data[i].new_index = 
-1; /* this particle has been eliminated */ + i++; + } + else if(recv_trans_data[i].ID > local_trans_data[j].ID) + j++; + else + { + recv_trans_data[i].new_index = local_trans_data[j].new_index; + i++; + j++; + } + } + + for(; i < nimport; i++) + recv_trans_data[i].new_index = -1; /* this particle has been eliminated */ + + myfree(local_trans_data); + + /* now order the received data by sending task, so that we can return it */ + mysort(recv_trans_data, nimport, sizeof(struct aux_trans_data), domain_compare_recv_trans_data_oldtask); + + /* return the data */ + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&recv_trans_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE, + recvTask, TAG_DENS_B, &send_trans_data[Send_offset[recvTask]], + Send_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + /* now let's fill in the new_index entry into the translation table */ + for(int i = 0; i < nexport; i++) + trans_table[send_trans_data[i].old_index].new_index = send_trans_data[i].new_index; + + myfree(recv_trans_data); + myfree(send_trans_data); + + /* it's now time to transcribe the task and index fields in the DC list */ + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(int i = 0; i < MaxNvc; i++) + { + int task = DC[i].task; + if(task >= 0) + { + if(task >= NTask) + terminate("i=%d Nvc=%d MaxNvc=%d task=%d\n", i, Nvc, MaxNvc, task); + + Send_count[task]++; + } + } + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + nimport = nexport = 0; + Recv_offset[0] = Send_offset[0] = 0; + + for(int j = 0; j < NTask; j++) + { + nexport += Send_count[j]; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 
1] + Recv_count[j - 1]; + } + } + + send_transscribe_data = mymalloc("send_transscribe_data", nexport * sizeof(struct aux_transscribe_data)); + recv_transscribe_data = mymalloc("recv_transscribe_data", nimport * sizeof(struct aux_transscribe_data)); + + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(int i = 0; i < MaxNvc; i++) + { + int task = DC[i].task; + if(task >= 0) + { + send_transscribe_data[Send_offset[task] + Send_count[task]].old_index = DC[i].index; + send_transscribe_data[Send_offset[task] + Send_count[task]].image_flags = DC[i].image_flags; + Send_count[task]++; + } + } + + /* exchange the data */ + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&send_transscribe_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct aux_transscribe_data), + MPI_BYTE, recvTask, TAG_DENS_B, &recv_transscribe_data[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct aux_transscribe_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + for(int i = 0; i < nimport; i++) + { + if(recv_transscribe_data[i].old_index >= N_trans) + terminate("recv_transscribe_data[i].old_index >= N_trans"); + + if(recv_transscribe_data[i].old_index < 0) + terminate("recv_transscribe_data[i].old_index < 0"); + + int old_index = recv_transscribe_data[i].old_index; + + recv_transscribe_data[i].new_task = trans_table[old_index].new_task; + recv_transscribe_data[i].new_index = trans_table[old_index].new_index; + +#if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) + // Nothing to do here +#else /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) */ + if(recv_transscribe_data[i].new_task >= 0) + { + if(trans_table[old_index].wrapped) + { + int bitflags = ffs(recv_transscribe_data[i].image_flags) - 1; + 
int zbits = (bitflags / 9); + int ybits = (bitflags - zbits * 9) / 3; + int xbits = bitflags - zbits * 9 - ybits * 3; + + if(trans_table[old_index].wrapped & 1) + { + if(xbits == 1) + xbits = 0; + else if(xbits == 0) + xbits = 2; + else /* xbits == 2 */ + terminate("b"); + } + else if(trans_table[old_index].wrapped & 2) + { + if(xbits == 1) + { + terminate("a"); + } + else if(xbits == 0) + xbits = 1; + else /* xbits == 2 */ + xbits = 0; + } + + if(trans_table[old_index].wrapped & 4) + { + if(ybits == 1) + ybits = 0; + else if(ybits == 0) + ybits = 2; + else + { + terminate("b"); + } + } + else if(trans_table[old_index].wrapped & 8) + { + if(ybits == 1) + { + terminate("a"); + } + else if(ybits == 0) + ybits = 1; + else + ybits = 0; + } + + if(trans_table[old_index].wrapped & 16) + { + if(zbits == 1) + zbits = 0; + else if(zbits == 0) + zbits = 2; + else + { + terminate("b"); + } + } + else if(trans_table[old_index].wrapped & 32) + { + if(zbits == 1) + { + terminate("a"); + } + else if(zbits == 0) + zbits = 1; + else + zbits = 0; + } + + recv_transscribe_data[i].image_flags = (1 << (zbits * 9 + ybits * 3 + xbits)); + } + } +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) #else */ + } + + /* now return the data */ + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&recv_transscribe_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct aux_transscribe_data), + MPI_BYTE, recvTask, TAG_DENS_B, &send_transscribe_data[Send_offset[recvTask]], + Send_count[recvTask] * sizeof(struct aux_transscribe_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + /* copy the results over to the DC structure */ + for(int i = 0; i < MaxNvc; i++) + { + int task = DC[i].task; + if(task >= 0) + { + 
DC[i].task = send_transscribe_data[Send_offset[task] + Send_count[task]].new_task; + DC[i].index = send_transscribe_data[Send_offset[task] + Send_count[task]].new_index; + DC[i].image_flags = send_transscribe_data[Send_offset[task] + Send_count[task]].image_flags; + Send_count[task]++; + } + } + + myfree(recv_transscribe_data); + myfree(send_transscribe_data); + + /* now we can exchange the DC data. The task where each item should go is stored in 'next' at this point */ + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + /* count where they should go */ + for(int i = 0; i < MaxNvc; i++) + { + if(DC[i].task >= 0) + { + int task = DC[i].next; + if(task >= 0) + { + if(task >= NTask) + terminate("Thistask=%d i=%d Nvc=%d MaxNvc=%d DC[i].task=%d DC[i].next=%d\n", ThisTask, i, Nvc, MaxNvc, DC[i].task, + DC[i].next); + + if(DC[i].index >= 0) + Send_count[task]++; + } + } + } + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + nimport = nexport = 0; + Recv_offset[0] = Send_offset[0] = 0; + + for(int j = 0; j < NTask; j++) + { + nexport += Send_count[j]; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + /* make sure that we have enough room to store the new DC list */ + while(nimport > MaxNvc) + { + int old_MaxNvc = MaxNvc; + Mesh.Indi.AllocFacNvc *= ALLOC_INCREASE_FACTOR; + MaxNvc = Mesh.Indi.AllocFacNvc; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNvc=%d Indi.AllocFacNvc=%g\n", ThisTask, MaxNvc, Mesh.Indi.AllocFacNvc); +#endif /* #ifdef VERBOSE */ + DC = myrealloc_movable(DC, MaxNvc * sizeof(connection)); + for(int n = old_MaxNvc; n < MaxNvc; n++) + DC[n].task = -1; + } + + connection *tmpDC = mymalloc("tmpDC", nexport * sizeof(connection)); + + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(int i = 0; i < MaxNvc; i++) + { + if(DC[i].task >= 0) + { + int task = DC[i].next; + 
+ if(task >= 0 && DC[i].index >= 0) + tmpDC[Send_offset[task] + Send_count[task]++] = DC[i]; + } + } + + /* exchange the connection information */ + + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&tmpDC[Send_offset[recvTask]], Send_count[recvTask] * sizeof(connection), MPI_BYTE, recvTask, TAG_DENS_B, + &DC[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(connection), MPI_BYTE, recvTask, TAG_DENS_B, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + myfree(tmpDC); + + Nvc = nimport; + + /* mark the remaining ones as available */ + for(int i = Nvc; i < MaxNvc - 1; i++) + { + DC[i].next = i + 1; + DC[i].task = -1; + } + DC[MaxNvc - 1].next = -1; + DC[MaxNvc - 1].task = -1; + + if(Nvc < MaxNvc) + FirstUnusedConnection = Nvc; + else + FirstUnusedConnection = -1; + + /* now we need to connect the information to the particles, this we do via the IDs */ + + local_trans_data = mymalloc("local_trans_data", NumGas * sizeof(struct local_aux_trans_data)); + for(int i = 0; i < NumGas; i++) + { + local_trans_data[i].ID = P[i].ID; + local_trans_data[i].new_index = i; /* is here used as rank of the particle */ + } + mysort(local_trans_data, NumGas, sizeof(struct local_aux_trans_data), domain_compare_local_trans_data_ID); + + mysort(DC, Nvc, sizeof(connection), domain_compare_connection_ID); + + int last = -1; + for(i = 0, j = 0; i < NumGas && j < Nvc; i++) + { + int k = local_trans_data[i].new_index; + + if(P[k].ID < DC[j].ID) + { + /* this particle has no connection information (new cell) */ + SphP[k].first_connection = -1; + SphP[k].last_connection = -1; + } + else if(P[k].ID == DC[j].ID) + { + SphP[k].first_connection = j; + + while(j < Nvc) + { + SphP[k].last_connection = j; + + if(last >= 0) + DC[last].next = j; + + last = j; + j++; + if(j >= Nvc) + break; + if(P[k].ID != DC[j].ID) + break; + } + } + else + { + terminate("strange"); + } + } 
+ + for(; i < NumGas; i++) + { + int k = local_trans_data[i].new_index; + SphP[k].first_connection = -1; + SphP[k].last_connection = -1; + } + + if(last >= 0) + DC[last].next = -1; + + myfree(local_trans_data); + + double t1 = second(); + mpi_printf("DOMAIN: done with rearranging connection information (took %g sec)\n", timediff(t0, t1)); +} + +/*! \brief Compare which ID is larger. + * + * For connection data. + * + * \param[in] a Pointer to first object. + * \param[in] b Pointer to second object. + * + * \return (-1,0,1) -1 if a->ID is smaller. + */ +int domain_compare_connection_ID(const void *a, const void *b) +{ + if(((connection *)a)->ID < (((connection *)b)->ID)) + return -1; + + if(((connection *)a)->ID > (((connection *)b)->ID)) + return +1; + + return 0; +} + +/*! \brief Compare which ID is larger. + * + * For local_aux_trans_data. + * + * \param[in] a Pointer to first object. + * \param[in] b Pointer to second object. + * + * \return (-1,0,1) -1 if a->ID is smaller. + */ +int domain_compare_local_trans_data_ID(const void *a, const void *b) +{ + if(((struct local_aux_trans_data *)a)->ID < (((struct local_aux_trans_data *)b)->ID)) + return -1; + + if(((struct local_aux_trans_data *)a)->ID > (((struct local_aux_trans_data *)b)->ID)) + return +1; + + return 0; +} + +/*! \brief Compare which ID is larger. + * + * For aux_trans_data. + * + * \param[in] a Pointer to first object. + * \param[in] b Pointer to second object. + * + * \return (-1,0,1) -1 if a->ID is smaller. + */ +int domain_compare_recv_trans_data_ID(const void *a, const void *b) +{ + if(((struct aux_trans_data *)a)->ID < (((struct aux_trans_data *)b)->ID)) + return -1; + + if(((struct aux_trans_data *)a)->ID > (((struct aux_trans_data *)b)->ID)) + return +1; + + return 0; +} + +/*! \brief Compare which old_task is larger. + * + * For aux_trans_data. + * + * \param[in] a Pointer to first object. + * \param[in] b Pointer to second object. + * + * \return (-1,0,1) -1 if a->old_task is smaller. 
+ */ +int domain_compare_recv_trans_data_oldtask(const void *a, const void *b) +{ + if(((struct aux_trans_data *)a)->old_task < (((struct aux_trans_data *)b)->old_task)) + return -1; + + if(((struct aux_trans_data *)a)->old_task > (((struct aux_trans_data *)b)->old_task)) + return +1; + + return 0; +} diff --git a/src/amuse/community/arepo/src/domain/domain_balance.c b/src/amuse/community/arepo/src/domain/domain_balance.c new file mode 100644 index 0000000000..fcb384ae38 --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain_balance.c @@ -0,0 +1,1154 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain/domain_balance.c + * \date 05/2018 + * \brief Load-balancing algorithms. + * \details Algorithms to estimate cost of different particles and cells + * and to balance the workload and memory usage equally over the + * mpi tasks. 
+ * contains functions: + * double domain_grav_tot_costfactor(int i) + * double domain_hydro_tot_costfactor(int i) + * void domain_init_sum_cost(void) + * void domain_sumCost(void) + * void domain_combine_topleaves_to_domains(int ncpu, int + * ndomain) + * int domain_sort_task(const void *a, const void *b) + * int domain_sort_load(const void *a, const void *b) + * static int mydata_cmp(struct mydata *lhs, struct mydata *rhs) + * void domain_combine_multipledomains(void) + * void domain_optimize_domain_to_task_mapping(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 17.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/bsd_tree.h" +#include "../domain/domain.h" +#include "../mesh/voronoi/voronoi.h" + +/* do some preparation work for use of red-black ordered binary tree based on BSD macros */ + +/*! \brief Defines structure of mytree nodes. + */ +struct mydata +{ + double pri; + int target; + RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */ +}; + +/* prototype of comparison function of tree elements */ +static int mydata_cmp(struct mydata *lhs, struct mydata *rhs); + +/* the following macro declares 'struct mytree', which is the header element needed as handle for a tree */ +RB_HEAD(mytree, mydata); + +/* the following macros declare appropriate function prototypes and functions needed for this type of tree */ +RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp); +RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp); + +/*! \brief Computes gravity cost. + * + * All timebins in which the particle appears are summed, and the relative + * frequency with which this timebin is executed is taken into account. + * + * \param[in] i Index of cell in P and SphP array. 
+ * + * \return cost-factor. + */ +double domain_grav_tot_costfactor(int i) +{ + double w = MIN_FLOAT_NUMBER; + +#ifdef SELFGRAVITY + for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestActiveTimeBin; bin++) + { + if(domain_to_be_balanced[bin]) + { +#ifdef HIERARCHICAL_GRAVITY + if(bin >= P[i].TimeBinGrav) +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + { + if(domain_bintolevel[bin] >= 0) + w += domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[bin]]; + else + { + if(domain_refbin[bin] >= 0) + w += domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[domain_refbin[bin]]]; + else + w += domain_grav_weight[bin]; + } + } + } + } +#endif /* #ifdef SELFGRAVITY */ + + return w; +} + +/*! \brief Computes hydro cost. + * + * If a cell is active on a certain timebin, it is assigned a cost of "1". + * All active timebins are summed, and the frequency with which each timebin + * is executed is taken into account. + * + * \param[in] i Index of cell in P and SphP array. + * + * \return cost-factor. + */ +double domain_hydro_tot_costfactor(int i) +{ + double w = 0; + + if(P[i].Type == 0) + for(int bin = P[i].TimeBinHydro; bin <= All.HighestOccupiedTimeBin; bin++) + if(domain_to_be_balanced[bin]) + w += domain_hydro_weight[bin]; + + return w; +} + +/*! \brief Prepares cost measurement. + * + * This function prepares the measurement of the total cost on each domain. + * In particular, we determine how the timebins are mapped to the explicit + * measurements of the gravity cost stored in the P.GravCost[] array (which + * in general will only be available for a subset of all timebins). For the + * unmatched timebins, a closest bin is selected that is the most similar in + * terms of particle number on the bin. Finally, the routine also determines + * how often each timebin is executed in one cycle associated with the + * highest occupied timebin. 
+ * + * \return void + */ +void domain_init_sum_cost(void) +{ + long long tot_count[TIMEBINS], tot_count_sph[TIMEBINS]; + + sumup_large_ints(TIMEBINS, TimeBinsGravity.TimeBinCount, tot_count); + sumup_large_ints(TIMEBINS, TimeBinsHydro.TimeBinCount, tot_count_sph); + + for(int i = 0; i < TIMEBINS; i++) + { + domain_bintolevel[i] = -1; + domain_refbin[i] = -1; + } + + for(int j = 0; j < GRAVCOSTLEVELS; j++) /* bins that have known levels at this point */ + if(All.LevelToTimeBin[j] >= 0) + domain_bintolevel[All.LevelToTimeBin[j]] = j; + + for(int i = 0; i < TIMEBINS; i++) + if(tot_count[i] > 0 && domain_bintolevel[i] < 0) /* need to find a reference bin for this one */ + { + double mindiff = MAX_REAL_NUMBER; + int ref_bin = -1; + for(int j = 0; j < TIMEBINS; j++) + if(domain_bintolevel[j] >= 0 && tot_count[j] > 0) + { + if(mindiff > llabs(tot_count[i] - tot_count[j])) + { + mindiff = llabs(tot_count[i] - tot_count[j]); + ref_bin = j; + } + } + + if(ref_bin >= 0) + domain_refbin[i] = ref_bin; + } + + for(int i = 0; i < TIMEBINS; i++) + { + domain_to_be_balanced[i] = 0; + domain_grav_weight[i] = 1; + domain_hydro_weight[i] = 1; + } + +#ifdef HIERARCHICAL_GRAVITY + + domain_to_be_balanced[All.HighestActiveTimeBin] = 1; + domain_grav_weight[All.HighestActiveTimeBin] = 1; + domain_hydro_weight[All.HighestActiveTimeBin] = 1; + + for(int j = All.HighestActiveTimeBin - 1; j >= All.LowestOccupiedTimeBin; j--) + { + if(tot_count[j] > 0 || tot_count_sph[j] > 0) + domain_to_be_balanced[j] = 1; + + domain_grav_weight[j] += 2; + } + + for(int i = All.SmallestTimeBinWithDomainDecomposition - 1, weight = 1; i >= All.LowestOccupiedTimeBin; i--, weight *= 2) + { + if(tot_count[i] > 0) + { + domain_grav_weight[i] = weight; + + for(int j = i - 1; j >= All.LowestOccupiedTimeBin; j--) + domain_grav_weight[j] += 2 * weight; + } + + if(tot_count_sph[i] > 0) + domain_hydro_weight[i] = weight; + } + +#else /* #ifdef HIERARCHICAL_GRAVITY */ + + domain_to_be_balanced[All.HighestActiveTimeBin] = 
1; + domain_grav_weight[All.HighestActiveTimeBin] = 1; + domain_hydro_weight[All.HighestActiveTimeBin] = 1; + + for(int i = All.SmallestTimeBinWithDomainDecomposition - 1, weight = 1; i >= All.LowestOccupiedTimeBin; i--, weight *= 2) + { + if(tot_count[i] > 0 || tot_count_sph[i] > 0) + domain_to_be_balanced[i] = 1; + + if(tot_count[i] > 0) + domain_grav_weight[i] = weight; + + if(tot_count_sph[i] > 0) + domain_hydro_weight[i] = weight; + } + +#endif /* #ifdef HIERARCHICAL_GRAVITY #else */ +} + +/*! \brief Determine cost and load + * + * This function determines the cost and load associated with each top-level + * leaf node of the tree. These leave nodes can be distributed among the + * processors in order to reach a good work-load and memory-load balance. + * + * \return void + */ +void domain_sumCost(void) +{ + int i, j, n, no, nexport = 0, nimport = 0, ngrp, task, loc_first_no; + + struct domain_cost_data *loc_DomainLeaveNode, *listCost, *export_node_data, *import_node_data; + + int *blocksize = mymalloc("blocksize", sizeof(int) * NTask); + int blk = NTopleaves / NTask; + int rmd = NTopleaves - blk * NTask; /* remainder */ + int pivot_no = rmd * (blk + 1); + + for(task = 0, loc_first_no = 0; task < NTask; task++) + { + if(task < rmd) + blocksize[task] = blk + 1; + else + blocksize[task] = blk; + + if(task < ThisTask) + loc_first_no += blocksize[task]; + } + + loc_DomainLeaveNode = mymalloc("loc_DomainLeaveNode", blocksize[ThisTask] * sizeof(struct domain_cost_data)); + memset(loc_DomainLeaveNode, 0, blocksize[ThisTask] * sizeof(struct domain_cost_data)); + + listCost = mymalloc("listCost", NTopleaves * sizeof(struct domain_cost_data)); + + int *no_place = mymalloc("no_place", NTopleaves * sizeof(int)); + memset(no_place, -1, NTopleaves * sizeof(int)); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + /* find for each particle its top-leave, and then add the associated cost with it */ + for(n = 0; n < NumPart; n++) + { +#ifdef ADDBACKGROUNDGRID + 
if(P[n].Type != 0) + continue; +#endif /* #ifdef ADDBACKGROUNDGRID */ + no = 0; + + peanokey mask = ((peanokey)7) << (3 * (BITS_PER_DIMENSION - 1)); + int shift = 3 * (BITS_PER_DIMENSION - 1); + + while(topNodes[no].Daughter >= 0) + { + no = topNodes[no].Daughter + (int)((Key[n] & mask) >> shift); + mask >>= 3; + shift -= 3; + } + + no = topNodes[no].Leaf; + + int p = no_place[no]; + if(p < 0) + { + p = nexport++; + no_place[no] = p; + + memset(&listCost[p], 0, sizeof(struct domain_cost_data)); + listCost[p].no = no; + + if(no < pivot_no) + task = no / (blk + 1); + else + task = rmd + (no - pivot_no) / blk; /* note: if blk=0, then this case can not occur, since then always no < pivot_no */ + + if(task < 0 || task > NTask) + terminate("task < 0 || task > NTask"); + + Send_count[task]++; + } + + listCost[p].Count += 1; + listCost[p].Work += domain_grav_tot_costfactor(n); + listCost[p].WorkSph += domain_hydro_tot_costfactor(n); + + if(P[n].Type == 0) + listCost[p].CountSph += 1; + } + + myfree(no_place); + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + export_node_data = mymalloc("export_node_data", nexport * sizeof(struct domain_cost_data)); + import_node_data = mymalloc("import_node_data", nimport * sizeof(struct domain_cost_data)); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(i = 0; i < nexport; i++) + { + if(listCost[i].no < pivot_no) + task = listCost[i].no / (blk + 1); + else + task = rmd + + (listCost[i].no - pivot_no) / blk; /* note: if blk=0, then this case can not occur, since then always no < pivot_no */ + + int ind = Send_offset[task] + Send_count[task]++; + export_node_data[ind] = listCost[i]; + } + + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* 
note: here we also have a transfer from each task to itself (for ngrp=0) */ + { + int recvTask = ThisTask ^ ngrp; + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&export_node_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct domain_cost_data), MPI_BYTE, + recvTask, TAG_DENS_B, &import_node_data[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct domain_cost_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + for(i = 0; i < nimport; i++) + { + int j = import_node_data[i].no - loc_first_no; + + if(j < 0 || j >= blocksize[ThisTask]) + terminate("j=%d < 0 || j>= blocksize[ThisTask]=%d loc_first_no=%d import_node_data[i].no=%d i=%d nimport=%d", j, + blocksize[ThisTask], loc_first_no, import_node_data[i].no, i, nimport); + + loc_DomainLeaveNode[j].Count += import_node_data[i].Count; + loc_DomainLeaveNode[j].Work += import_node_data[i].Work; + loc_DomainLeaveNode[j].CountSph += import_node_data[i].CountSph; + loc_DomainLeaveNode[j].WorkSph += import_node_data[i].WorkSph; + } + + myfree(import_node_data); + myfree(export_node_data); + + /* now share the cost data across all processors */ + int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask); + int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask); + + for(task = 0; task < NTask; task++) + bytecounts[task] = blocksize[task] * sizeof(struct domain_cost_data); + + for(task = 1, byteoffset[0] = 0; task < NTask; task++) + byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; + + MPI_Allgatherv(loc_DomainLeaveNode, bytecounts[ThisTask], MPI_BYTE, DomainLeaveNode, bytecounts, byteoffset, MPI_BYTE, + MPI_COMM_WORLD); + + myfree(byteoffset); + myfree(bytecounts); + myfree(listCost); + myfree(loc_DomainLeaveNode); + myfree(blocksize); +} + +/*! \brief Uses cost function to combine top-level nodes to domains. 
 *
 * This function uses the cumulative cost function (which weights work-load
 * and memory-load equally) to subdivide the list of top-level leaf nodes
 * into pieces that are (approximately) equal in size.
 *
 * The resulting leaf ranges are stored in DomainStartList[] and
 * DomainEndList[] (both inclusive), one entry per chunk.
 *
 * \param[in] ncpu Number of chunks/domains.
 * \param[in] ndomain Number of topleaves.
 *
 * \return void
 */
void domain_combine_topleaves_to_domains(int ncpu, int ndomain)
{
  double t0 = second();

  double max_work = 0;
  double workhalfnode = 0.5 / ndomain;
  double workavg = 1.0 / ncpu;
  double work_before = 0, workavg_before = 0;
  int start = 0;

  /* nabove_* only count the capped leaves; they are not read again in this function */
  int nabove_grav = 0, nabove_sph = 0;
  double todistribute_grav = 0.0;
  double todistribute_sph = 0.0;
  double weightsum_grav = 0.0;
  double weightsum_sph = 0.0;

  /* first pass: find leaves whose work exceeds the per-chunk share, sum up the excess, and sum the
     particle counts of all other leaves (used below as weights for redistributing that excess) */
  for(int i = 0; i < ndomain; i++)
  {
    if(fac_work * DomainLeaveNode[i].Work > normsum_work / ncpu)
    {
      nabove_grav++;
      todistribute_grav += DomainLeaveNode[i].Work - normsum_work / ncpu / fac_work;
    }
    else
      weightsum_grav += DomainLeaveNode[i].Count;

    if(fac_worksph * DomainLeaveNode[i].WorkSph > normsum_worksph / ncpu)
    {
      nabove_sph++;
      todistribute_sph += DomainLeaveNode[i].WorkSph - normsum_worksph / ncpu / fac_worksph;
    }
    else
      weightsum_sph += DomainLeaveNode[i].Count;
  }

  struct leafnode_data
  {
    double workgrav;
    double worksph;
  };

  struct leafnode_data *leaf = (struct leafnode_data *)mymalloc("leaf", ndomain * sizeof(struct leafnode_data));

  /* second pass: cap the too-expensive leaves at the per-chunk share and hand the excess to the
     remaining leaves in proportion to their particle count */
  for(int i = 0; i < ndomain; i++)
  {
    leaf[i].workgrav = DomainLeaveNode[i].Work;
    leaf[i].worksph = DomainLeaveNode[i].WorkSph;

    if(fac_work > 0 && weightsum_grav > 0)
    {
      if(fac_work * DomainLeaveNode[i].Work > normsum_work / ncpu)
        leaf[i].workgrav = normsum_work / ncpu / fac_work;
      else
        leaf[i].workgrav += (DomainLeaveNode[i].Count / weightsum_grav) * todistribute_grav;
    }

    if(fac_worksph > 0 && weightsum_sph > 0)
    {
      if(fac_worksph * DomainLeaveNode[i].WorkSph > normsum_worksph / ncpu)
        leaf[i].worksph = normsum_worksph / ncpu / fac_worksph;
      else
        leaf[i].worksph += (DomainLeaveNode[i].Count / weightsum_sph) * todistribute_sph;
    }
  }

  /* greedy sweep: chunk i takes consecutive leaves [start, end] */
  for(int i = 0; i < ncpu; i++)
  {
    double work = 0;
    int end = start;

    work += fac_work * leaf[end].workgrav + fac_load * DomainLeaveNode[end].Count + fac_worksph * leaf[end].worksph;

    /* extend the chunk while adding the next leaf keeps the cumulative work below the cumulative
       target (plus half an average leaf), or unconditionally for the last chunk */
    while((work + work_before +
               (end + 1 < ndomain ? fac_work * leaf[end + 1].workgrav + fac_load * DomainLeaveNode[end + 1].Count +
                                        fac_worksph * leaf[end + 1].worksph
                                  : 0) <
           workavg + workavg_before + workhalfnode) ||
          (i == ncpu - 1 && end < ndomain - 1))
    {
      /* only advance while more leaves remain than chunks still to be filled */
      if((ndomain - end) > (ncpu - i))
        end++;
      else
        break;

      work += fac_work * leaf[end].workgrav + fac_load * DomainLeaveNode[end].Count + fac_worksph * leaf[end].worksph;
    }

    DomainStartList[i] = start;
    DomainEndList[i] = end;

    work_before += work;
    workavg_before += workavg;
    start = end + 1;

    if(max_work < work)
      max_work = work;
  }

  myfree(leaf);

  double t1 = second();
  mpi_printf("DOMAIN: balance reached among multiple-domains=%g, average leave-nodes per domain=%g  (took %g sec)\n",
             max_work / workavg, ((double)ndomain) / ncpu, timediff(t0, t1));
}

/*! \brief Structure containing data for segments.
 *
 * Sorted by domain_sort_task() (field task) and domain_sort_load()
 * (field normalized_load).
 */
static struct domain_segments_data
{
  int task, start, end;
  double bin_GravCost[TIMEBINS];
  double bin_HydroCost[TIMEBINS];
  double work;
  double load;
  double worksph;
  double normalized_load;
} * domainAssign;

/*! \brief Structure containing data for task list.
 */
struct tasklist_data
{
  double bin_GravCost[TIMEBINS];
  double bin_HydroCost[TIMEBINS];
  double work;
  double load;
  double worksph;
  int count;
} * tasklist;

/*! \brief Comparison function for domain_segments_data structure.
 *
 * Compares field task.
 *
 * \param a Pointer to first object.
 * \param b Pointer to second object.
 *
 * \return (-1,0,1); -1 if a < b.
+ */ +int domain_sort_task(const void *a, const void *b) +{ + if(((struct domain_segments_data *)a)->task < (((struct domain_segments_data *)b)->task)) + return -1; + + if(((struct domain_segments_data *)a)->task > (((struct domain_segments_data *)b)->task)) + return +1; + + return 0; +} + +/*! \brief Comparison functions for domain_segmens_data structures. + * + * Compares field normalized_load. + * + * \param a Pointer to fist object. + * \param b Pointer to second object. + * + * \return (-1,0,1) -1 if a>b. + */ +int domain_sort_load(const void *a, const void *b) +{ + if(((struct domain_segments_data *)a)->normalized_load > (((struct domain_segments_data *)b)->normalized_load)) + return -1; + + if(((struct domain_segments_data *)a)->normalized_load < (((struct domain_segments_data *)b)->normalized_load)) + return +1; + + return 0; +} + +/*! \brief Comparison function for objects of type mydata. + * + * Compares elements pri and target. + * + * \param lhs Pointer to fist object. + * \param rhs Pointer to second object. + * + * \return (-1,0,1); -1 if lhs < rhs. + */ +static int mydata_cmp(struct mydata *lhs, struct mydata *rhs) +{ + if(lhs->pri < rhs->pri) + return -1; + else if(lhs->pri > rhs->pri) + return 1; + else if(lhs->target < rhs->target) + return -1; + else if(lhs->target > rhs->target) + return 1; + + return 0; +} + +/*! \brief Assigns the domain pieces to individual MPI tasks with the goal to + * balance the work-load on different timebins. + * + * The algorithm used works as follows: + * The domains are assigned to the CPUs in sequence of decreasing "effective + * load", which is a simple combined measure of relative total gravity, hydro + * and memory load. For each assignment, a number of possible target CPUs are + * evaluated, and the assignment leading to the lowest total runtime is + * adopted. 
The set of target CPUs that is tested in each step is the one
+ * that consists of the CPUs that currently have the lowest load in the set
+ * of primary tasks that are examined.
+ *
+ * \return void
+ */
+void domain_combine_multipledomains(void)
+{
+  double t0 = second();
+
+  int ndomains = All.MultipleDomains * NTask; /* number of domain pieces that have to be mapped onto the NTask tasks */
+
+  domainAssign = (struct domain_segments_data *)mymalloc("domainAssign", ndomains * sizeof(struct domain_segments_data));
+
+  tasklist = mymalloc("tasklist", NTask * sizeof(struct tasklist_data));
+
+  for(int ta = 0; ta < NTask; ta++)
+    {
+      tasklist[ta].load = 0;
+      tasklist[ta].work = 0;
+      tasklist[ta].worksph = 0;
+      tasklist[ta].count = 0;
+
+      for(int i = 0; i < TIMEBINS; i++)
+        {
+          tasklist[ta].bin_GravCost[i] = 0;
+          tasklist[ta].bin_HydroCost[i] = 0;
+        }
+    }
+
+  for(int n = 0; n < ndomains; n++)
+    for(int i = DomainStartList[n]; i <= DomainEndList[n]; i++)
+      DomainTask[i] = n; /* for now DomainTask[] holds the index of the domain piece, not yet a task number */
+
+  /* we first determine the grav-cost and hydro-cost separately for each
+   * timebin of all the domain-pieces that are available for a
+   * mapping to individual MPI tasks
+   */
+
+  struct cost_data
+  {
+    double GravCost;
+    double HydroCost;
+  } * loc_bin_Cost, *glob_bin_Cost; /* both arrays are indexed as [bin * ndomains + piece] */
+
+  loc_bin_Cost = mymalloc_clear("loc_bin_Cost", sizeof(struct cost_data) * ndomains * TIMEBINS);
+  glob_bin_Cost = mymalloc_clear("glob_bin_Cost", sizeof(struct cost_data) * ndomains * TIMEBINS);
+
+  for(int i = 0; i < NumPart; i++)
+    {
+#ifdef ADDBACKGROUNDGRID
+      if(P[i].Type != 0)
+        continue;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+      int no = 0;
+
+      peanokey mask = ((peanokey)7) << (3 * (BITS_PER_DIMENSION - 1));
+      int shift = 3 * (BITS_PER_DIMENSION - 1);
+
+      while(topNodes[no].Daughter >= 0) /* walk down the top-level tree; three key bits select the daughter on each level */
+        {
+          no = topNodes[no].Daughter + (int)((Key[i] & mask) >> shift);
+          mask >>= 3;
+          shift -= 3;
+        }
+
+      no = topNodes[no].Leaf;
+
+      int n = DomainTask[no]; /* domain piece that contains particle i */
+
+#ifdef SELFGRAVITY
+      for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestActiveTimeBin; bin++)
+        {
+          if(domain_to_be_balanced[bin])
+            {
+#ifdef HIERARCHICAL_GRAVITY
+              if(bin >= P[i].TimeBinGrav)
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+                {
+                  if(domain_bintolevel[bin] >= 0)
+                    loc_bin_Cost[bin * ndomains + n].GravCost +=
+                        MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[bin]];
+                  else
+                    {
+                      if(domain_refbin[bin] >= 0)
+                        loc_bin_Cost[bin * ndomains + n].GravCost +=
+                            MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[domain_refbin[bin]]];
+                      else
+                        loc_bin_Cost[bin * ndomains + n].GravCost += domain_grav_weight[bin];
+                    }
+                }
+            }
+        }
+#endif /* #ifdef SELFGRAVITY */
+
+      if(P[i].Type == 0)
+        {
+          for(int bin = P[i].TimeBinHydro; bin <= All.HighestActiveTimeBin; bin++)
+            if(domain_to_be_balanced[bin])
+              loc_bin_Cost[bin * ndomains + n].HydroCost += domain_hydro_weight[bin];
+        }
+    }
+
+  allreduce_sparse_double_sum((double *)(loc_bin_Cost + All.LowestOccupiedTimeBin * ndomains),
+                              (double *)(glob_bin_Cost + All.LowestOccupiedTimeBin * ndomains),
+                              2 * ndomains * (All.HighestOccupiedTimeBin - All.LowestOccupiedTimeBin + 1)); /* two doubles per entry; NOTE(review): the loops above run up to HighestActiveTimeBin, this sum up to HighestOccupiedTimeBin -- confirm the latter is never smaller */
+
+  /* now assign this cost to the domainAssign-structure, which keeps track of the different pieces */
+  double tot_work = 0;
+  double tot_load = 0;
+  double tot_worksph = 0;
+
+  for(int n = 0; n < ndomains; n++)
+    {
+      domainAssign[n].start = DomainStartList[n];
+      domainAssign[n].end = DomainEndList[n];
+      domainAssign[n].work = 0;
+      domainAssign[n].load = 0;
+      domainAssign[n].worksph = 0;
+
+      for(int i = 0; i < TIMEBINS; i++)
+        {
+          domainAssign[n].bin_GravCost[i] = glob_bin_Cost[i * ndomains + n].GravCost;
+          domainAssign[n].bin_HydroCost[i] = glob_bin_Cost[i * ndomains + n].HydroCost;
+        }
+
+      for(int i = DomainStartList[n]; i <= DomainEndList[n]; i++)
+        {
+          domainAssign[n].work += DomainLeaveNode[i].Work;
+          domainAssign[n].load += DomainLeaveNode[i].Count;
+          domainAssign[n].worksph += DomainLeaveNode[i].WorkSph;
+        }
+
+      tot_work += domainAssign[n].work;
+      tot_load += domainAssign[n].load;
+      tot_worksph += domainAssign[n].worksph;
+    }
+
+  for(int n = 0; n < ndomains; n++)
+    {
+      domainAssign[n].normalized_load = domainAssign[n].work / (tot_work + MIN_FLOAT_NUMBER) +
+                                        domainAssign[n].worksph / (tot_worksph + MIN_FLOAT_NUMBER) +
+                                        domainAssign[n].load / ((double)tot_load + MIN_FLOAT_NUMBER); /* sum of the three fractions of the respective totals */
+    }
+
+  myfree(glob_bin_Cost);
+  myfree(loc_bin_Cost);
+
+  /* sort the pieces according to their normalized work-load, with the most heavily loaded coming first */
+  mysort(domainAssign, ndomains, sizeof(struct domain_segments_data), domain_sort_load);
+
+  /* initialize a structure that stores the maximum gravity and hydro cost load for each timebin */
+  double max_GravCost[TIMEBINS], max_HydroCost[TIMEBINS];
+  for(int i = 0; i < TIMEBINS; i++)
+    {
+      max_GravCost[i] = 0;
+      max_HydroCost[i] = 0;
+    }
+
+  double max_load = 0;
+
+  /* create priority trees, one for the gravity cost of each occupied timebin,
+   * one for the hydro cost of each occupied timebin, and one for the load */
+  struct mytree queue_gravcost[TIMEBINS];
+  struct mytree queue_hydrocost[TIMEBINS];
+  struct mytree queue_load;
+  struct mydata *ngrav[TIMEBINS];
+  struct mydata *nhydro[TIMEBINS];
+  struct mydata *nload;
+
+  for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+    {
+      if(domain_to_be_balanced[bin])
+        {
+          RB_INIT(&queue_gravcost[bin]);
+          ngrav[bin] = mymalloc("ngrav[bin]", NTask * sizeof(struct mydata));
+
+          RB_INIT(&queue_hydrocost[bin]);
+          nhydro[bin] = mymalloc("nhydro[bin]", NTask * sizeof(struct mydata));
+        }
+    }
+
+  RB_INIT(&queue_load);
+  nload = mymalloc("nload", NTask * sizeof(struct mydata));
+  for(int i = 0; i < NTask; i++)
+    {
+      nload[i].pri = 0;
+      nload[i].target = i;
+      RB_INSERT(mytree, &queue_load, &nload[i]);
+    }
+
+  /* fill in all the tasks into each queue. The priority will be the current cost of the bin, the field 'target' is used to label the task
+   */
+  for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+    {
+      if(!domain_to_be_balanced[bin])
+        continue;
+
+      for(int i = 0; i < NTask; i++)
+        {
+          ngrav[bin][i].pri = 0;
+          ngrav[bin][i].target = i;
+          RB_INSERT(mytree, &queue_gravcost[bin], &ngrav[bin][i]);
+
+          nhydro[bin][i].pri = 0;
+          nhydro[bin][i].target = i;
+          RB_INSERT(mytree, &queue_hydrocost[bin], &nhydro[bin][i]);
+        }
+    }
+
+  int n_lowest = MAX_FIRST_ELEMENTS_CONSIDERED;
+  if(n_lowest > NTask)
+    n_lowest = NTask; /* each queue holds exactly NTask entries */
+
+  int rep, *candidates = mymalloc("candidates", n_lowest * sizeof(int));
+  struct mydata *np;
+
+  /* now assign each of the domains to a CPU, trying to minimize the overall runtime */
+  for(int n = 0; n < ndomains; n++)
+    {
+      double best_runtime = MAX_FLOAT_NUMBER;
+      int best_target = -1;
+
+      for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+        {
+          if(!domain_to_be_balanced[bin])
+            continue;
+
+          int target;
+
+          for(int set = 0; set < 2; set++) /* set 0: gravity queue of this bin, set 1: hydro queue of this bin */
+            {
+              if(set == 0)
+                {
+#ifndef SELFGRAVITY
+                  continue; /* without SELFGRAVITY the gravity queue is not consulted */
+#endif /* #ifndef SELFGRAVITY */
+                  /* look up the n_lowest smallest elements from the tree */
+                  for(np = RB_MIN(mytree, &queue_gravcost[bin]), rep = 0; np != NULL && rep < n_lowest;
+                      np = RB_NEXT(mytree, &queue_gravcost[bin], np), rep++)
+                    candidates[rep] = np->target;
+                }
+              else
+                {
+                  for(np = RB_MIN(mytree, &queue_hydrocost[bin]), rep = 0; np != NULL && rep < n_lowest;
+                      np = RB_NEXT(mytree, &queue_hydrocost[bin], np), rep++)
+                    candidates[rep] = np->target;
+                }
+
+              for(rep = 0; rep < n_lowest; rep++)
+                {
+                  target = candidates[rep];
+
+                  double runtime = 0; /* estimated total cost if piece n were given to this candidate */
+
+                  for(int i = 0; i < TIMEBINS; i++)
+                    {
+                      double sum = domainAssign[n].bin_GravCost[i] + tasklist[target].bin_GravCost[i];
+                      if(sum < max_GravCost[i])
+                        sum = max_GravCost[i]; /* the bin costs at least as much as its current maximum over all tasks */
+
+                      runtime += sum / (totgravcost + MIN_FLOAT_NUMBER);
+                    }
+
+                  for(int i = 0; i < TIMEBINS; i++)
+                    {
+                      double sum = domainAssign[n].bin_HydroCost[i] + tasklist[target].bin_HydroCost[i];
+                      if(sum < max_HydroCost[i])
+                        sum = max_HydroCost[i];
+
+                      runtime += sum / (totsphcost + MIN_FLOAT_NUMBER);
+                    }
+
+                  double load = domainAssign[n].load + tasklist[target].load;
+                  if(load < max_load)
+                    load = max_load;
+
+                  runtime += ((double)load) / totpartcount;
+
+                  if(runtime < best_runtime || best_target < 0)
+                    {
+                      best_runtime = runtime;
+                      best_target = target;
+                    }
+                }
+            }
+        }
+
+      /* now check also the load queue */
+      for(np = RB_MIN(mytree, &queue_load), rep = 0; np != NULL && rep < n_lowest; np = RB_NEXT(mytree, &queue_load, np), rep++)
+        candidates[rep] = np->target;
+
+      int target;
+
+      for(rep = 0; rep < n_lowest; rep++)
+        {
+          target = candidates[rep];
+
+          double runtime = 0;
+
+          for(int i = 0; i < TIMEBINS; i++)
+            {
+              double sum = domainAssign[n].bin_GravCost[i] + tasklist[target].bin_GravCost[i];
+              if(sum < max_GravCost[i])
+                sum = max_GravCost[i];
+
+              runtime += sum / (totgravcost + 1.0e-60); /* NOTE(review): the per-bin evaluation above adds MIN_FLOAT_NUMBER instead of 1.0e-60 -- confirm the difference is intended */
+            }
+
+          for(int i = 0; i < TIMEBINS; i++)
+            {
+              double sum = domainAssign[n].bin_HydroCost[i] + tasklist[target].bin_HydroCost[i];
+              if(sum < max_HydroCost[i])
+                sum = max_HydroCost[i];
+
+              runtime += sum / (totsphcost + 1.0e-60);
+            }
+
+          double load = domainAssign[n].load + tasklist[target].load;
+          if(load < max_load)
+            load = max_load;
+
+          runtime += ((double)load) / totpartcount;
+
+          if(runtime < best_runtime || best_target < 0)
+            {
+              best_runtime = runtime;
+              best_target = target;
+            }
+        }
+
+      if(best_target < 0)
+        terminate("best_target < 0");
+
+      target = best_target;
+
+      domainAssign[n].task = target;
+      tasklist[target].work += domainAssign[n].work;
+      tasklist[target].load += domainAssign[n].load;
+      tasklist[target].worksph += domainAssign[n].worksph;
+      tasklist[target].count++;
+
+      /* now update the elements in the sorted trees; an entry is taken out before its key is changed and put back afterwards */
+
+      RB_REMOVE(mytree, &queue_load, &nload[target]);
+      nload[target].pri = tasklist[target].load;
+      RB_INSERT(mytree, &queue_load, &nload[target]);
+
+      if(max_load < tasklist[target].load)
+        max_load = tasklist[target].load;
+
+      for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+        {
+          if(domain_to_be_balanced[bin])
+            {
+              tasklist[target].bin_GravCost[bin] += domainAssign[n].bin_GravCost[bin];
+              tasklist[target].bin_HydroCost[bin] += domainAssign[n].bin_HydroCost[bin];
+
+              double eps_grav = 1.0e-9 * (domainAssign[n].load / totpartcount) *
+                                totgravcost; /* these will be added in order to break degeneracies in the sort-order in case the
+                                                grav/hydro cost in certain cells is zero */
+              double eps_hydro = 1.0e-9 * (domainAssign[n].load / totpartcount) * totsphcost;
+
+              RB_REMOVE(mytree, &queue_gravcost[bin], &ngrav[bin][target]);
+              ngrav[bin][target].pri = ngrav[bin][target].pri + domainAssign[n].bin_GravCost[bin] + eps_grav;
+              RB_INSERT(mytree, &queue_gravcost[bin], &ngrav[bin][target]);
+
+              RB_REMOVE(mytree, &queue_hydrocost[bin], &nhydro[bin][target]);
+              nhydro[bin][target].pri = nhydro[bin][target].pri + domainAssign[n].bin_HydroCost[bin] + eps_hydro;
+              RB_INSERT(mytree, &queue_hydrocost[bin], &nhydro[bin][target]);
+
+              if(max_GravCost[bin] < tasklist[target].bin_GravCost[bin])
+                max_GravCost[bin] = tasklist[target].bin_GravCost[bin];
+
+              if(max_HydroCost[bin] < tasklist[target].bin_HydroCost[bin])
+                max_HydroCost[bin] = tasklist[target].bin_HydroCost[bin];
+            }
+        }
+    }
+
+  myfree(candidates);
+
+  /* free the elements for the RB tree again, in the reverse order of their allocation */
+  myfree(nload);
+  for(int bin = All.HighestOccupiedTimeBin; bin >= All.LowestOccupiedTimeBin; bin--)
+    {
+      if(domain_to_be_balanced[bin])
+        {
+          myfree(nhydro[bin]);
+          myfree(ngrav[bin]);
+        }
+    }
+
+  mysort(domainAssign, ndomains, sizeof(struct domain_segments_data), domain_sort_task); /* group the pieces by the task they were assigned to */
+
+  for(int n = 0; n < ndomains; n++)
+    {
+      DomainStartList[n] = domainAssign[n].start;
+      DomainEndList[n] = domainAssign[n].end;
+
+      for(int i = DomainStartList[n]; i <= DomainEndList[n]; i++)
+        DomainTask[i] = domainAssign[n].task; /* from here on DomainTask[] holds task numbers */
+    }
+
+  myfree(tasklist);
+  myfree(domainAssign);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: combining multiple-domains took %g sec\n", timediff(t0, t1));
+}
+
+/*! \brief Assign domains to tasks to minimize communication.
+ *
+ * This function determines a permutation of the new assignment of domains to
+ * CPUs such that the number of particles that has to be moved given the
+ * current distribution of particles is minimized.
+ *
+ * \return void
+ */
+void domain_optimize_domain_to_task_mapping(void)
+{
+  double t0 = second();
+
+  int *count_per_task = mymalloc_clear("count_per_task", NTask * sizeof(int));
+
+  /* count how many we want to send to each task */
+  for(int i = 0; i < NumPart; i++)
+    {
+      int no = 0;
+
+      while(topNodes[no].Daughter >= 0) /* descend to the top-level leaf that contains the key of particle i */
+        no = topNodes[no].Daughter + (Key[i] - topNodes[no].StartKey) / (topNodes[no].Size >> 3);
+
+      no = topNodes[no].Leaf;
+
+      int task = DomainTask[no];
+      count_per_task[task]++;
+    }
+
+  /* find the task that holds most of our particles (we really would like to be this task) */
+
+  int maxcount = count_per_task[0], maxtask = 0;
+  for(int i = 1; i < NTask; i++)
+    if(count_per_task[i] > maxcount)
+      {
+        maxcount = count_per_task[i];
+        maxtask = i;
+      }
+
+  struct domain_count_data loc_count;
+  struct domain_count_data *domain_count = mymalloc("domain_count", NTask * sizeof(struct domain_count_data));
+
+  loc_count.task = maxtask;
+  loc_count.count = maxcount;
+  loc_count.origintask = ThisTask;
+
+  MPI_Allgather(&loc_count, sizeof(struct domain_count_data), MPI_BYTE, domain_count, sizeof(struct domain_count_data), MPI_BYTE,
+                MPI_COMM_WORLD);
+
+  qsort(domain_count, NTask, sizeof(struct domain_count_data), domain_compare_count); /* order is defined by domain_compare_count, which is not part of this section */
+
+  /* this array will hold a permutation of all tasks constructed such that
+     particle exchange should be minimized */
+
+  int *new_task = mymalloc("new_task", NTask * sizeof(int));
+
+  /* this array will now flag tasks that have been assigned */
+  for(int i = 0; i < NTask; i++)
+    {
+      count_per_task[i] = 0;
+      new_task[i] = -1;
+    }
+
+  for(int i = 0; i < NTask; i++)
+    {
+      int task = domain_count[i].task;
+      int origin = domain_count[i].origintask;
+
+      if(new_task[task] == -1 && count_per_task[origin] == 0) /* grant the wish only if neither side has been used yet */
+        {
+          count_per_task[origin] = 1; /* taken */
+          new_task[task] = origin;
+        }
+    }
+
+  /* now we have to fill up still unassigned ones in case there were collisions */
+  for(int i = 0, j = 0; i < NTask; i++)
+    {
+      if(new_task[i] == -1)
+        {
+          while(count_per_task[j]) /* advance to the next task that is still free */
+            j++;
+
+          new_task[i] = j;
+          count_per_task[j] = 1;
+        }
+    }
+
+  int *copy_DomainStartList = mymalloc("copy_DomainStartList", All.MultipleDomains * NTask * sizeof(int));
+  int *copy_DomainEndList = mymalloc("copy_DomainEndList", All.MultipleDomains * NTask * sizeof(int));
+
+  memcpy(copy_DomainStartList, DomainStartList, All.MultipleDomains * NTask * sizeof(int));
+  memcpy(copy_DomainEndList, DomainEndList, All.MultipleDomains * NTask * sizeof(int));
+
+  /* apply permutation to DomainTask assignment */
+
+  for(int i = 0; i < NTask; i++)
+    for(int m = 0; m < All.MultipleDomains; m++)
+      {
+        DomainStartList[new_task[i] * All.MultipleDomains + m] = copy_DomainStartList[i * All.MultipleDomains + m];
+
+        DomainEndList[new_task[i] * All.MultipleDomains + m] = copy_DomainEndList[i * All.MultipleDomains + m];
+      }
+
+  myfree(copy_DomainEndList);
+  myfree(copy_DomainStartList);
+
+  for(int i = 0; i < NTopleaves; i++)
+    DomainTask[i] = new_task[DomainTask[i]];
+
+  myfree(new_task);
+  myfree(domain_count);
+  myfree(count_per_task);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: task reshuffling took %g sec\n", timediff(t0, t1));
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_box.c b/src/amuse/community/arepo/src/domain/domain_box.c
new file mode 100644
index 0000000000..d7466f2449
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_box.c
@@ -0,0 +1,336 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_box.c + * \date 05/2018 + * \brief Routines that determine domain box and do periodic wrapping. + * \details contains files: + * void domain_findExtent(void) + * void do_box_wrapping(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 05.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +/*! \brief Move the coordinate in pos by the global displacement vector + * + * \param[in] pos coordinate vector (3 entries). 
+ * \param[in] mode displacement mode, either DISPLACE_POSITION_FORWARD or DISPLACE_POSITION_BACKWARD + * + * \return void + */ +void domain_displacePosition(MyDouble *pos, enum domain_displace_mode mode) +{ + if(mode == DISPLACE_POSITION_FORWARD) + { + double xtmp, ytmp, ztmp; + pos[0] = WRAP_X(pos[0] + All.GlobalDisplacementVector[0]); + pos[1] = WRAP_Y(pos[1] + All.GlobalDisplacementVector[1]); + pos[2] = WRAP_Z(pos[2] + All.GlobalDisplacementVector[2]); + } + else if(mode == DISPLACE_POSITION_BACKWARD) + { + double xtmp, ytmp, ztmp; + pos[0] = WRAP_X(pos[0] - All.GlobalDisplacementVector[0]); + pos[1] = WRAP_Y(pos[1] - All.GlobalDisplacementVector[1]); + pos[2] = WRAP_Z(pos[2] - All.GlobalDisplacementVector[2]); + } + else + terminate("Unkown mode %d.", mode); +} + +/*! \brief Move the coordinate for all positions by the global displacement vector + * + * \param[in] mode displacement mode, either DISPLACE_POSITION_FORWARD or DISPLACE_POSITION_BACKWARD + * + * \return void + */ +static void domain_displacePositions(enum domain_displace_mode mode) +{ + for(int i = 0; i < NumPart; i++) + { + if(P[i].ID == 0 && P[i].Mass == 0) /* derefined */ + continue; + + domain_displacePosition(P[i].Pos, mode); + + if(i < NumGas) + domain_displacePosition(SphP[i].Center, mode); + } + +#ifdef PLACEHIGHRESREGION + domain_displacePosition(All.Xmintot[1], mode); + domain_displacePosition(All.Xmaxtot[1], mode); + domain_displacePosition(All.Corner[1], mode); + domain_displacePosition(All.UpperCorner[1], mode); +#endif +} + +/*! \brief Finds the extent of the global domain grid. + * + * The minimum extent is the box size. 
+ * + * \return void + */ +void domain_findExtent(void) +{ + int i, j; + double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3]; + + /* determine local extension */ + for(j = 0; j < 3; j++) + { + /* preset to simulation box */ + xmin[j] = 0; + xmax[j] = boxSize; + } + // Take care of stretched box +#ifdef LONG_X + xmax[0] = boxSize_X; +#endif /* #ifdef LONG_X */ +#ifdef LONG_Y + xmax[1] = boxSize_Y; +#endif /* #ifdef LONG_Y */ +#ifdef LONG_Z + xmax[2] = boxSize_Z; +#endif /* #ifdef LONG_Z */ + + for(i = 0; i < NumPart; i++) + { +#ifdef ADDBACKGROUNDGRID + if(P[i].Type != 0) + continue; +#endif /* #ifdef ADDBACKGROUNDGRID */ + for(j = 0; j < 3; j++) + { + if(xmin[j] > P[i].Pos[j]) + xmin[j] = P[i].Pos[j]; + + if(xmax[j] < P[i].Pos[j]) + xmax[j] = P[i].Pos[j]; + } + } + + MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + +#ifdef ADDBACKGROUNDGRID + for(j = 0; j < 3; j++) + if(xmax_glob[j] < All.BoxSize) + xmax_glob[j] = All.BoxSize; + + for(j = 0; j < 3; j++) + if(xmin_glob[j] > 0) + xmin_glob[j] = 0; +#endif /* #ifdef ADDBACKGROUNDGRID */ + + len = 0; + for(j = 0; j < 3; j++) + if(xmax_glob[j] - xmin_glob[j] > len) + len = xmax_glob[j] - xmin_glob[j]; + +#if defined(GRAVITY_NOT_PERIODIC) && !defined(ADDBACKGROUNDGRID) + len *= 1.2; /* enlarge box a bit to avoid triggering of an out of box recovery */ +#else /* #if defined(GRAVITY_NOT_PERIODIC) && !defined(ADDBACKGROUNDGRID) */ + len *= 1.00001; +#endif /* #if defined(GRAVITY_NOT_PERIODIC) && !defined(ADDBACKGROUNDGRID) #else */ + +#if defined(DO_NOT_RANDOMIZE_DOMAINCENTER) || !defined(GRAVITY_NOT_PERIODIC) || defined(ONEDIMS) || defined(TWODIMS) + for(j = 0; j < 3; j++) + { + DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]); + DomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len; + } +#else /* #if defined(DO_NOT_RANDOMIZE_DOMAINCENTER) || !defined(GRAVITY_NOT_PERIODIC) || defined(ONEDIMS) || 
defined(TWODIMS) */ + for(j = 0; j < 3; j++) + { + DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]); + DomainCenter[j] += (2. * get_random_number() - 1.) * 0.5 * len; + } + + MPI_Bcast(DomainCenter, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + + len *= 2; + + for(j = 0; j < 3; j++) + DomainCorner[j] = DomainCenter[j] - 0.5 * len; +#endif /* #if defined(DO_NOT_RANDOMIZE_DOMAINCENTER) || !defined(GRAVITY_NOT_PERIODIC) || defined(ONEDIMS) || defined(TWODIMS) #else \ + */ + + DomainLen = len; + + DomainInverseLen = 1.0 / DomainLen; + DomainFac = 1.0 / len * (((peanokey)1) << (BITS_PER_DIMENSION)); + DomainBigFac = (DomainLen / (((long long)1) << 52)); +} + +/*! \brief Makes sure all particles are within box. + * + * This function makes sure that all particle coordinates (Pos) are + * periodically mapped onto the interval [0, BoxSize]. After this function + * has been called, a new domain decomposition should be done, which will + * also force a new tree construction. + * + * \return void + */ +void do_box_wrapping(void) +{ + int j; + double boxsize[3]; + +#ifdef ADDBACKGROUNDGRID + return; +#endif /* #ifdef ADDBACKGROUNDGRID */ + + for(j = 0; j < 3; j++) + boxsize[j] = All.BoxSize; + +#ifdef LONG_X + boxsize[0] *= LONG_X; +#endif /* #ifdef LONG_X */ +#ifdef LONG_Y + boxsize[1] *= LONG_Y; +#endif /* #ifdef LONG_Y */ +#ifdef LONG_Z + boxsize[2] *= LONG_Z; +#endif /* #ifdef LONG_Z */ + +#if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) && (NUMDIMS > 2) + domain_displacePositions(DISPLACE_POSITION_BACKWARD); + + if(ThisTask == 0) + { + double prefac = 1.; +#ifdef PLACEHIGHRESREGION + prefac = 0.5; +#endif + for(j = 0; j < 3; j++) + All.GlobalDisplacementVector[j] = (get_random_number() - 0.5) * boxsize[j] * prefac; + } + + mpi_printf("DOMAIN: New global displacement vector: %g, %g, %g\n", All.GlobalDisplacementVector[0], All.GlobalDisplacementVector[1], + All.GlobalDisplacementVector[2]); + 
MPI_Bcast(All.GlobalDisplacementVector, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + + domain_displacePositions(DISPLACE_POSITION_FORWARD); +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) && (NUMDIMS > 2) */ + + int i; + for(i = 0; i < NumPart; i++) + { + if(i < NumGas) + trans_table[i].wrapped = 0; + +#if defined(GRAVITY_NOT_PERIODIC) + if(P[i].Type != 0) + continue; +#endif /* #if defined(GRAVITY_NOT_PERIODIC) */ + +#if !defined(REFLECTIVE_X) + while(P[i].Pos[0] < 0) + { + P[i].Pos[0] += boxsize[0]; + if(i < NumGas) + trans_table[i].wrapped |= 1; + } + + while(P[i].Pos[0] >= boxsize[0]) + { + P[i].Pos[0] -= boxsize[0]; + if(i < NumGas) + trans_table[i].wrapped |= 2; + } + +#else /* #if !defined(REFLECTIVE_X) */ + if(P[i].Pos[0] < 0 || P[i].Pos[0] >= boxsize[0]) + { + char buf[1000]; + + sprintf(buf, "i=%d ID=%d type=%d moved out of box. x=%g", i, P[i].ID, P[i].Type, P[i].Pos[0]); + terminate(buf); + } +#endif /* #if !defined(REFLECTIVE_X) #else */ + +#if !defined(REFLECTIVE_Y) + while(P[i].Pos[1] < 0) + { + P[i].Pos[1] += boxsize[1]; + if(i < NumGas) + trans_table[i].wrapped |= 4; + } + + while(P[i].Pos[1] >= boxsize[1]) + { + P[i].Pos[1] -= boxsize[1]; + if(i < NumGas) + trans_table[i].wrapped |= 8; + } + +#else /* #if !defined(REFLECTIVE_Y) */ + if(P[i].Pos[1] < 0 || P[i].Pos[1] >= boxsize[1]) + { + char buf[1000]; + + sprintf(buf, "i=%d ID=%d type=%d moved out of box. y=%g", i, P[i].ID, P[i].Type, P[i].Pos[1]); + terminate(buf); + } +#endif /* #if !defined(REFLECTIVE_Y) #else */ + +#if !defined(REFLECTIVE_Z) + while(P[i].Pos[2] < 0) + { + P[i].Pos[2] += boxsize[2]; + if(i < NumGas) + trans_table[i].wrapped |= 16; + } + + while(P[i].Pos[2] >= boxsize[2]) + { + P[i].Pos[2] -= boxsize[2]; + if(i < NumGas) + trans_table[i].wrapped |= 32; + } + +#else /* #if !defined(REFLECTIVE_Z) */ + if(P[i].Pos[2] < 0 || P[i].Pos[2] >= boxsize[2]) + { + char buf[1000]; + + sprintf(buf, "i=%d ID=%d type=%d moved out of box. 
z=%g", i, P[i].ID, P[i].Type, P[i].Pos[2]); + terminate(buf); + } +#endif /* #if !defined(REFLECTIVE_Z) #else */ + } +} diff --git a/src/amuse/community/arepo/src/domain/domain_counttogo.c b/src/amuse/community/arepo/src/domain/domain_counttogo.c new file mode 100644 index 0000000000..82e798ef7e --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain_counttogo.c @@ -0,0 +1,84 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_counttogo.c + * \date 05/2018 + * \brief Functions to determine number of exchanged particles. + * \details contains functions: + * int domain_countToGo(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 05.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +/*! \brief Determines communication matrix for particles and cells. 
+ * + * This function determines how many particles that are currently stored + * on the local CPU have to be moved off according to the domain + * decomposition. + * + * \return 0 + */ +int domain_countToGo(void) +{ + for(int n = 0; n < NTask; n++) + { + toGo[n] = 0; + toGoSph[n] = 0; + } + + for(int n = 0; n < NumPart; n++) + { + int no = 0; + + while(topNodes[no].Daughter >= 0) + no = topNodes[no].Daughter + (Key[n] - topNodes[no].StartKey) / (topNodes[no].Size >> 3); + + no = topNodes[no].Leaf; + + if(DomainTask[no] != ThisTask) + { + toGo[DomainTask[no]] += 1; + + if(P[n].Type == 0) + toGoSph[DomainTask[no]] += 1; + } + } + + MPI_Alltoall(toGo, 1, MPI_INT, toGet, 1, MPI_INT, MPI_COMM_WORLD); + MPI_Alltoall(toGoSph, 1, MPI_INT, toGetSph, 1, MPI_INT, MPI_COMM_WORLD); + + return 0; +} diff --git a/src/amuse/community/arepo/src/domain/domain_exchange.c b/src/amuse/community/arepo/src/domain/domain_exchange.c new file mode 100644 index 0000000000..454cfafc82 --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain_exchange.c @@ -0,0 +1,399 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/domain_exchange.c + * \date 05/2018 + * \brief Algorithms for exchanging particle data and associated + * rearrangements. + * \details This includes changing the size of the P and SphP arrays as + * well as the particle exchange routine itself. + * contains functions: + * void domain_resize_storage(int count_get, int count_get_sph, + * int option_flag) + * void domain_exchange(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 05.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +/*! \brief Changes memory allocation if necessary for particle and cell data. + * + * If the memory usage due to a net import or export of particles changes + * above a certain tolerance, the P and SphP structures need to be + * reallocated. + * + * \param[in] count_get How many particles are imported? + * \param[in] count_get_sph How many cells are imported? + * \param[in] option_flag 1: also reallocate the peanokey array (Key); 2: also enlarge the neighbor tree (ngbtree) if needed.
+ *
+ * \return void
+ */
+void domain_resize_storage(int count_get, int count_get_sph, int option_flag)
+{
+  int local_need[2], global_need[2];
+
+  /* storage this task needs once the announced imports have arrived */
+  local_need[0] = NumPart + count_get;
+  local_need[1] = NumGas + count_get_sph;
+
+  /* every task sizes its arrays for the largest requirement of any task */
+  MPI_Allreduce(local_need, global_need, 2, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  const int need_part = global_need[0];
+  const int need_sph = global_need[1];
+
+  /* particle storage: reallocate when the requirement leaves the tolerance band */
+  if(need_part > (1.0 - ALLOC_TOLERANCE) * All.MaxPart || need_part < (1.0 - 3 * ALLOC_TOLERANCE) * All.MaxPart)
+    {
+      All.MaxPart = need_part / (1.0 - 2 * ALLOC_TOLERANCE);
+      reallocate_memory_maxpart();
+
+      /* option 1: the key array has to follow the particle array */
+      if(option_flag == 1)
+        Key = (peanokey *)myrealloc_movable(Key, sizeof(peanokey) * All.MaxPart);
+    }
+
+  /* cell storage: same idea, note the inclusive upper bound */
+  if(need_sph >= (1.0 - ALLOC_TOLERANCE) * All.MaxPartSph || need_sph < (1.0 - 3 * ALLOC_TOLERANCE) * All.MaxPartSph)
+    {
+      All.MaxPartSph = need_sph / (1.0 - 2 * ALLOC_TOLERANCE);
+
+      /* option 2: grow the neighbor tree first if it became too small */
+      if(option_flag == 2 && All.MaxPartSph > Ngb_MaxPart)
+        ngb_treemodifylength(All.MaxPartSph - Ngb_MaxPart);
+
+      reallocate_memory_maxpartsph();
+    }
+}
+
+/*! \brief Exchanges particles and cells according to new domain decomposition.
+ *
+ * Communicates particles and cells to their new task. P and SphP arrays are
+ * changed in size accordingly.
+ *
+ * \return void
+ */
+void domain_exchange(void)
+{
+  double t0 = second();
+  /* Moves every local particle whose top-tree leaf belongs to another task
+   * (DomainTask[leaf] != ThisTask) into send buffers, compacts the local
+   * P/SphP/Key arrays, and then swaps the buffers with the other tasks.
+   * toGo/toGoSph/toGet/toGetSph are only read here, so they have to be
+   * filled in before this function is called. */
+  int count_togo = 0, count_togo_sph = 0, count_get = 0, count_get_sph = 0;
+  int *count, *count_sph, *offset, *offset_sph;
+  int *count_recv, *count_recv_sph, *offset_recv, *offset_recv_sph;
+  int i, n, no, target;
+  struct particle_data *partBuf;
+  struct sph_particle_data *sphBuf;
+
+  peanokey *keyBuf;
+
+  long long sumtogo = 0;
+  /* number of particles leaving this task; only used for the log line at the end */
+  for(i = 0; i < NTask; i++)
+    sumtogo += toGo[i];
+
+  sumup_longs(1, &sumtogo, &sumtogo); /* presumably replaces the local sum by the sum over all tasks -- confirm in sumup_longs() */
+
+  count = (int *)mymalloc_movable(&count, "count", NTask * sizeof(int));
+  count_sph = (int *)mymalloc_movable(&count_sph, "count_sph", NTask * sizeof(int));
+  offset = (int *)mymalloc_movable(&offset, "offset", NTask * sizeof(int));
+  offset_sph = (int *)mymalloc_movable(&offset_sph, "offset_sph", NTask * sizeof(int));
+  count_recv = (int *)mymalloc_movable(&count_recv, "count_recv", NTask * sizeof(int));
+  count_recv_sph = (int *)mymalloc_movable(&count_recv_sph, "count_recv_sph", NTask * sizeof(int));
+  offset_recv = (int *)mymalloc_movable(&offset_recv, "offset_recv", NTask * sizeof(int));
+  offset_recv_sph = (int *)mymalloc_movable(&offset_recv_sph, "offset_recv_sph", NTask * sizeof(int));
+  /* Send-buffer layout: gas particles first, grouped by destination task
+   * (offset_sph), followed by all other particles grouped by destination
+   * task (offset). */
+  int prec_offset;
+  int *decrease;
+
+  decrease = (int *)mymalloc_movable(&decrease, "decrease", NTask * sizeof(int));
+
+  for(i = 1, offset_sph[0] = 0, decrease[0] = 0; i < NTask; i++)
+    {
+      offset_sph[i] = offset_sph[i - 1] + toGoSph[i - 1];
+      decrease[i] = toGoSph[i - 1]; /* gas share of toGo[i - 1] */
+    }
+
+  prec_offset = offset_sph[NTask - 1] + toGoSph[NTask - 1]; /* total number of gas particles to send */
+
+  offset[0] = prec_offset; /* the non-gas section starts right behind all gas entries */
+  for(i = 1; i < NTask; i++)
+    offset[i] = offset[i - 1] + (toGo[i - 1] - decrease[i]);
+
+  myfree(decrease);
+  /* totals to send and to receive, summed over all partner tasks */
+  for(i = 0; i < NTask; i++)
+    {
+      count_togo += toGo[i];
+      count_togo_sph += toGoSph[i];
+      count_get += toGet[i];
+      count_get_sph += toGetSph[i];
+    }
+
+  partBuf = (struct particle_data *)mymalloc_movable(&partBuf, "partBuf", count_togo * sizeof(struct particle_data));
+  sphBuf = (struct sph_particle_data *)mymalloc_movable(&sphBuf, "sphBuf", count_togo_sph * sizeof(struct sph_particle_data));
+
+  keyBuf = (peanokey *)mymalloc_movable(&keyBuf, "keyBuf", count_togo * sizeof(peanokey));
+
+  for(i = 0; i < NTask; i++)
+    {
+      count[i] = count_sph[i] = 0; /* fill level of each destination's buffer section */
+    }
+
+  for(n = 0; n < NumPart; n++)
+    {
+      no = 0;
+      /* descend the top tree; each level consumes the next 3 bits of the
+       * key, starting with the most significant ones */
+      peanokey mask = ((peanokey)7) << (3 * (BITS_PER_DIMENSION - 1));
+      int shift = 3 * (BITS_PER_DIMENSION - 1);
+
+      while(topNodes[no].Daughter >= 0)
+        {
+          no = topNodes[no].Daughter + (int)((Key[n] & mask) >> shift);
+          mask >>= 3;
+          shift -= 3;
+        }
+
+      no = topNodes[no].Leaf;
+
+      target = DomainTask[no];
+
+      if(target != ThisTask)
+        {
+          /* copy this particle into the exchange buffer */
+          if(P[n].Type == 0)
+            {
+              partBuf[offset_sph[target] + count_sph[target]] = P[n];
+              keyBuf[offset_sph[target] + count_sph[target]] = Key[n];
+              sphBuf[offset_sph[target] + count_sph[target]] = SphP[n];
+              count_sph[target]++;
+            }
+          else
+            {
+              partBuf[offset[target] + count[target]] = P[n];
+              keyBuf[offset[target] + count[target]] = Key[n];
+              count[target]++;
+            }
+          /* close the hole at slot n: a gas slot is refilled with the last
+           * gas particle, whose slot in turn takes the last particle overall;
+           * a non-gas slot simply takes the last particle */
+          if(P[n].Type == 0)
+            {
+              P[n] = P[NumGas - 1];
+              P[NumGas - 1] = P[NumPart - 1];
+
+              Key[n] = Key[NumGas - 1];
+              Key[NumGas - 1] = Key[NumPart - 1];
+
+              SphP[n] = SphP[NumGas - 1];
+
+              NumGas--;
+            }
+          else
+            {
+              P[n] = P[NumPart - 1];
+              Key[n] = Key[NumPart - 1];
+            }
+
+          NumPart--;
+          n--; /* slot n now holds a different particle: examine it next */
+
+        } /* target != ThisTask */
+    }   /* n < NumPart */
+
+  /**** now resize the storage for the P[] and SphP[] arrays if needed ****/
+  domain_resize_storage(count_get, count_get_sph, 1);
+
+  /***** space has been created, now can do the actual exchange *****/
+  int count_totget = count_get_sph;
+  /* shift the local non-gas particles up to open a gap for the incoming gas
+   * directly behind the local gas */
+  if(count_totget)
+    {
+      memmove(P + NumGas + count_totget, P + NumGas, (NumPart - NumGas) * sizeof(struct particle_data));
+      memmove(Key + NumGas + count_totget, Key + NumGas, (NumPart - NumGas) * sizeof(peanokey));
+    }
+  /* toGet counts all incoming particles, toGetSph only the gas among them */
+  for(i = 0; i < NTask; i++)
+    {
+      count_recv_sph[i] = toGetSph[i];
+      count_recv[i] = toGet[i] - toGetSph[i];
+    }
+  /* incoming gas is stored from index NumGas on, incoming non-gas behind the
+   * shifted local non-gas particles */
+  int prec_count;
+  for(i = 1, offset_recv_sph[0] = NumGas; i < NTask; i++)
+    offset_recv_sph[i] = offset_recv_sph[i - 1] + count_recv_sph[i - 1];
+  prec_count = NumGas + count_get_sph;
+
+  offset_recv[0] = NumPart - NumGas + prec_count;
+
+  for(i = 1; i < NTask; i++)
+    offset_recv[i] = offset_recv[i - 1] + count_recv[i - 1];
+
+#ifndef USE_MPIALLTOALLV_IN_DOMAINDECOMP
+  /* pairwise exchange: the partner in round ngrp is ThisTask ^ ngrp; partner
+   * ranks >= NTask are skipped */
+  int ngrp;
+#ifdef NO_ISEND_IRECV_IN_DOMAIN /* synchronous communication */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      target = ThisTask ^ ngrp;
+
+      if(target < NTask)
+        {
+          if(count_sph[target] > 0 || count_recv_sph[target] > 0)
+            {
+              MPI_Sendrecv(partBuf + offset_sph[target], count_sph[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                           TAG_PDATA_SPH, P + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct particle_data), MPI_BYTE,
+                           target, TAG_PDATA_SPH, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              MPI_Sendrecv(sphBuf + offset_sph[target], count_sph[target] * sizeof(struct sph_particle_data), MPI_BYTE, target,
+                           TAG_SPHDATA, SphP + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct sph_particle_data),
+                           MPI_BYTE, target, TAG_SPHDATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              MPI_Sendrecv(keyBuf + offset_sph[target], count_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                           Key + offset_recv_sph[target], count_recv_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                           MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+
+          if(count[target] > 0 || count_recv[target] > 0)
+            {
+              MPI_Sendrecv(partBuf + offset[target], count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                           P + offset_recv[target], count_recv[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                           MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              MPI_Sendrecv(keyBuf + offset[target], count[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY,
+                           Key + offset_recv[target], count_recv[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+#else /* #ifdef NO_ISEND_IRECV_IN_DOMAIN */
+  /* asynchronous communication */
+  /* at most 5 receives and 5 sends are posted per partner task below */
+  MPI_Request *requests = (MPI_Request *)mymalloc_movable(&requests, "requests", 30 * NTask * sizeof(MPI_Request));
+  int n_requests = 0;
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      target = ThisTask ^ ngrp;
+
+      if(target < NTask)
+        {
+          if(count_recv_sph[target] > 0)
+            {
+              MPI_Irecv(P + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                        TAG_PDATA_SPH, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Irecv(SphP + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct sph_particle_data), MPI_BYTE, target,
+                        TAG_SPHDATA, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Irecv(Key + offset_recv_sph[target], count_recv_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+            }
+
+          if(count_recv[target] > 0)
+            {
+              MPI_Irecv(P + offset_recv[target], count_recv[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Irecv(Key + offset_recv[target], count_recv[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY, MPI_COMM_WORLD,
+                        &requests[n_requests++]);
+            }
+        }
+    }
+
+  MPI_Barrier(MPI_COMM_WORLD); /* not really necessary, but this will guarantee that all receives are
+                                  posted before the sends, which helps the stability of MPI on
+                                  bluegene, and perhaps some mpich1-clusters */
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      target = ThisTask ^ ngrp;
+
+      if(target < NTask)
+        {
+          if(count_sph[target] > 0)
+            {
+              MPI_Isend(partBuf + offset_sph[target], count_sph[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                        TAG_PDATA_SPH, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Isend(sphBuf + offset_sph[target], count_sph[target] * sizeof(struct sph_particle_data), MPI_BYTE, target,
+                        TAG_SPHDATA, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Isend(keyBuf + offset_sph[target], count_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+            }
+
+          if(count[target] > 0)
+            {
+              MPI_Isend(partBuf + offset[target], count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Isend(keyBuf + offset[target], count[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY, MPI_COMM_WORLD,
+                        &requests[n_requests++]);
+            }
+        }
+    }
+
+  MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);
+  myfree(requests);
+#endif /* #ifdef NO_ISEND_IRECV_IN_DOMAIN #else */
+
+#else /* #ifndef USE_MPIALLTOALLV_IN_DOMAINDECOMP */
+  /* begins block of myMPI_Alltoallv communications */
+
+  myMPI_Alltoallv(partBuf, count_sph, offset_sph, P, count_recv_sph, offset_recv_sph, sizeof(struct particle_data), 0, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(sphBuf, count_sph, offset_sph, SphP, count_recv_sph, offset_recv_sph, sizeof(struct sph_particle_data), 0,
+                  MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(keyBuf, count_sph, offset_sph, Key, count_recv_sph, offset_recv_sph, sizeof(peanokey), 0, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(partBuf, count, offset, P, count_recv, offset_recv, sizeof(struct particle_data), 0, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(keyBuf, count, offset, Key, count_recv, offset_recv, sizeof(peanokey), 0, MPI_COMM_WORLD);
+
+#endif /* #ifndef USE_MPIALLTOALLV_IN_DOMAINDECOMP #else */
+  /* close block of myMPI_Alltoallv communications */
+  /* account for the particles that were just received */
+  NumPart += count_get;
+  NumGas += count_get_sph;
+  /* the buffers are released in reverse order of their allocation */
+  myfree(keyBuf);
+  myfree(sphBuf);
+  myfree(partBuf);
+  myfree(offset_recv_sph);
+  myfree(offset_recv);
+  myfree(count_recv_sph);
+  myfree(count_recv);
+  myfree(offset_sph);
+  myfree(offset);
+  myfree(count_sph);
+  myfree(count);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: exchange of %lld particles done. (took %g sec)\n", sumtogo, timediff(t0, t1));
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_rearrange.c b/src/amuse/community/arepo/src/domain/domain_rearrange.c
new file mode 100644
index 0000000000..e75b37872e
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_rearrange.c
@@ -0,0 +1,129 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/domain_rearrange.c
+ * \date 05/2018
+ * \brief Rearranges particle and cell arrays and gets rid of inactive
+ * particles.
+ * \details contains functions:
+ * void domain_rearrange_particle_sequence(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+/*! \brief Gets rid of inactive/eliminated cells and particles.
+ *
+ * Cells that were de-refined or turned into star particles are kept in the
+ * SphP array, but flagged as inactive until this point.
This routine cleans + * up these arrays in order to make sure only active particles/cells are + * exported. + * + * \return void + */ +void domain_rearrange_particle_sequence(void) +{ +#if defined(USE_SFR) + if(Stars_converted) + { + struct particle_data psave; + peanokey key; + + for(int i = 0; i < NumGas; i++) + if(P[i].Type != 0) /*If not a gas particle, swap to the end of the list */ + { + psave = P[i]; + key = Key[i]; + + P[i] = P[NumGas - 1]; + SphP[i] = SphP[NumGas - 1]; + Key[i] = Key[NumGas - 1]; + + P[NumGas - 1] = psave; + Key[NumGas - 1] = key; + + NumGas--; + i--; + } + /* Now we have rearranged the particles, + * we don't need to do it again unless there are more stars + */ + Stars_converted = 0; + } +#endif /* #if defined(USE_SFR) */ + +#if defined(REFINEMENT_MERGE_CELLS) + int i, count_elim, count_gaselim; + + count_elim = 0; + count_gaselim = 0; + + for(i = 0; i < NumPart; i++) + if((P[i].Mass == 0 && P[i].ID == 0) || (P[i].Type == 4 && P[i].Mass == 0)) + { + if(P[i].Type == 0) + { + P[i] = P[NumGas - 1]; + SphP[i] = SphP[NumGas - 1]; + Key[i] = Key[NumGas - 1]; + + P[NumGas - 1] = P[NumPart - 1]; + Key[NumGas - 1] = Key[NumPart - 1]; + + NumGas--; + count_gaselim++; + } + + NumPart--; + i--; + count_elim++; + } + + int count[2] = {count_elim, count_gaselim}; + int tot[2] = {0, 0}, nelem = 2; + + MPI_Allreduce(count, tot, nelem, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + printf("DOMAIN: Eliminated %d derefined/swallowed gas cells.\n", tot[1]); + myflush(stdout); + } + + All.TotNumPart -= tot[0]; + All.TotNumGas -= tot[1]; + +#endif /* #if defined(REFINEMENT_MERGE_CELLS */ +} diff --git a/src/amuse/community/arepo/src/domain/domain_sort_kernels.c b/src/amuse/community/arepo/src/domain/domain_sort_kernels.c new file mode 100644 index 0000000000..b0ad2c7a28 --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain_sort_kernels.c @@ -0,0 +1,158 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_sort_kernels.c + * \date 05/2018 + * \brief Comparison and sorting functions for Peano-Hilbert data. + * \details contains functions: + * int domain_compare_count(const void *a, const void *b) + * int domain_compare_key(const void *a, const void *b) + * static void msort_domain_with_tmp(struct + * domain_peano_hilbert_data *b, size_t n, struct + * domain_peano_hilbert_data *t) + * void mysort_domain(void *b, size_t n, size_t s) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +/*! \brief Comparison function for domain_count_data objects. + * + * Compares the variable count. + * + * \param[in] a Pointer to first domain_count_data object. + * \param[in] b Pointer to second domain_count_data object. + * + * \return 1 if b>a; -1 if a>b; otherwise 0. 
+ */ +int domain_compare_count(const void *a, const void *b) +{ + if(((struct domain_count_data *)a)->count > (((struct domain_count_data *)b)->count)) + return -1; + + if(((struct domain_count_data *)a)->count < (((struct domain_count_data *)b)->count)) + return +1; + + return 0; +} + +/*! \brief Comparison function for domain_peano_hilbert_data objects. + * + * Compares element key. + * + * \param[in] a Pointer to first domain_peano_hilbert_data object. + * \param[in] b Pointer to second domain_peano_hilbert_data object. + * + * \return 1 if b>a; -1 if a>b; otherwise 0. + */ +int domain_compare_key(const void *a, const void *b) +{ + if(((struct domain_peano_hilbert_data *)a)->key < (((struct domain_peano_hilbert_data *)b)->key)) + return -1; + + if(((struct domain_peano_hilbert_data *)a)->key > (((struct domain_peano_hilbert_data *)b)->key)) + return +1; + + return 0; +} + +/*! \brief Customized mergesort sorting routine, requires temporary array. + * + * \param[in, out] b domain_peano_hilbert data array that is to be sorted. + * \param[in] n Number of elements in array. + * \param[in, out] t Temporary domain_peano_hilbert data array. + * + * \return void + */ +static void msort_domain_with_tmp(struct domain_peano_hilbert_data *b, size_t n, struct domain_peano_hilbert_data *t) +{ + struct domain_peano_hilbert_data *tmp; + struct domain_peano_hilbert_data *b1, *b2; + size_t n1, n2; + + if(n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = b + n1; + + msort_domain_with_tmp(b1, n1, t); + msort_domain_with_tmp(b2, n2, t); + + tmp = t; + + while(n1 > 0 && n2 > 0) + { + if(b1->key <= b2->key) + { + --n1; + *tmp++ = *b1++; + } + else + { + --n2; + *tmp++ = *b2++; + } + } + + if(n1 > 0) + memcpy(tmp, b1, n1 * sizeof(struct domain_peano_hilbert_data)); + + memcpy(b, t, (n - n2) * sizeof(struct domain_peano_hilbert_data)); +} + +/*! \brief Customized mergesort sorting routine. 
+ * + * This function tends to work slightly faster than a call of qsort() for + * this particular list, at least on most platforms. + * + * \param[in, out] b domain_peano_hilbert data array that is to be sorted. + * \param[in] n Number of elements. + * \param[in] s Size of structure. + * + * \return void + */ +void mysort_domain(void *b, size_t n, size_t s) +{ + const size_t size = n * s; + struct domain_peano_hilbert_data *tmp; + + tmp = (struct domain_peano_hilbert_data *)mymalloc("tmp", size); + + msort_domain_with_tmp((struct domain_peano_hilbert_data *)b, n, tmp); + + myfree(tmp); +} diff --git a/src/amuse/community/arepo/src/domain/domain_toplevel.c b/src/amuse/community/arepo/src/domain/domain_toplevel.c new file mode 100644 index 0000000000..6c1fc22ac5 --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain_toplevel.c @@ -0,0 +1,393 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_toplevel.c + * \date 05/2018 + * \brief Top level tree construction and walk routines used for the + * domain decomposition. + * \details Uses BSD macros. 
+ * contains functions: + * static int mydata_cmp(struct mydata *lhs, struct mydata *rhs) + * int domain_determineTopTree(void) + * void domain_do_local_refine(int n, int *list) + * void domain_walktoptree(int no) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 17.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "bsd_tree.h" +#include "domain.h" + +/*! \brief Structure of tree nodes. + */ +struct mydata +{ + double workload; + int topnode_index; + + RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */ +}; + +/*! \brief Comparison function of tree elements. + * + * Compares elements workload and topnode_index. + * + * \param[in] lhs pointer to left hand side top level tree node. + * \param[in] rhs pointer to right hand side top level tree node. + * + * \return -1: left is larger or lower topnode index, 1 opposite, 0 equal. + */ +static int mydata_cmp(struct mydata *lhs, struct mydata *rhs) +{ + if(lhs->workload > rhs->workload) + return -1; + else if(lhs->workload < rhs->workload) + return 1; + else if(lhs->topnode_index < rhs->topnode_index) + return -1; + else if(lhs->topnode_index > rhs->topnode_index) + return 1; + + return 0; +} + +/* the following macro declares 'struct mytree', which is the header element + * needed as handle for a tree + */ +RB_HEAD(mytree, mydata); + +static struct mydata *nload; +static struct mytree queue_load; + +/* the following macros declare appropriate function prototypes and functions + * needed for this type of tree + */ +RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp); +RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp); + +static double *list_cost, *list_sphcost; + +/*! \brief Construct top-level tree. 
+ *
+ * This function constructs the global top-level tree node that is used
+ * for the domain decomposition. This is done by considering the string of
+ * Peano-Hilbert keys for all particles, which is recursively chopped off
+ * in pieces of eight segments until each segment holds at most a certain
+ * number of particles.
+ *
+ * \return 0
+ */
+int domain_determineTopTree(void)
+{
+  double t0 = second();
+  int count = 0, message_printed = 0;
+  /* mp: (key, particle index) pairs that get sorted by key; list_cost and
+   * list_sphcost: per-particle cost factors. All three are read by
+   * domain_do_local_refine() and freed again at the end of this function. */
+  mp = (struct domain_peano_hilbert_data *)mymalloc_movable(&mp, "mp", sizeof(struct domain_peano_hilbert_data) * NumPart);
+  list_cost = mymalloc_movable(&list_cost, "list_cost", sizeof(double) * NumPart);
+  list_sphcost = mymalloc_movable(&list_sphcost, "listsph_cost", sizeof(double) * NumPart);
+
+  for(int i = 0; i < NumPart; i++)
+    {
+      peano1D xb = domain_double_to_int(((P[i].Pos[0] - DomainCorner[0]) * DomainInverseLen) + 1.0);
+      peano1D yb = domain_double_to_int(((P[i].Pos[1] - DomainCorner[1]) * DomainInverseLen) + 1.0);
+      peano1D zb = domain_double_to_int(((P[i].Pos[2] - DomainCorner[2]) * DomainInverseLen) + 1.0);
+      /* the key is stored both in the global Key[] array and in the sortable list */
+      mp[count].key = Key[i] = peano_hilbert_key(xb, yb, zb, BITS_PER_DIMENSION);
+      mp[count].index = i;
+      count++;
+
+      list_cost[i] = domain_grav_tot_costfactor(i);
+      list_sphcost[i] = domain_hydro_tot_costfactor(i);
+    }
+
+  /* sort according to key (local particles!) */
+  mysort_domain(mp, count, sizeof(struct domain_peano_hilbert_data));
+  /* start from a single root node that spans all PEANOCELLS keys and holds
+   * every local particle */
+  NTopnodes = 1;
+  NTopleaves = 1;
+  topNodes[0].Daughter = -1;
+  topNodes[0].Parent = -1;
+  topNodes[0].Size = PEANOCELLS;
+  topNodes[0].StartKey = 0;
+  topNodes[0].PIndex = 0;
+  topNodes[0].Count = count;
+  topNodes[0].Cost = gravcost;
+  topNodes[0].SphCost = sphcost;
+  /* cap on the number of top nodes; it also sizes nload[] and list[] below */
+  int limitNTopNodes = 2 * imax(1 + (NTask / 7 + 1) * 8, All.TopNodeFactor * All.MultipleDomains * NTask);
+
+#ifdef ADDBACKGROUNDGRID
+  limitNTopNodes = imax(limitNTopNodes, 2 * All.GridSize * All.GridSize * All.GridSize);
+#endif /* #ifdef ADDBACKGROUNDGRID */
+  /* enlarge the top-node arrays until they can hold limitNTopNodes entries */
+  while(limitNTopNodes > MaxTopNodes)
+    {
+      mpi_printf("DOMAIN: Increasing TopNodeAllocFactor=%g ", All.TopNodeAllocFactor);
+      All.TopNodeAllocFactor *= 1.3;
+      mpi_printf("new value=%g\n", All.TopNodeAllocFactor);
+      if(All.TopNodeAllocFactor > 1000)
+        terminate("something seems to be going seriously wrong here. Stopping.\n");
+
+      MaxTopNodes = (int)(All.TopNodeAllocFactor * All.MaxPart + 1);
+
+      topNodes = (struct local_topnode_data *)myrealloc_movable(topNodes, (MaxTopNodes * sizeof(struct local_topnode_data)));
+      TopNodes = (struct topnode_data *)myrealloc_movable(TopNodes, (MaxTopNodes * sizeof(struct topnode_data)));
+      DomainTask = (int *)myrealloc_movable(DomainTask, (MaxTopNodes * sizeof(int)));
+      DomainLeaveNode = (struct domain_cost_data *)myrealloc_movable(DomainLeaveNode, (MaxTopNodes * sizeof(struct domain_cost_data)));
+    }
+  /* queue_load is the red-black tree of refinement candidates, ordered by
+   * mydata_cmp(); nload[] provides its elements, one per top node index */
+  RB_INIT(&queue_load);
+  nload = mymalloc("nload", limitNTopNodes * sizeof(struct mydata));
+  int *list = mymalloc("list", limitNTopNodes * sizeof(int));
+
+#ifdef ADDBACKGROUNDGRID
+  peanokey MaxTopleaveSize = (PEANOCELLS / (All.GridSize * All.GridSize * All.GridSize));
+#else /* #ifdef ADDBACKGROUNDGRID */
+  double limit = 1.0 / (All.TopNodeFactor * All.MultipleDomains * NTask);
+#endif /* #ifdef ADDBACKGROUNDGRID #else */
+
+  /* insert the root node */
+  nload[0].workload = 1.0;
+  nload[0].topnode_index = 0;
+  RB_INSERT(mytree, &queue_load, &nload[0]);
+
+  int iter = 0;
+  /* each pass collects the nodes to split in list[] and refines them in one
+   * go; the loop ends with the first pass that selects nothing */
+  do
+    {
+      count = 0;
+      /* workload of the first node in queue order that can still be split (Size >= 8) */
+      double first_workload = 0;
+
+      for(struct mydata *nfirst = RB_MIN(mytree, &queue_load); nfirst != NULL; nfirst = RB_NEXT(mytree, &queue_load, nfirst))
+        {
+          if(topNodes[nfirst->topnode_index].Size >= 8)
+            {
+              first_workload = nfirst->workload;
+              break;
+            }
+        }
+      /* walk the candidates in queue order */
+      for(struct mydata *np = RB_MIN(mytree, &queue_load); np != NULL; np = RB_NEXT(mytree, &queue_load, np))
+        {
+#ifndef ADDBACKGROUNDGRID
+          if(np->workload < 0.125 * first_workload) /* end the scan at the first node below 1/8 of first_workload */
+            break;
+          /* stop before the eight children of one more node would reach limitNTopNodes */
+          if(NTopnodes + 8 * (count + 1) >= limitNTopNodes)
+            break;
+#endif /* #ifndef ADDBACKGROUNDGRID */
+
+#ifdef ADDBACKGROUNDGRID
+          if(topNodes[np->topnode_index].Size > MaxTopleaveSize)
+#else  /* #ifdef ADDBACKGROUNDGRID */
+          if(np->workload > limit || (NTopleaves < All.MultipleDomains * NTask && count == 0))
+#endif /* #ifdef ADDBACKGROUNDGRID #else */
+            {
+              if(topNodes[np->topnode_index].Size < 8) /* fewer than 8 keys left: the node cannot be split into 8 */
+                {
+                  if(message_printed == 0)
+                    {
+                      mpi_printf("DOMAIN: Note: we would like to refine top-tree, but PEANOGRID is not fine enough\n");
+#ifndef OVERRIDE_PEANOGRID_WARNING
+                      terminate(
+                          "Consider setting BITS_PER_DIMENSION up to a value of 42 to get a fine enough PEANOGRID, or force a "
+                          "continuation by activating OVERRIDE_PEANOGRID_WARNING");
+#endif /* #ifndef OVERRIDE_PEANOGRID_WARNING */
+                      message_printed = 1;
+                    }
+                }
+              else
+                {
+                  list[count] = np->topnode_index;
+                  count++;
+                }
+            }
+        }
+
+      if(count > 0)
+        {
+          domain_do_local_refine(count, list);
+          iter++;
+        }
+    }
+  while(count > 0);
+  /* the work arrays are released in reverse order of their allocation */
+  myfree(list);
+  myfree(nload);
+  myfree(list_sphcost);
+  myfree(list_cost);
+  myfree(mp);
+
+  /* count the number of top leaves */
+  NTopleaves = 0;
+  domain_walktoptree(0);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: NTopleaves=%d, determination of top-level tree involved %d iterations and took %g sec\n", NTopleaves, iter,
+             timediff(t0, t1));
+
+  t0 = second();
+
+  domain_sumCost();
+
+  t1 = second();
+  mpi_printf("DOMAIN: cost summation for top-level tree took %g sec\n", timediff(t0, t1));
+
+  return 0;
+}
+
+/*! \brief Refine top-level tree locally.
+ *
+ * Requires arrays list_cost and list_sphcost, mp.
+ *
+ * \param[in] n Number of nodes that should be refined.
+ * \param[in] list List of node indices that should be refined.
+ *
+ * \return void
+ */
+void domain_do_local_refine(int n, int *list)
+{
+  double *worktotlist = mymalloc("worktotlist", 8 * n * sizeof(double));
+  double *worklist = mymalloc("worklist", 8 * n * sizeof(double));
+  /* worklist: this task's work per new child node; worktotlist: the same
+   * summed over all tasks */
+  double non_zero = 0, non_zero_tot;
+
+  /* create the new nodes */
+  for(int k = 0; k < n; k++)
+    {
+      int i = list[k];
+      topNodes[i].Daughter = NTopnodes;
+      NTopnodes += 8;
+      NTopleaves += 7; /* one leaf is replaced by eight */
+
+      for(int j = 0; j < 8; j++)
+        {
+          int sub = topNodes[i].Daughter + j;
+
+          topNodes[sub].Daughter = -1;
+          topNodes[sub].Parent = i;
+          topNodes[sub].Size = (topNodes[i].Size >> 3);
+          topNodes[sub].StartKey = topNodes[i].StartKey + j * topNodes[sub].Size;
+          topNodes[sub].PIndex = topNodes[i].PIndex;
+          topNodes[sub].Count = 0;
+          topNodes[sub].Cost = 0;
+          topNodes[sub].SphCost = 0;
+        }
+      /* hand the parent's entries mp[PIndex .. PIndex + Count - 1] to the
+       * children; assumes mp[] is sorted by key, as done in
+       * domain_determineTopTree() */
+      int sub = topNodes[i].Daughter;
+
+      for(int p = topNodes[i].PIndex, j = 0; p < topNodes[i].PIndex + topNodes[i].Count; p++)
+        {
+          if(j < 7)
+            while(mp[p].key >= topNodes[sub + 1].StartKey)
+              {
+                j++;
+                sub++;
+                topNodes[sub].PIndex = p;
+                if(j >= 7)
+                  break;
+              }
+          /* sub is now the child that receives entry p */
+          topNodes[sub].Cost += list_cost[mp[p].index];
+          topNodes[sub].SphCost += list_sphcost[mp[p].index];
+          topNodes[sub].Count++;
+        }
+
+      for(int j = 0; j < 8; j++)
+        {
+          int sub = topNodes[i].Daughter + j;
+          worklist[k * 8 + j] = fac_work * topNodes[sub].Cost + fac_worksph * topNodes[sub].SphCost + fac_load * topNodes[sub].Count;
+          /* count populated entries to choose the reduction strategy below */
+          if(worklist[k * 8 + j] != 0)
+            non_zero++;
+        }
+    }
+
+  MPI_Allreduce(&non_zero, &non_zero_tot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  /* dense reduction if more than 5% of all entries across the tasks are
+   * non-zero, otherwise the sparse variant */
+  if(non_zero_tot > 0.05 * (NTask * 8 * n))
+    MPI_Allreduce(worklist, worktotlist, 8 * n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  else
+    allreduce_sparse_double_sum(worklist, worktotlist, 8 * n);
+  /* the refined nodes leave the queue ... */
+  for(int k = 0; k < n; k++)
+    {
+      int i = list[k];
+      RB_REMOVE(mytree, &queue_load, &nload[i]);
+    }
+  /* ... and their eight children enter it with their global workload */
+  for(int k = 0, l = 0; k < n; k++)
+    {
+      int i = list[k];
+
+      for(int j = 0; j < 8; j++, l++)
+        {
+          int sub = topNodes[i].Daughter + j;
+
+          /* insert the node */
+          nload[sub].workload = worktotlist[l];
+          nload[sub].topnode_index = sub;
+          RB_INSERT(mytree, &queue_load, &nload[sub]);
+        }
+    }
+
+  myfree(worklist);
+  myfree(worktotlist);
+}
+
+/*! \brief Walks top level tree recursively.
+ *
+ * This function walks the global top tree in order to establish the
+ * number of leaves it has, and for assigning the leaf numbers along the
+ * Peano-Hilbert Curve. These leaves are later combined to domain pieces,
+ * which are distributed to different processors.
+ *
+ * \param[in] no Present node.
+ *
+ * \return void
+ */
+void domain_walktoptree(int no)
+{
+  if(topNodes[no].Daughter == -1) /* leaf: number it in visiting order */
+    {
+      topNodes[no].Leaf = NTopleaves;
+      NTopleaves++;
+    }
+  else
+    {
+      for(int i = 0; i < 8; i++)
+        domain_walktoptree(topNodes[no].Daughter + i);
+    }
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_vars.c b/src/amuse/community/arepo/src/domain/domain_vars.c
new file mode 100644
index 0000000000..d108a7dc8f
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_vars.c
@@ -0,0 +1,117 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_vars.c + * \date 05/2018 + * \brief Variables and memory allocation functions for domain + * decomposition. + * \details contains functions: + * void domain_allocate_lists(void) + * void domain_free_lists(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 05.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +struct domain_peano_hilbert_data *mp; + +struct local_topnode_data *topNodes, *branchNodes; /*!< points to the root node of the top-level tree */ + +double totgravcost, totpartcount, gravcost, totsphcost, sphcost; + +struct domain_cost_data *DomainLeaveNode; + +double fac_work, fac_load, fac_worksph; +double normsum_work, normsum_load, normsum_worksph; + +int Nbranch; + +/*! toGo[partner] gives the number of particles on the current task that have to go to task 'partner' + */ +int *toGo, *toGoSph; +int *toGet, *toGetSph; +int *list_NumPart; +int *list_NumGas; +int *list_load; +int *list_loadsph; +double *list_work; +double *list_worksph; + +/*! \brief Allocates lists needed for communication in domain decomposition. + * + * These lists are holding information about other tasks (number of particles, + * load, work, etc.). 
+ * + * \return void + */ +void domain_allocate_lists(void) +{ + Key = (peanokey *)mymalloc_movable(&Key, "domain_key", (sizeof(peanokey) * All.MaxPart)); + toGo = (int *)mymalloc_movable(&toGo, "toGo", (sizeof(int) * NTask)); + toGoSph = (int *)mymalloc_movable(&toGoSph, "toGoSph", (sizeof(int) * NTask)); + toGet = (int *)mymalloc_movable(&toGet, "toGet", (sizeof(int) * NTask)); + toGetSph = (int *)mymalloc_movable(&toGetSph, "toGetSph", (sizeof(int) * NTask)); + list_NumPart = (int *)mymalloc_movable(&list_NumPart, "list_NumPart", (sizeof(int) * NTask)); + list_NumGas = (int *)mymalloc_movable(&list_NumGas, "list_NumGas", (sizeof(int) * NTask)); + list_load = (int *)mymalloc_movable(&list_load, "list_load", (sizeof(int) * NTask)); + list_loadsph = (int *)mymalloc_movable(&list_loadsph, "list_loadsph", (sizeof(int) * NTask)); + list_work = (double *)mymalloc_movable(&list_work, "list_work", (sizeof(double) * NTask)); + list_worksph = (double *)mymalloc_movable(&list_worksph, "list_worksph", (sizeof(double) * NTask)); + DomainLeaveNode = (struct domain_cost_data *)mymalloc_movable(&DomainLeaveNode, "DomainLeaveNode", + (MaxTopNodes * sizeof(struct domain_cost_data))); +} + +/*! \brief Frees lists needed for communication in domain decomposition. + * + * This routine is the counterpart of domain_allocate_lists(void). + * Frees memory of all arrays allocated there, except Key, which is freed + * elsewhere (in void domain_Decomposition(void); see domain.c). 
/*! \brief Frees lists needed for communication in domain decomposition.
 *
 *  This routine is the counterpart of domain_allocate_lists(void).
 *  Frees memory of all arrays allocated there, except Key, which is freed
 *  elsewhere (in void domain_Decomposition(void); see domain.c).
 *
 *  The arrays are released in exactly the reverse order in which
 *  domain_allocate_lists() allocates them.
 *
 *  \return void
 */
void domain_free_lists(void)
{
  /* NOTE(review): the reverse (last-in, first-out) order is presumably
   * required by the memory manager behind myfree() - confirm before
   * reordering any of these calls */
  myfree(DomainLeaveNode);
  myfree(list_worksph);
  myfree(list_work);
  myfree(list_loadsph);
  myfree(list_load);
  myfree(list_NumGas);
  myfree(list_NumPart);
  myfree(toGetSph);
  myfree(toGet);
  myfree(toGoSph);
  myfree(toGo);
}
+ * \details contains functions: + * void peano_hilbert_order(void) + * void peano_hilbert_order_DP(void) + * int peano_compare_key(const void *a, const void *b) + * void reorder_DP(void) + * void reorder_gas(int *Id) + * void reorder_particles(int *Id) + * peanokey peano_hilbert_key(peano1D x, peano1D y, peano1D z, + * int bits) + * void peano_hilbert_key_inverse(peanokey key, int bits, + * peano1D * x, peano1D * y, peano1D * z) + * static void msort_peano_with_tmp(struct peano_hilbert_data + * *b, size_t n, struct peano_hilbert_data *t) + * void mysort_peano(void *b, size_t n, size_t s, int (*cmp) + * (const void *, const void *)) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../mesh/voronoi/voronoi.h" + +#include + +/*! Data structure for Peano Hilbert data. + */ +static struct peano_hilbert_data +{ + peanokey key; + int index; +} * pmp; + +static int *Id; + +/*! 
\brief Sorts particles along Peano-Hilbert curve + * + * \return void + */ +void peano_hilbert_order(void) +{ + int i; + + double t0 = second(); + + // mpi_printf("DOMAIN: begin Peano-Hilbert order...\n"); + + if(NumGas) + { + pmp = (struct peano_hilbert_data *)mymalloc("pmp", sizeof(struct peano_hilbert_data) * NumGas); + Id = (int *)mymalloc("Id", sizeof(int) * NumGas); + + for(i = 0; i < NumGas; i++) + { + pmp[i].index = i; + pmp[i].key = Key[i]; + } + + mysort_peano(pmp, NumGas, sizeof(struct peano_hilbert_data), peano_compare_key); + + for(i = 0; i < NumGas; i++) + Id[pmp[i].index] = i; + + reorder_gas(Id); + + myfree(Id); + myfree(pmp); + } + + if(NumPart - NumGas > 0) + { + pmp = (struct peano_hilbert_data *)mymalloc("pmp", sizeof(struct peano_hilbert_data) * (NumPart - NumGas)); + pmp -= (NumGas); + + Id = (int *)mymalloc("Id", sizeof(int) * (NumPart - NumGas)); + Id -= (NumGas); + + for(i = NumGas; i < NumPart; i++) + { + pmp[i].index = i; + pmp[i].key = Key[i]; + } + + mysort_peano(pmp + NumGas, NumPart - NumGas, sizeof(struct peano_hilbert_data), peano_compare_key); + + for(i = NumGas; i < NumPart; i++) + Id[pmp[i].index] = i; + + reorder_particles(Id); + + Id += NumGas; + myfree(Id); + pmp += NumGas; + myfree(pmp); + } + + double t1 = second(); + mpi_printf("DOMAIN: Peano-Hilbert order done, took %g sec.\n", timediff(t0, t1)); +} + +/*! \brief Sorts Delaunay Points (DP array) along Peano-Hilbert curve. 
+ * + * \return void + */ +void peano_hilbert_order_DP(void) +{ +#ifdef ONEDIMS + return; +#endif /* #ifdef ONEDIMS */ + + int i; + + if(Mesh.Ndp) + { + pmp = (struct peano_hilbert_data *)mymalloc("pmp", sizeof(struct peano_hilbert_data) * Mesh.Ndp); + Id = (int *)mymalloc("Id", sizeof(int) * Mesh.Ndp); + + point *DP = Mesh.DP; + + for(i = 0; i < Mesh.Ndp; i++) + { + pmp[i].index = i; + pmp[i].key = peano_hilbert_key((int)((DP[i].x + DomainLen) * DomainFac / 3), (int)((DP[i].y + DomainLen) * DomainFac / 3), + (int)((DP[i].z + DomainLen) * DomainFac / 3), BITS_PER_DIMENSION); + } + + mysort_peano(pmp, Mesh.Ndp, sizeof(struct peano_hilbert_data), peano_compare_key); + + for(i = 0; i < Mesh.Ndp; i++) + Id[pmp[i].index] = i; + + reorder_DP(); + + myfree(Id); + myfree(pmp); + } + + mpi_printf("VORONOI: Peano-Hilbert of DP points done.\n"); +} + +/*! \brief Compares two peano_hilbert_data objects with each other. + * + * Sorting kernel for sorting along Peano-Hilbert curve. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a->key < b->key + */ +int peano_compare_key(const void *a, const void *b) +{ + if(((struct peano_hilbert_data *)a)->key < (((struct peano_hilbert_data *)b)->key)) + return -1; + + if(((struct peano_hilbert_data *)a)->key > (((struct peano_hilbert_data *)b)->key)) + return +1; + + return 0; +} + +/*! \brief Rearranges Delaunay points in DP array according to new ordering. + * + * Requires access to an ordering array Id which is as long as the number of + * Delaunay points and contains the new index of each Delaunay point. 
+ * + * \return void + */ +void reorder_DP(void) +{ + int i; + point DPsave, DPsource; + int idsource, idsave, dest; + point *DP = Mesh.DP; + + for(i = 0; i < Mesh.Ndp; i++) + { + if(Id[i] != i) + { + DPsource = DP[i]; + + idsource = Id[i]; + dest = Id[i]; + + do + { + DPsave = DP[dest]; + idsave = Id[dest]; + + DP[dest] = DPsource; + Id[dest] = idsource; + + if(dest == i) + break; + + DPsource = DPsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +/*! \brief Rearranges gas cells in P and SphP arrays according to new ordering. + * + * \param[in] Id Array which is as long as the number of gas cells and + * which contains the new index of each cell. + * + * \return void + */ +void reorder_gas(int *Id) +{ + int i; + struct particle_data Psave, Psource; + struct sph_particle_data SphPsave, SphPsource; + int idsource, idsave, dest; + + for(i = 0; i < NumGas; i++) + { + if(Id[i] != i) + { + Psource = P[i]; + SphPsource = SphP[i]; + + idsource = Id[i]; + dest = Id[i]; + + do + { + Psave = P[dest]; + SphPsave = SphP[dest]; + idsave = Id[dest]; + + P[dest] = Psource; + SphP[dest] = SphPsource; + Id[dest] = idsource; + + if(dest == i) + break; + + Psource = Psave; + SphPsource = SphPsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +/*! \brief Rearranges particles in P array according to new ordering. + * + * \param[in] Id Array which is as long as the number of particles and + * which contains the new index of each particle. 
+ * + * \return void + */ +void reorder_particles(int *Id) +{ + int i; + struct particle_data Psave, Psource; + int idsource, idsave, dest; + + for(i = NumGas; i < NumPart; i++) + { + if(Id[i] != i) + { + Psource = P[i]; + idsource = Id[i]; + + dest = Id[i]; + + do + { + Psave = P[dest]; + idsave = Id[dest]; + + P[dest] = Psource; + Id[dest] = idsource; + + if(dest == i) + break; + + Psource = Psave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +/* The following rewrite of the original function + * peano_hilbert_key_old() has been written by MARTIN REINECKE. + * It is about a factor 2.3 - 2.5 faster than Volker's old routine! + */ +const unsigned char rottable3[48][8] = { + {36, 28, 25, 27, 10, 10, 25, 27}, {29, 11, 24, 24, 37, 11, 26, 26}, {8, 8, 25, 27, 30, 38, 25, 27}, + {9, 39, 24, 24, 9, 31, 26, 26}, {40, 24, 44, 32, 40, 6, 44, 6}, {25, 7, 33, 7, 41, 41, 45, 45}, + {4, 42, 4, 46, 26, 42, 34, 46}, {43, 43, 47, 47, 5, 27, 5, 35}, {33, 35, 36, 28, 33, 35, 2, 2}, + {32, 32, 29, 3, 34, 34, 37, 3}, {33, 35, 0, 0, 33, 35, 30, 38}, {32, 32, 1, 39, 34, 34, 1, 31}, + {24, 42, 32, 46, 14, 42, 14, 46}, {43, 43, 47, 47, 25, 15, 33, 15}, {40, 12, 44, 12, 40, 26, 44, 34}, + {13, 27, 13, 35, 41, 41, 45, 45}, {28, 41, 28, 22, 38, 43, 38, 22}, {42, 40, 23, 23, 29, 39, 29, 39}, + {41, 36, 20, 36, 43, 30, 20, 30}, {37, 31, 37, 31, 42, 40, 21, 21}, {28, 18, 28, 45, 38, 18, 38, 47}, + {19, 19, 46, 44, 29, 39, 29, 39}, {16, 36, 45, 36, 16, 30, 47, 30}, {37, 31, 37, 31, 17, 17, 46, 44}, + {12, 4, 1, 3, 34, 34, 1, 3}, {5, 35, 0, 0, 13, 35, 2, 2}, {32, 32, 1, 3, 6, 14, 1, 3}, + {33, 15, 0, 0, 33, 7, 2, 2}, {16, 0, 20, 8, 16, 30, 20, 30}, {1, 31, 9, 31, 17, 17, 21, 21}, + {28, 18, 28, 22, 2, 18, 10, 22}, {19, 19, 23, 23, 29, 3, 29, 11}, {9, 11, 12, 4, 9, 11, 26, 26}, + {8, 8, 5, 27, 10, 10, 13, 27}, {9, 11, 24, 24, 9, 11, 6, 14}, {8, 8, 25, 15, 10, 10, 25, 7}, + {0, 18, 8, 22, 38, 18, 38, 22}, {19, 19, 23, 23, 1, 39, 9, 39}, {16, 36, 20, 36, 16, 2, 20, 10}, + 
{37, 3, 37, 11, 17, 17, 21, 21}, {4, 17, 4, 46, 14, 19, 14, 46}, {18, 16, 47, 47, 5, 15, 5, 15}, + {17, 12, 44, 12, 19, 6, 44, 6}, {13, 7, 13, 7, 18, 16, 45, 45}, {4, 42, 4, 21, 14, 42, 14, 23}, + {43, 43, 22, 20, 5, 15, 5, 15}, {40, 12, 21, 12, 40, 6, 23, 6}, {13, 7, 13, 7, 41, 41, 22, 20}}; + +const unsigned char subpix3[48][8] = { + {0, 7, 1, 6, 3, 4, 2, 5}, {7, 4, 6, 5, 0, 3, 1, 2}, {4, 3, 5, 2, 7, 0, 6, 1}, {3, 0, 2, 1, 4, 7, 5, 6}, {1, 0, 6, 7, 2, 3, 5, 4}, + {0, 3, 7, 4, 1, 2, 6, 5}, {3, 2, 4, 5, 0, 1, 7, 6}, {2, 1, 5, 6, 3, 0, 4, 7}, {6, 1, 7, 0, 5, 2, 4, 3}, {1, 2, 0, 3, 6, 5, 7, 4}, + {2, 5, 3, 4, 1, 6, 0, 7}, {5, 6, 4, 7, 2, 1, 3, 0}, {7, 6, 0, 1, 4, 5, 3, 2}, {6, 5, 1, 2, 7, 4, 0, 3}, {5, 4, 2, 3, 6, 7, 1, 0}, + {4, 7, 3, 0, 5, 6, 2, 1}, {6, 7, 5, 4, 1, 0, 2, 3}, {7, 0, 4, 3, 6, 1, 5, 2}, {0, 1, 3, 2, 7, 6, 4, 5}, {1, 6, 2, 5, 0, 7, 3, 4}, + {2, 3, 1, 0, 5, 4, 6, 7}, {3, 4, 0, 7, 2, 5, 1, 6}, {4, 5, 7, 6, 3, 2, 0, 1}, {5, 2, 6, 1, 4, 3, 7, 0}, {7, 0, 6, 1, 4, 3, 5, 2}, + {0, 3, 1, 2, 7, 4, 6, 5}, {3, 4, 2, 5, 0, 7, 1, 6}, {4, 7, 5, 6, 3, 0, 2, 1}, {6, 7, 1, 0, 5, 4, 2, 3}, {7, 4, 0, 3, 6, 5, 1, 2}, + {4, 5, 3, 2, 7, 6, 0, 1}, {5, 6, 2, 1, 4, 7, 3, 0}, {1, 6, 0, 7, 2, 5, 3, 4}, {6, 5, 7, 4, 1, 2, 0, 3}, {5, 2, 4, 3, 6, 1, 7, 0}, + {2, 1, 3, 0, 5, 6, 4, 7}, {0, 1, 7, 6, 3, 2, 4, 5}, {1, 2, 6, 5, 0, 3, 7, 4}, {2, 3, 5, 4, 1, 0, 6, 7}, {3, 0, 4, 7, 2, 1, 5, 6}, + {1, 0, 2, 3, 6, 7, 5, 4}, {0, 7, 3, 4, 1, 6, 2, 5}, {7, 6, 4, 5, 0, 1, 3, 2}, {6, 1, 5, 2, 7, 0, 4, 3}, {5, 4, 6, 7, 2, 3, 1, 0}, + {4, 3, 7, 0, 5, 2, 6, 1}, {3, 2, 0, 1, 4, 5, 7, 6}, {2, 5, 1, 6, 3, 4, 0, 7}}; + +/*! \brief This function computes a Peano-Hilbert key for an integer triplet + * (x,y,z), with x,y,z in the range between 0 and 2^bits-1. + * + * \param[in] x X position. + * \param[in] y Y position. + * \param[in] z Z position. + * \param[in] bits Number of bits used for Peano key. + * + * \return Peano-Hilbert key corresponding to position x,y,z. 
+ */ +peanokey peano_hilbert_key(peano1D x, peano1D y, peano1D z, int bits) +{ + peano1D mask; + unsigned char rotation = 0; + peanokey key = 0; + + for(mask = ((peano1D)1) << (bits - 1); mask > 0; mask >>= 1) + { + unsigned char pix = ((x & mask) ? 4 : 0) | ((y & mask) ? 2 : 0) | ((z & mask) ? 1 : 0); + + key <<= 3; + key |= subpix3[rotation][pix]; + rotation = rottable3[rotation][pix]; + } + + return key; +} + +static int quadrants[24][2][2][2] = { + /* rotx=0, roty=0-3 */ + {{{0, 7}, {1, 6}}, {{3, 4}, {2, 5}}}, + {{{7, 4}, {6, 5}}, {{0, 3}, {1, 2}}}, + {{{4, 3}, {5, 2}}, {{7, 0}, {6, 1}}}, + {{{3, 0}, {2, 1}}, {{4, 7}, {5, 6}}}, + /* rotx=1, roty=0-3 */ + {{{1, 0}, {6, 7}}, {{2, 3}, {5, 4}}}, + {{{0, 3}, {7, 4}}, {{1, 2}, {6, 5}}}, + {{{3, 2}, {4, 5}}, {{0, 1}, {7, 6}}}, + {{{2, 1}, {5, 6}}, {{3, 0}, {4, 7}}}, + /* rotx=2, roty=0-3 */ + {{{6, 1}, {7, 0}}, {{5, 2}, {4, 3}}}, + {{{1, 2}, {0, 3}}, {{6, 5}, {7, 4}}}, + {{{2, 5}, {3, 4}}, {{1, 6}, {0, 7}}}, + {{{5, 6}, {4, 7}}, {{2, 1}, {3, 0}}}, + /* rotx=3, roty=0-3 */ + {{{7, 6}, {0, 1}}, {{4, 5}, {3, 2}}}, + {{{6, 5}, {1, 2}}, {{7, 4}, {0, 3}}}, + {{{5, 4}, {2, 3}}, {{6, 7}, {1, 0}}}, + {{{4, 7}, {3, 0}}, {{5, 6}, {2, 1}}}, + /* rotx=4, roty=0-3 */ + {{{6, 7}, {5, 4}}, {{1, 0}, {2, 3}}}, + {{{7, 0}, {4, 3}}, {{6, 1}, {5, 2}}}, + {{{0, 1}, {3, 2}}, {{7, 6}, {4, 5}}}, + {{{1, 6}, {2, 5}}, {{0, 7}, {3, 4}}}, + /* rotx=5, roty=0-3 */ + {{{2, 3}, {1, 0}}, {{5, 4}, {6, 7}}}, + {{{3, 4}, {0, 7}}, {{2, 5}, {1, 6}}}, + {{{4, 5}, {7, 6}}, {{3, 2}, {0, 1}}}, + {{{5, 2}, {6, 1}}, {{4, 3}, {7, 0}}}}; + +static int rotxmap_table[24] = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 17, 18, 19, 16, 23, 20, 21, 22}; + +static int rotymap_table[24] = {1, 2, 3, 0, 16, 17, 18, 19, 11, 8, 9, 10, 22, 23, 20, 21, 14, 15, 12, 13, 4, 5, 6, 7}; + +static int rotx_table[8] = {3, 0, 0, 2, 2, 0, 0, 1}; +static int roty_table[8] = {0, 1, 1, 2, 2, 3, 3, 0}; + +static int sense_table[8] = {-1, -1, -1, +1, +1, -1, -1, -1}; + +static int 
flag_quadrants_inverse = 1; +static char quadrants_inverse_x[24][8]; +static char quadrants_inverse_y[24][8]; +static char quadrants_inverse_z[24][8]; + +/*! \brief Computes position from Peano-Hilbert key. + * + * \param[in] key Peano-Hilbert key. + * \param[in] bits Bits used for Peano-Hilbert key. + * \param[out] x X position. + * \param[out] y Y position. + * \param[out] z Z position. + */ +void peano_hilbert_key_inverse(peanokey key, int bits, peano1D *x, peano1D *y, peano1D *z) +{ + if(flag_quadrants_inverse) + { + flag_quadrants_inverse = 0; + for(int rotation = 0; rotation < 24; rotation++) + for(int bitx = 0; bitx < 2; bitx++) + for(int bity = 0; bity < 2; bity++) + for(int bitz = 0; bitz < 2; bitz++) + { + int quad = quadrants[rotation][bitx][bity][bitz]; + quadrants_inverse_x[rotation][quad] = bitx; + quadrants_inverse_y[rotation][quad] = bity; + quadrants_inverse_z[rotation][quad] = bitz; + } + } + + int shift = 3 * (bits - 1); + peanokey mask = ((peanokey)7) << shift; + int rotation = 0; + char sense = 1; + + *x = *y = *z = 0; + + for(int i = 0; i < bits; i++, mask >>= 3, shift -= 3) + { + peanokey keypart = (key & mask) >> shift; + + int quad = (sense == 1) ? (keypart) : (7 - keypart); + + *x = (*x << 1) + quadrants_inverse_x[rotation][quad]; + *y = (*y << 1) + quadrants_inverse_y[rotation][quad]; + *z = (*z << 1) + quadrants_inverse_z[rotation][quad]; + + char rotx = rotx_table[quad]; + char roty = roty_table[quad]; + sense *= sense_table[quad]; + + while(rotx > 0) + { + rotation = rotxmap_table[rotation]; + rotx--; + } + + while(roty > 0) + { + rotation = rotymap_table[rotation]; + roty--; + } + } +} + +/*! \brief Sorting algorithm for sorting along Peano-Hilbert curve. + * + * Merge sort algorithm. + * + * \param[in, out] b Array to be sorted. + * \param[in] n size of array. + * \param[in] t Array for temporary data needed by msort. 
+ * + * \return void + */ +static void msort_peano_with_tmp(struct peano_hilbert_data *b, size_t n, struct peano_hilbert_data *t) +{ + struct peano_hilbert_data *tmp; + struct peano_hilbert_data *b1, *b2; + size_t n1, n2; + + if(n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = b + n1; + + msort_peano_with_tmp(b1, n1, t); + msort_peano_with_tmp(b2, n2, t); + + tmp = t; + + while(n1 > 0 && n2 > 0) + { + if(b1->key <= b2->key) + { + --n1; + *tmp++ = *b1++; + } + else + { + --n2; + *tmp++ = *b2++; + } + } + + if(n1 > 0) + memcpy(tmp, b1, n1 * sizeof(struct peano_hilbert_data)); + memcpy(b, t, (n - n2) * sizeof(struct peano_hilbert_data)); +} + +/*! \brief Wrapper for sorting algorithm for sorting along Peano-Hilbert curve. + * + * Allocates temporary array and then calls msort_peano_with_tmp. + * This function could be replaced by a call of qsort(b, n, s, cmp), but the + * present merge sort implementation is usually a bit faster for this array. + * + * \param[in, out] b Array to be sorted. + * \param[in] n Size of array. + * \param[in] s Size of single array elements (needed for memory allocation). + * \param[in] cmp Sorting kernel function (obsolete, but still there in case + * an other sorting algorithm should be used). + * + * \return void + */ +void mysort_peano(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)) +{ + const size_t size = n * s; + + struct peano_hilbert_data *tmp = (struct peano_hilbert_data *)mymalloc("tmp", size); + + msort_peano_with_tmp((struct peano_hilbert_data *)b, n, tmp); + + myfree(tmp); +} diff --git a/src/amuse/community/arepo/src/fof/fof.c b/src/amuse/community/arepo/src/fof/fof.c new file mode 100644 index 0000000000..210f07f79c --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof.c @@ -0,0 +1,967 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof.c + * \date 05/2018 + * \brief Parallel friend of friends (FoF) group finder. + * \details contains functions: + * void fof_fof(int num) + * void fof_prepare_output_order(void) + * double fof_get_comoving_linking_length(void) + * void fof_compile_catalogue(void) + * void fof_assign_group_numbers(void) + * void fof_compute_group_properties(int gr, int start, int len) + * void fof_exchange_group_data(void) + * void fof_finish_group_properties(void) + * double fof_periodic(double x) + * double fof_periodic_wrap(double x) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +static MyIDType *MinID; +static int *Head, *Len, *Next, *Tail, *MinIDTask; + +/*! \brief Main routine to execute the friend of friends group finder. 
+ * + * If called with num == -1 as argument, only FOF is carried out and no group + * catalogs are saved to disk. If num >= 0, the code will store the + * group/subgroup catalogs, and bring the particles into output order. + * In this case, the calling routine (which is normally savepositions()) will + * need to free PS[] and bring the particles back into the original order, + * as well as reestablished the mesh. + * + * \param[in] num Index of output; if negative, no output written. + * + * \return void + */ +void fof_fof(int num) +{ + int i, start, lenloc, largestgroup; + double t0, t1, cputime; + + TIMER_START(CPU_FOF); + + mpi_printf("FOF: Begin to compute FoF group catalogue... (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + if(num >= 0 && RestartFlag != 3 && RestartFlag != 6) + { + /* let's discard an existing mesh - we do this here to reduce the peak memory usage, even at the price of + * having to recreate it later */ + free_mesh(); + } + + if(RestartFlag != 6) + { + ngb_treefree(); + + domain_free(); + } + + domain_Decomposition(); + + ngb_treeallocate(); + ngb_treebuild(NumGas); + + /* check */ + for(i = 0; i < NumPart; i++) + if((P[i].Mass == 0 && P[i].ID == 0) || (P[i].Type == 4 && P[i].Mass == 0)) + terminate("this should not happen"); + + /* this structure will hold auxiliary information for each particle, needed only during group finding */ + PS = (struct subfind_data *)mymalloc_movable(&PS, "PS", All.MaxPart * sizeof(struct subfind_data)); + + memset(PS, 0, NumPart * sizeof(struct subfind_data)); + + /* First, we save the original location of the particles, in order to be able to revert to this layout later on */ + for(i = 0; i < NumPart; i++) + { + PS[i].OriginTask = ThisTask; + PS[i].OriginIndex = i; + } + + fof_OldMaxPart = All.MaxPart; + fof_OldMaxPartSph = All.MaxPartSph; + + LinkL = fof_get_comoving_linking_length(); + + mpi_printf("FOF: Comoving linking length: %g (presently allocated=%g MB)\n", LinkL, AllocatedBytes 
/ (1024.0 * 1024.0)); + + MinID = (MyIDType *)mymalloc("MinID", NumPart * sizeof(MyIDType)); + MinIDTask = (int *)mymalloc("MinIDTask", NumPart * sizeof(int)); + + Head = (int *)mymalloc("Head", NumPart * sizeof(int)); + Len = (int *)mymalloc("Len", NumPart * sizeof(int)); + Next = (int *)mymalloc("Next", NumPart * sizeof(int)); + Tail = (int *)mymalloc("Tail", NumPart * sizeof(int)); + +#ifdef HIERARCHICAL_GRAVITY + timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestOccupiedTimeBin); +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + + construct_forcetree(0, 0, 1, All.HighestOccupiedTimeBin); /* build tree for all particles */ + +#if defined(SUBFIND) + subfind_density_hsml_guess(); +#endif /* #if defined(SUBFIND) */ + + /* initialize link-lists */ + for(i = 0; i < NumPart; i++) + { + Head[i] = Tail[i] = i; + Len[i] = 1; + Next[i] = -1; + MinID[i] = P[i].ID; + MinIDTask[i] = ThisTask; + } + + /* call routine to find primary groups */ + cputime = fof_find_groups(MinID, Head, Len, Next, Tail, MinIDTask); + mpi_printf("FOF: group finding took = %g sec\n", cputime); + +#ifdef FOF_SECONDARY_LINK_TARGET_TYPES + myfree(Father); + myfree(Nextnode); + myfree(Tree_Points); + + /* now rebuild the tree with all the types selected as secondary link targets */ + construct_forcetree(0, 0, 2, All.HighestOccupiedTimeBin); +#endif /* #ifdef FOF_SECONDARY_LINK_TARGET_TYPES */ + +#ifdef HIERARCHICAL_GRAVITY + timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin); +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + + /* call routine to attach secondary particles/cells to primary groups */ + cputime = fof_find_nearest_dmparticle(MinID, Head, Len, Next, Tail, MinIDTask); + + mpi_printf("FOF: attaching gas and star particles to nearest dm particles took = %g sec\n", cputime); + + myfree(Father); + myfree(Nextnode); + myfree(Tree_Points); + force_treefree(); + + myfree(Tail); + myfree(Next); + myfree(Len); + + t0 = second(); + + 
FOF_PList = (struct fof_particle_list *)mymalloc_movable(&FOF_PList, "FOF_PList", NumPart * sizeof(struct fof_particle_list)); + + for(i = 0; i < NumPart; i++) + { + FOF_PList[i].MinID = MinID[Head[i]]; + FOF_PList[i].MinIDTask = MinIDTask[Head[i]]; + FOF_PList[i].Pindex = i; + } + + myfree_movable(Head); + myfree_movable(MinIDTask); + myfree_movable(MinID); + + FOF_GList = (struct fof_group_list *)mymalloc_movable(&FOF_GList, "FOF_GList", sizeof(struct fof_group_list) * NumPart); + + fof_compile_catalogue(); + + t1 = second(); + mpi_printf("FOF: compiling local group data and catalogue took = %g sec\n", timediff(t0, t1)); + + MPI_Allreduce(&Ngroups, &TotNgroups, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + sumup_large_ints(1, &Nids, &TotNids); + + if(TotNgroups > 0) + { + int largestloc = 0; + + for(i = 0; i < NgroupsExt; i++) + if(FOF_GList[i].LocCount + FOF_GList[i].ExtCount > largestloc) + largestloc = FOF_GList[i].LocCount + FOF_GList[i].ExtCount; + MPI_Allreduce(&largestloc, &largestgroup, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + } + else + largestgroup = 0; + + mpi_printf("FOF: Total number of FOF groups with at least %d particles: %d\n", FOF_GROUP_MIN_LEN, TotNgroups); + mpi_printf("FOF: Largest FOF group has %d particles.\n", largestgroup); + mpi_printf("FOF: Total number of particles in FOF groups: %lld\n", TotNids); + + t0 = second(); + + MaxNgroups = 2 * imax(NgroupsExt, TotNgroups / NTask + 1); + + Group = (struct group_properties *)mymalloc_movable(&Group, "Group", sizeof(struct group_properties) * MaxNgroups); + + mpi_printf("FOF: group properties are now allocated.. 
(presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + for(i = 0, start = 0; i < NgroupsExt; i++) + { + while(FOF_PList[start].MinID < FOF_GList[i].MinID) + { + start++; + if(start > NumPart) + terminate("start > NumPart"); + } + + if(FOF_PList[start].MinID != FOF_GList[i].MinID) + terminate("ID mismatch"); + + for(lenloc = 0; start + lenloc < NumPart;) + if(FOF_PList[start + lenloc].MinID == FOF_GList[i].MinID) + lenloc++; + else + break; + + Group[i].MinID = FOF_GList[i].MinID; + Group[i].MinIDTask = FOF_GList[i].MinIDTask; + + fof_compute_group_properties(i, start, lenloc); + + start += lenloc; + } + + fof_exchange_group_data(); + + fof_finish_group_properties(); + + t1 = second(); + mpi_printf("FOF: computation of group properties took = %g sec\n", timediff(t0, t1)); + + fof_assign_group_numbers(); + + mpi_printf("FOF: Finished computing FoF groups. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + myfree_movable(FOF_GList); + myfree_movable(FOF_PList); + +#ifdef SUBFIND + if(num >= 0) + { + TIMER_STOP(CPU_FOF); + + subfind(num); + + TIMER_START(CPU_FOF); + } +#else /* #ifdef SUBFIND */ + Nsubgroups = 0; + TotNsubgroups = 0; + if(num >= 0) + { + TIMER_STOP(CPU_FOF); + TIMER_START(CPU_SNAPSHOT); + + fof_save_groups(num); + + TIMER_STOP(CPU_SNAPSHOT); + TIMER_START(CPU_FOF); + } +#endif /* #ifdef SUBFIND #else */ + + myfree_movable(Group); + + mpi_printf("FOF: All FOF related work finished. 
(presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + +#ifndef FOF_STOREIDS + if(num >= 0) + { + TIMER_STOP(CPU_FOF); + TIMER_START(CPU_SNAPSHOT); + + /* now distribute the particles into output order */ + t0 = second(); + fof_prepare_output_order(); + fof_subfind_exchange( + MPI_COMM_WORLD); /* distribute particles such that FOF groups will appear in coherent way in snapshot files */ + t1 = second(); + mpi_printf("FOF: preparing output order of particles took %g sec\n", timediff(t0, t1)); + + TIMER_STOP(CPU_SNAPSHOT); + TIMER_START(CPU_FOF); + } + else + myfree(PS); +#else /* #ifndef FOF_STOREIDS */ + myfree(PS); +#endif /* #ifndef FOF_STOREIDS #else */ + + TIMER_STOP(CPU_FOF); +} + +/*! \brief Sorts groups by the desired output order. + * + * \return void + */ +void fof_prepare_output_order(void) +{ + int i, off, ntype[NTYPES]; + + struct data_aux_sort *aux_sort = (struct data_aux_sort *)mymalloc("aux_sort", sizeof(struct data_aux_sort) * NumPart); + + for(i = 0; i < NTYPES; i++) + ntype[i] = 0; + + for(i = 0; i < NumPart; i++) + { + aux_sort[i].OriginTask = ThisTask; + aux_sort[i].OriginIndex = i; + aux_sort[i].GrNr = PS[i].GrNr; +#ifdef SUBFIND + aux_sort[i].SubNr = PS[i].SubNr; + aux_sort[i].DM_BindingEnergy = PS[i].BindingEnergy; +#endif /* #ifdef SUBFIND */ + aux_sort[i].Type = P[i].Type; + aux_sort[i].ID = P[i].ID; +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) + aux_sort[i].FileOrder = P[i].FileOrder; +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + + ntype[P[i].Type]++; + } + + qsort(aux_sort, NumPart, sizeof(struct data_aux_sort), fof_compare_aux_sort_Type); + + if(RestartFlag == 18) + { +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) + for(i = 0, off = 0; i < NTYPES; off += ntype[i], i++) + parallel_sort(aux_sort + off, ntype[i], sizeof(struct data_aux_sort), fof_compare_aux_sort_FileOrder); +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + } + else + { + for(i = 0, off = 0; i < NTYPES; off += ntype[i], i++) + 
parallel_sort(aux_sort + off, ntype[i], sizeof(struct data_aux_sort), fof_compare_aux_sort_GrNr); + } + + for(i = 0; i < NumPart; i++) + { + aux_sort[i].TargetTask = ThisTask; + aux_sort[i].TargetIndex = i; + } + + /* now bring back into starting order */ + parallel_sort(aux_sort, NumPart, sizeof(struct data_aux_sort), fof_compare_aux_sort_OriginTask_OriginIndex); + + for(i = 0; i < NumPart; i++) + { + PS[i].TargetTask = aux_sort[i].TargetTask; + PS[i].TargetIndex = aux_sort[i].TargetIndex; + } + + myfree(aux_sort); +} + +/*! \brief Calculate linking length based on mean particle separation. + * + * \return Linking length. + */ +double fof_get_comoving_linking_length(void) +{ + int i, ndm; + long long ndmtot; + double mass, masstot, rhodm; + + for(i = 0, ndm = 0, mass = 0; i < NumPart; i++) + if(((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES))) + { + ndm++; + mass += P[i].Mass; + } + sumup_large_ints(1, &ndm, &ndmtot); + MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + rhodm = (All.Omega0 - All.OmegaBaryon) * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + + return FOF_LINKLENGTH * pow(masstot / ndmtot / rhodm, 1.0 / 3); +} + +/*! \brief Compiles the group catalogue. + * + * Combines results from all tasks. 
+ * + * \return void + */ +void fof_compile_catalogue(void) +{ + int i, j, start, nimport, ngrp, recvTask; + struct fof_group_list *get_FOF_GList; + + /* sort according to MinID */ + mysort(FOF_PList, NumPart, sizeof(struct fof_particle_list), fof_compare_FOF_PList_MinID); + + for(i = 0; i < NumPart; i++) + { + FOF_GList[i].MinID = FOF_PList[i].MinID; + FOF_GList[i].MinIDTask = FOF_PList[i].MinIDTask; + if(FOF_GList[i].MinIDTask == ThisTask) + { + FOF_GList[i].LocCount = 1; + FOF_GList[i].ExtCount = 0; + } + else + { + FOF_GList[i].LocCount = 0; + FOF_GList[i].ExtCount = 1; + } + } + + /* eliminate duplicates in FOF_GList with respect to MinID */ + + if(NumPart) + NgroupsExt = 1; + else + NgroupsExt = 0; + + for(i = 1, start = 0; i < NumPart; i++) + { + if(FOF_GList[i].MinID == FOF_GList[start].MinID) + { + FOF_GList[start].LocCount += FOF_GList[i].LocCount; + FOF_GList[start].ExtCount += FOF_GList[i].ExtCount; + } + else + { + start = NgroupsExt; + FOF_GList[start] = FOF_GList[i]; + NgroupsExt++; + } + } + + /* sort the remaining ones according to task */ + mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinIDTask); + + /* count how many we have of each task */ + for(i = 0; i < NTask; i++) + Send_count[i] = 0; + for(i = 0; i < NgroupsExt; i++) + Send_count[FOF_GList[i].MinIDTask]++; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + if(j == ThisTask) /* we will not exchange the ones that are local */ + Recv_count[j] = 0; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + get_FOF_GList = (struct fof_group_list *)mymalloc("get_FOF_GList", nimport * sizeof(struct fof_group_list)); + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + 
if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the group info */ + MPI_Sendrecv(&FOF_GList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct fof_group_list), MPI_BYTE, recvTask, + TAG_DENS_A, &get_FOF_GList[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct fof_group_list), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + for(i = 0; i < nimport; i++) + get_FOF_GList[i].MinIDTask = i; + + /* sort the groups according to MinID */ + mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinID); + mysort(get_FOF_GList, nimport, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinID); + + /* merge the imported ones with the local ones */ + for(i = 0, start = 0; i < nimport; i++) + { + while(FOF_GList[start].MinID < get_FOF_GList[i].MinID) + { + start++; + if(start >= NgroupsExt) + terminate("start >= NgroupsExt"); + } + + if(get_FOF_GList[i].LocCount != 0) + terminate("start >= NgroupsExt"); + + if(FOF_GList[start].MinIDTask != ThisTask) + terminate("FOF_GList[start].MinIDTask != ThisTask"); + + if(FOF_GList[start].MinID != get_FOF_GList[i].MinID) + terminate( + "FOF_GList[start].MinID != get_FOF_GList[i].MinID start=%d i=%d FOF_GList[start].MinID=%llu get_FOF_GList[i].MinID=%llu\n", + start, i, (long long)FOF_GList[start].MinID, (long long)get_FOF_GList[i].MinID); + + FOF_GList[start].ExtCount += get_FOF_GList[i].ExtCount; + } + + /* copy the size information back into the list, to inform the others */ + for(i = 0, start = 0; i < nimport; i++) + { + while(FOF_GList[start].MinID < get_FOF_GList[i].MinID) + { + start++; + if(start >= NgroupsExt) + terminate("start >= NgroupsExt"); + } + + get_FOF_GList[i].ExtCount = FOF_GList[start].ExtCount; + get_FOF_GList[i].LocCount = FOF_GList[start].LocCount; + } + + /* sort the imported/exported list according to MinIDTask */ + mysort(get_FOF_GList, nimport, sizeof(struct fof_group_list), 
fof_compare_FOF_GList_MinIDTask); + mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinIDTask); + + for(i = 0; i < nimport; i++) + get_FOF_GList[i].MinIDTask = ThisTask; + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the group info */ + MPI_Sendrecv(&get_FOF_GList[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct fof_group_list), MPI_BYTE, + recvTask, TAG_DENS_A, &FOF_GList[Send_offset[recvTask]], + Send_count[recvTask] * sizeof(struct fof_group_list), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + myfree(get_FOF_GList); + + /* eliminate all groups that are too small, and count local groups */ + for(i = 0, Ngroups = 0, Nids = 0; i < NgroupsExt; i++) + { + if(FOF_GList[i].LocCount + FOF_GList[i].ExtCount < FOF_GROUP_MIN_LEN) + { + FOF_GList[i] = FOF_GList[NgroupsExt - 1]; + NgroupsExt--; + i--; + } + else + { + if(FOF_GList[i].MinIDTask == ThisTask) + { + Ngroups++; + Nids += FOF_GList[i].LocCount + FOF_GList[i].ExtCount; + } + } + } + + /* sort the group list according to MinID */ + mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinID); +} + +/*! \brief Assigns each group a global group number. 
+ * + * \return void + */ +void fof_assign_group_numbers(void) +{ + int i, j, ngr, start, lenloc; + long long totNids; + double t0, t1; + + mpi_printf("FOF: start assigning group numbers\n"); + + t0 = second(); + + /* assign group numbers (at this point, both Group and FOF_GList are sorted by MinID) */ + for(i = 0; i < NgroupsExt; i++) + { + FOF_GList[i].LocCount += FOF_GList[i].ExtCount; /* total length */ + FOF_GList[i].ExtCount = ThisTask; /* original task */ + } + + parallel_sort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_LocCountTaskDiffMinID); + + for(i = 0, ngr = 0; i < NgroupsExt; i++) + { + if(FOF_GList[i].ExtCount == FOF_GList[i].MinIDTask) + ngr++; + + FOF_GList[i].GrNr = ngr - 1; + } + + MPI_Allgather(&ngr, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD); + + /* count how many groups there are on earlier CPUs */ + long long ngr_sum; + for(j = 0, ngr_sum = 0; j < ThisTask; j++) + ngr_sum += Send_count[j]; + + for(i = 0; i < NgroupsExt; i++) + FOF_GList[i].GrNr += ngr_sum; + + sumup_large_ints(1, &ngr, &ngr_sum); + if(ngr_sum != TotNgroups) + { + printf("ngr_sum=%d\n", (int)ngr_sum); + terminate("inconsistency"); + } + + /* bring the group list back into the original order */ + parallel_sort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_ExtCountMinID); + + /* Assign the group numbers to the group properties array */ + for(i = 0, start = 0; i < Ngroups; i++) + { + while(FOF_GList[start].MinID < Group[i].MinID) + { + start++; + if(start >= NgroupsExt) + terminate("start >= NgroupsExt"); + } + Group[i].GrNr = FOF_GList[start].GrNr; + } + + /* sort the groups according to group-number */ + parallel_sort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_GrNr); + + for(i = 0; i < NumPart; i++) + PS[i].GrNr = TotNgroups + 1; /* this marks all particles that are not in any group */ + + for(i = 0, start = 0, Nids = 0; i < NgroupsExt; i++) + { + while(FOF_PList[start].MinID < 
FOF_GList[i].MinID) + { + start++; + if(start > NumPart) + terminate("start > NumPart"); + } + + if(FOF_PList[start].MinID != FOF_GList[i].MinID) + terminate("FOF_PList[start=%d].MinID=%lld != FOF_GList[i=%d].MinID=%lld", start, (long long)FOF_PList[start].MinID, i, + (long long)FOF_GList[i].MinID); + + for(lenloc = 0; start + lenloc < NumPart;) + if(FOF_PList[start + lenloc].MinID == FOF_GList[i].MinID) + { + PS[FOF_PList[start + lenloc].Pindex].GrNr = FOF_GList[i].GrNr; + Nids++; + lenloc++; + } + else + break; + + start += lenloc; + } + + sumup_large_ints(1, &Nids, &totNids); + + if(totNids != TotNids) + { + char buf[1000]; + sprintf(buf, "Task=%d Nids=%d totNids=%d TotNids=%d\n", ThisTask, Nids, (int)totNids, (int)TotNids); + terminate(buf); + } + + t1 = second(); + + mpi_printf("FOF: Assigning of group numbers took = %g sec\n", timediff(t0, t1)); +} + +/*! \brief Computes all kind of properties of groups. + * + * Not complete after calling this. There is still the function + * fof_finish_group_properties, which finalizes the calculation + * (with normalization, averages, unit conversions and other operations). + * + * \param[in] gr Index in Group array. + * \param[in] start Start index in FOF_PList. + * \param[in] len Number of particles in this group. 
+ * + * \return void + */ +void fof_compute_group_properties(int gr, int start, int len) +{ + int j, k, index, type, start_index = FOF_PList[start].Pindex; + double xyz[3]; + + Group[gr].Len = 0; + double gr_Mass = 0; +#ifdef USE_SFR + double gr_Sfr = 0; +#endif /* #ifdef USE_SFR */ + + double gr_CM[3], gr_Vel[3]; + for(k = 0; k < 3; k++) + { + gr_CM[k] = 0; + gr_Vel[k] = 0; + Group[gr].FirstPos[k] = P[start_index].Pos[k]; + } + + double gr_MassType[NTYPES]; + for(k = 0; k < NTYPES; k++) + { + Group[gr].LenType[k] = 0; + gr_MassType[k] = 0; + } + + // calculate + for(k = 0; k < len; k++) + { + index = FOF_PList[start + k].Pindex; + + Group[gr].Len++; + gr_Mass += P[index].Mass; + type = P[index].Type; + + Group[gr].LenType[type]++; + + gr_MassType[type] += P[index].Mass; + +#ifdef USE_SFR + if(P[index].Type == 0) + gr_Sfr += SphP[index].Sfr; +#endif /* #ifdef USE_SFR */ + + for(j = 0; j < 3; j++) + { + xyz[j] = P[index].Pos[j]; + xyz[j] = fof_periodic(xyz[j] - P[start_index].Pos[j]); + gr_CM[j] += P[index].Mass * xyz[j]; + gr_Vel[j] += P[index].Mass * P[index].Vel[j]; + } + } + + // put values into group struct + Group[gr].Mass = gr_Mass; +#ifdef USE_SFR + Group[gr].Sfr = gr_Sfr; +#endif /* #ifdef USE_SFR */ + + for(k = 0; k < 3; k++) + { + Group[gr].CM[k] = gr_CM[k]; + Group[gr].Vel[k] = gr_Vel[k]; + } + + for(k = 0; k < NTYPES; k++) + Group[gr].MassType[k] = gr_MassType[k]; +} + +/*! \brief Global exchange of identified groups to their appropriate task. 
+ * + * \return void + */ +void fof_exchange_group_data(void) +{ + struct group_properties *get_Group; + int i, j, ngrp, recvTask, nimport, start; + double xyz[3]; + + /* sort the groups according to task */ + mysort(Group, NgroupsExt, sizeof(struct group_properties), fof_compare_Group_MinIDTask); + + /* count how many we have of each task */ + for(i = 0; i < NTask; i++) + Send_count[i] = 0; + for(i = 0; i < NgroupsExt; i++) + Send_count[FOF_GList[i].MinIDTask]++; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + if(j == ThisTask) /* we will not exchange the ones that are local */ + Recv_count[j] = 0; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + get_Group = (struct group_properties *)mymalloc("get_Group", sizeof(struct group_properties) * nimport); + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the group data */ + MPI_Sendrecv(&Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct group_properties), MPI_BYTE, recvTask, + TAG_DENS_A, &get_Group[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct group_properties), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + /* sort the groups again according to MinID */ + mysort(Group, NgroupsExt, sizeof(struct group_properties), fof_compare_Group_MinID); + mysort(get_Group, nimport, sizeof(struct group_properties), fof_compare_Group_MinID); + + /* now add in the partial imported group data to the main ones */ + for(i = 0, start = 0; i < nimport; i++) + { + while(Group[start].MinID < get_Group[i].MinID) + { + start++; + if(start >= NgroupsExt) + terminate("start >= NgroupsExt"); + } + + 
Group[start].Len += get_Group[i].Len; + Group[start].Mass += get_Group[i].Mass; + + for(j = 0; j < NTYPES; j++) + { + Group[start].LenType[j] += get_Group[i].LenType[j]; + Group[start].MassType[j] += get_Group[i].MassType[j]; + } + +#ifdef USE_SFR + Group[start].Sfr += get_Group[i].Sfr; +#endif /* #ifdef USE_SFR */ + + for(j = 0; j < 3; j++) + { + xyz[j] = get_Group[i].CM[j] / get_Group[i].Mass; + xyz[j] = fof_periodic(xyz[j] + get_Group[i].FirstPos[j] - Group[start].FirstPos[j]); + Group[start].CM[j] += get_Group[i].Mass * xyz[j]; + Group[start].Vel[j] += get_Group[i].Vel[j]; + } + } + + myfree(get_Group); +} + +/*! \brief Finalizes group property calculation. + * + * Called after a loop over all particles of a group is already completed. + * + * \return void + */ +void fof_finish_group_properties(void) +{ + double cm[3]; + int i, j, ngr; + + for(i = 0; i < NgroupsExt; i++) + { + if(Group[i].MinIDTask == ThisTask) + { + for(j = 0; j < 3; j++) + { + Group[i].Vel[j] /= Group[i].Mass; + cm[j] = Group[i].CM[j] / Group[i].Mass; + cm[j] = fof_periodic_wrap(cm[j] + Group[i].FirstPos[j]); + Group[i].CM[j] = cm[j]; + } + } + } + + /* eliminate the non-local groups */ + for(i = 0, ngr = NgroupsExt; i < ngr; i++) + { + if(Group[i].MinIDTask != ThisTask) + { + Group[i] = Group[ngr - 1]; + i--; + ngr--; + } + } + + if(ngr != Ngroups) + terminate("ngr != Ngroups"); + + mysort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_MinID); +} + +/*! \brief Do periodic wrap for coordinate. + * + * Note that his works only for cubic box. + * + * \param[in] x Coordinate. + * + * \return coordinate within [-0.5*BoxSize,0.5*BoxSize). + */ +double fof_periodic(double x) +{ +#ifndef GRAVITY_NOT_PERIODIC + if(x >= 0.5 * All.BoxSize) + x -= All.BoxSize; + if(x < -0.5 * All.BoxSize) + x += All.BoxSize; +#endif /* #ifndef GRAVITY_NOT_PERIODIC */ + return x; +} + +/*! \brief Do periodic wrap for coordinate. + * + * Note that his works only for cubic box. 
+ * + * \param[in] x Coordinate. + * + * \return coordinate within [0,BoxSize). + */ +double fof_periodic_wrap(double x) +{ +#ifndef GRAVITY_NOT_PERIODIC + while(x >= All.BoxSize) + x -= All.BoxSize; + while(x < 0) + x += All.BoxSize; +#endif /* #ifndef GRAVITY_NOT_PERIODIC */ + return x; +} + +#endif /* of FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof.h b/src/amuse/community/arepo/src/fof/fof.h new file mode 100644 index 0000000000..e60771b3aa --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof.h @@ -0,0 +1,319 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof.h + * \date 05/2018 + * \brief Header for Friend-of-Friends halo finder. 
#ifndef FOF_H
#define FOF_H

#include "../main/allvars.h"

/* Group and particle counters of the FOF catalogue. In fof.c, Ngroups counts the groups whose
 * MinIDTask is the local task, NgroupsExt the number of entries in FOF_GList, and Nids the
 * particles of the local groups; the Tot* variables are compared against global sums there. */
extern int Ngroups, NgroupsExt, MaxNgroups, TotNgroups, Nsubgroups, TotNsubgroups;
extern int Nids;
extern long long TotNids;

extern int fof_OldMaxPart;
extern int fof_OldMaxPartSph;

extern double LinkL;
extern unsigned char *flag_node_inside_linkinglength;

#define BITFLAG_INSIDE_LINKINGLENGTH 1

/* if no secondary link targets are configured, fall back to the primary link types */
#ifndef FOF_SECONDARY_LINK_TARGET_TYPES
#define FOF_SECONDARY_LINK_TARGET_TYPES FOF_PRIMARY_LINK_TYPES
#endif

/* Properties of one FOF group. Mass, CM and Vel are first accumulated as sums by
 * fof_compute_group_properties() and fof_exchange_group_data() and are normalised in
 * fof_finish_group_properties(). */
extern struct group_properties
{
  int Len;
  MyIDType MinID;
  MyIDType MinIDTask; /* compared against ThisTask to decide whether the group is local */
  int GrNr;           /* global group number, set in fof_assign_group_numbers() */
  int LenType[NTYPES];
  MyFloat MassType[NTYPES];
  MyFloat Mass;
  MyDouble CM[3];
  MyFloat Vel[3];
  MyDouble Pos[3];

  MyDouble FirstPos[3]; /* position of the group's first particle; reference point for the periodic offsets in CM */
#ifdef USE_SFR
  MyFloat Sfr;
#endif /* #ifdef USE_SFR */

#ifdef SUBFIND
  int TargetTask; /* primary CPU responsible for this group */
  int Nsubs;
  int FirstSub;
  MyFloat M_Mean200, R_Mean200;
  MyFloat M_Crit200, R_Crit200;
  MyFloat M_Crit500, R_Crit500;
  MyFloat M_TopHat200, R_TopHat200;
#ifdef SUBFIND_EXTENDED_PROPERTIES
  MyFloat J_Mean200[3], JDM_Mean200[3], JGas_Mean200[3], JStars_Mean200[3], MassType_Mean200[NTYPES], CMFrac_Mean200,
      CMFracType_Mean200[NTYPES];
  MyFloat J_Crit200[3], JDM_Crit200[3], JGas_Crit200[3], JStars_Crit200[3], MassType_Crit200[NTYPES], CMFrac_Crit200,
      CMFracType_Crit200[NTYPES];
  MyFloat J_Crit500[3], JDM_Crit500[3], JGas_Crit500[3], JStars_Crit500[3], MassType_Crit500[NTYPES], CMFrac_Crit500,
      CMFracType_Crit500[NTYPES];
  MyFloat J_TopHat200[3], JDM_TopHat200[3], JGas_TopHat200[3], JStars_TopHat200[3], MassType_TopHat200[NTYPES], CMFrac_TopHat200,
      CMFracType_TopHat200[NTYPES];
  int LenType_Mean200[NTYPES], LenType_Crit200[NTYPES], LenType_Crit500[NTYPES], LenType_TopHat200[NTYPES];
  MyFloat J[3], JDM[3], JGas[3], JStars[3], CMFrac, CMFracType[NTYPES];
  MyFloat Ekin, Epot, Ethr;
  MyFloat Ekin_Crit200, Epot_Crit200, Ethr_Crit200;
  MyFloat Ekin_Crit500, Epot_Crit500, Ethr_Crit500;
  MyFloat Ekin_Mean200, Epot_Mean200, Ethr_Mean200;
  MyFloat Ekin_TopHat200, Epot_TopHat200, Ethr_TopHat200;
#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
#endif /* #ifdef SUBFIND */

} * Group;

/* Helper record of fof_prepare_output_order(): remembers where a particle comes from
 * (Origin*), where it should go (Target*), and the keys used by the sort comparators. */
struct data_aux_sort
{
  int OriginTask, OriginIndex;
  int TargetTask, TargetIndex;
  int GrNr;
  int Type;
  MyIDType ID;
#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
  MyIDType FileOrder;
#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
#ifdef SUBFIND
  int SubNr;
  MyFloat DM_BindingEnergy;
#endif /* #ifdef SUBFIND */
};

/* Per-particle link information; sorted by MinID in fof_compile_catalogue(). */
extern struct fof_particle_list
{
  MyIDType MinID;
  int MinIDTask;
  int Pindex; /* index of the particle in the P[] / PS[] arrays */
} * FOF_PList;

/* One entry per distinct MinID present on the local task. */
extern struct fof_group_list
{
  MyIDType MinID;
  int MinIDTask;
  int LocCount; /* particles counted where MinIDTask equals the counting task; reused as total length in fof_assign_group_numbers() */
  int ExtCount; /* particles counted on tasks other than MinIDTask; reused as original task in fof_assign_group_numbers() */
  int GrNr;
} * FOF_GList;

extern struct id_list
{
  MyIDType ID;
  int GrNr;
  int Type;
#ifdef SUBFIND
  int SubNr;
  MyFloat BindingEgy;
#endif /* #ifdef SUBFIND */
} * ID_list;

/* four 2-bit flags packed into one byte */
extern struct bit_flags
{
  unsigned char Nonlocal : 2, MinIDChanged : 2, Marked : 2, Changed : 2;
} * Flags;

/* Sort helper of fof_subfind_exchange(): current index of a particle and its PS[].TargetIndex. */
struct fof_local_sort_data
{
  int targetindex;
  int index;
};

extern struct fof_subfind_header
{
  int Ngroups;
  int Nsubgroups;
  int Nids;
  int TotNgroups;
  int TotNsubgroups;
  long long TotNids;
  int num_files;
  double time;
  double redshift;
  double HubbleParam;
  double BoxSize;
  double Omega0;
  double OmegaLambda;
  int flag_doubleprecision;
} catalogue_header;

/* Identifiers of the catalogue output blocks; IO_FOF_LASTENTRY is the final enumerator.
 * NOTE(review): the numeric values shift when SUBFIND_EXTENDED_PROPERTIES is defined -
 * presumably they are never stored in files; confirm against the I/O code. */
enum fof_subfind_iofields
{
  IO_FOF_LEN,
  IO_FOF_MTOT,
  IO_FOF_POS,
  IO_FOF_CM,
  IO_FOF_VEL,
  IO_FOF_LENTYPE,
  IO_FOF_MASSTYPE,
  IO_FOF_SFR,

  IO_FOF_M_MEAN200,
  IO_FOF_R_MEAN200,
  IO_FOF_M_CRIT200,
  IO_FOF_R_CRIT200,
  IO_FOF_M_TOPHAT200,
  IO_FOF_R_TOPHAT200,
  IO_FOF_M_CRIT500,
  IO_FOF_R_CRIT500,

#ifdef SUBFIND_EXTENDED_PROPERTIES
  IO_FOF_J_MEAN200,
  IO_FOF_JDM_MEAN200,
  IO_FOF_JGAS_MEAN200,
  IO_FOF_JSTARS_MEAN200,
  IO_FOF_MASSTYPE_MEAN200,
  IO_FOF_LENTYPE_MEAN200,
  IO_FOF_CMFRAC_MEAN200,
  IO_FOF_CMFRACTYPE_MEAN200,
  IO_FOF_J_CRIT200,
  IO_FOF_JDM_CRIT200,
  IO_FOF_JGAS_CRIT200,
  IO_FOF_JSTARS_CRIT200,
  IO_FOF_MASSTYPE_CRIT200,
  IO_FOF_LENTYPE_CRIT200,
  IO_FOF_CMFRAC_CRIT200,
  IO_FOF_CMFRACTYPE_CRIT200,
  IO_FOF_J_TOPHAT200,
  IO_FOF_JDM_TOPHAT200,
  IO_FOF_JGAS_TOPHAT200,
  IO_FOF_JSTARS_TOPHAT200,
  IO_FOF_MASSTYPE_TOPHAT200,
  IO_FOF_LENTYPE_TOPHAT200,
  IO_FOF_CMFRAC_TOPHAT200,
  IO_FOF_CMFRACTYPE_TOPHAT200,
  IO_FOF_J_CRIT500,
  IO_FOF_JDM_CRIT500,
  IO_FOF_JGAS_CRIT500,
  IO_FOF_JSTARS_CRIT500,
  IO_FOF_MASSTYPE_CRIT500,
  IO_FOF_LENTYPE_CRIT500,
  IO_FOF_CMFRAC_CRIT500,
  IO_FOF_CMFRACTYPE_CRIT500,
  IO_FOF_J,
  IO_FOF_JDM,
  IO_FOF_JGAS,
  IO_FOF_JSTARS,
  IO_FOF_CMFRAC,
  IO_FOF_CMFRACTYPE,
  IO_FOF_EKIN,
  IO_FOF_ETHR,
  IO_FOF_EPOT,
  IO_FOF_EPOT_CRIT200,
  IO_FOF_EKIN_CRIT200,
  IO_FOF_ETHR_CRIT200,
  IO_FOF_EPOT_MEAN200,
  IO_FOF_EKIN_MEAN200,
  IO_FOF_ETHR_MEAN200,
  IO_FOF_EPOT_TOPHAT200,
  IO_FOF_EKIN_TOPHAT200,
  IO_FOF_ETHR_TOPHAT200,
  IO_FOF_EPOT_CRIT500,
  IO_FOF_EKIN_CRIT500,
  IO_FOF_ETHR_CRIT500,
#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */

  IO_FOF_NSUBS,
  IO_FOF_FIRSTSUB,
  IO_FOF_FUZZOFFTYPE,

  IO_SUB_LEN,
  IO_SUB_MTOT,
  IO_SUB_POS,
  IO_SUB_VEL,
  IO_SUB_LENTYPE,
  IO_SUB_MASSTYPE,
  IO_SUB_CM,
  IO_SUB_SPIN,
  IO_SUB_BFLD_HALO,
  IO_SUB_BFLD_DISK,

#ifdef SUBFIND_EXTENDED_PROPERTIES
  IO_SUB_EKIN,
  IO_SUB_ETHR,
  IO_SUB_EPOT,
  IO_SUB_J,
  IO_SUB_JDM,
  IO_SUB_JGAS,
  IO_SUB_JSTARS,
  IO_SUB_JINHALFRAD,
  IO_SUB_JDMINHALFRAD,
  IO_SUB_JGASINHALFRAD,
  IO_SUB_JSTARSINHALFRAD,
  IO_SUB_JINRAD,
  IO_SUB_JDMINRAD,
  IO_SUB_JGASINRAD,
  IO_SUB_JSTARSINRAD,
  IO_SUB_CMFRAC,
  IO_SUB_CMFRACTYPE,
  IO_SUB_CMFRACINHALFRAD,
  IO_SUB_CMFRACTYPEINHALFRAD,
  IO_SUB_CMFRACINRAD,
  IO_SUB_CMFRACTYPEINRAD,
#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */

  IO_SUB_VELDISP,
  IO_SUB_VMAX,
  IO_SUB_VMAXRAD,
  IO_SUB_HALFMASSRAD,
  IO_SUB_HALFMASSRADTYPE,
  IO_SUB_MASSINRAD,
  IO_SUB_MASSINHALFRAD,
  IO_SUB_MASSINMAXRAD,
  IO_SUB_MASSINRADTYPE,
  IO_SUB_MASSINHALFRADTYPE,
  IO_SUB_MASSINMAXRADTYPE,
  IO_SUB_IDMOSTBOUND,
  IO_SUB_GRNR,
  IO_SUB_PARENT,
  IO_SUB_SFR,
  IO_SUB_SFRINRAD,
  IO_SUB_SFRINHALFRAD,
  IO_SUB_SFRINMAXRAD,
  IO_FOFSUB_IDS,
  IO_FOF_LASTENTRY
};

/* per-block I/O helpers, all keyed by the block identifier */
int fof_subfind_blockpresent(enum fof_subfind_iofields blocknr);
int fof_subfind_get_datatype(enum fof_subfind_iofields blocknr);
int fof_subfind_get_bytes_per_blockelement(enum fof_subfind_iofields blocknr);
int fof_subfind_get_particles_in_block(enum fof_subfind_iofields blocknr);
void fof_subfind_get_dataset_name(enum fof_subfind_iofields blocknr, char *label);
void fof_subfind_get_Tab_IO_Label(enum fof_subfind_iofields blocknr, char *label);
int fof_subfind_get_dataset_group(enum fof_subfind_iofields blocknr);
void fof_subfind_fill_write_buffer(enum fof_subfind_iofields blocknr, int *startindex, int pc);
int fof_subfind_get_values_per_blockelement(enum fof_subfind_iofields blocknr);

#endif /* #ifndef FOF_H */
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof_distribute.c + * \date 05/2018 + * \brief Communication and reordering routines for FoF. + * \details contains functions: + * void fof_subfind_exchange(MPI_Comm Communicator) + * void fof_reorder_PS(int *Id, int Nstart, int N) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +/*! \brief Redistributes the particles according to what is stored in + * PS[].TargetTask, and PS[].TargetIndex. + * + * \param[in] Communicator MPI communicator. 
/*! \brief Redistributes the particles according to what is stored in
 *         PS[].TargetTask, and PS[].TargetIndex.
 *
 *  The exchange is done separately for every particle type. For each type the
 *  export is limited to roughly half of FreeBytes; if any task hits that
 *  limit, the exchange for this type is repeated until nothing is left to
 *  send. Afterwards All.MaxPart / All.MaxPartSph are adjusted to the new
 *  maximum load (but not below their values on entry), and the local
 *  particles are brought into the order given by PS[].TargetIndex, separately
 *  for the index range [0, NumGas) and for [NumGas, NumPart).
 *
 *  \param[in] Communicator MPI communicator.
 *
 *  \return void
 */
void fof_subfind_exchange(MPI_Comm Communicator)
{
  int nimport, nexport;
  int i, j, n, type, ngrp, target;
  int max_load, max_loadsph, load;
  struct particle_data *partBuf;
  struct subfind_data *subBuf;
  struct sph_particle_data *sphBuf;

  int CommThisTask, CommNTask;

  MPI_Comm_size(Communicator, &CommNTask);
  MPI_Comm_rank(Communicator, &CommThisTask);

  /* remembered so that the allocation limits are never reduced below their values on entry */
  int old_AllMaxPart    = All.MaxPart;
  int old_AllMaxPartSph = All.MaxPartSph;

  for(type = 0; type < NTYPES; type++)
    {
      size_t ExportSpace = 0.5 * (FreeBytes); /* we will try to grab at most half of the still available memory */
      size_t PartSpace   = sizeof(struct particle_data) + sizeof(struct subfind_data) + sizeof(struct sph_particle_data);
      if(PartSpace > ExportSpace)
        terminate("seems like we have insufficient storage, PartSpace=%lld ExportSpace=%lld", (long long)PartSpace,
                  (long long)ExportSpace);

      int glob_flag = 0;

      do
        {
          /* first pass: count the particles of this type that have to leave, per target task */
          for(n = 0; n < CommNTask; n++)
            {
              Send_count[n] = 0;
            }

          ptrdiff_t AvailableSpace = ExportSpace; /* this must be a type that can become negative */

          for(n = 0; n < NumPart; n++)
            {
              if(AvailableSpace < 0)
                break;

              if(P[n].Type == type && PS[n].TargetTask != CommThisTask)
                {
                  target = PS[n].TargetTask;

                  if(target < 0 || target >= CommNTask)
                    terminate("n=%d targettask=%d", n, target);

                  AvailableSpace -= PartSpace;

                  Send_count[target]++;
                }
            }

          MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

          for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < CommNTask; j++)
            {
              nexport += Send_count[j];
              nimport += Recv_count[j];

              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          /* for resize */
          load = (NumPart + nimport - nexport);
          MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);

          if(type == 0)
            {
              load = (NumGas + nimport - nexport);
              MPI_Allreduce(&load, &max_loadsph, 1, MPI_INT, MPI_MAX, Communicator);
            }

          partBuf = (struct particle_data *)mymalloc_movable(&partBuf, "partBuf", nexport * sizeof(struct particle_data));
          subBuf  = (struct subfind_data *)mymalloc_movable(&subBuf, "subBuf", nexport * sizeof(struct subfind_data));
          if(type == 0)
            sphBuf = (struct sph_particle_data *)mymalloc_movable(&sphBuf, "sphBuf", nexport * sizeof(struct sph_particle_data));

          /* second pass: same selection and same stop condition as the counting pass above;
           * leaving particles are copied to the send buffers, staying ones are compacted to the front */
          for(i = 0; i < CommNTask; i++)
            {
              Send_count[i] = 0;
            }

          AvailableSpace = ExportSpace; /* this must be allowed to become negative */

          int nstay         = 0;
          int delta_numpart = 0;
          int delta_numgas  = 0;

          for(n = 0; n < NumPart; n++)
            {
              if(AvailableSpace < 0)
                break;

              if(P[n].Type == type && PS[n].TargetTask != CommThisTask)
                {
                  target = PS[n].TargetTask;

                  AvailableSpace -= PartSpace;

                  partBuf[Send_offset[target] + Send_count[target]] = P[n];
                  subBuf[Send_offset[target] + Send_count[target]]  = PS[n];

                  if(P[n].Type == 0)
                    {
                      sphBuf[Send_offset[target] + Send_count[target]] = SphP[n];
                      delta_numgas++;
                    }

                  Send_count[target]++;
                  delta_numpart++;
                }
              else
                {
                  if(nstay != n)
                    {
                      /* now move P[n] to P[nstay] */

                      P[nstay]  = P[n];
                      PS[nstay] = PS[n];

                      if(P[nstay].Type == 0)
                        SphP[nstay] = SphP[n];
                    }

                  nstay++;
                }
            }

          /* if gas cells were exported, every exported particle of this pass must be a gas cell */
          if(delta_numgas > 0)
            if(delta_numpart != delta_numgas)
              terminate("delta_numpart=%d != delta_numgas=%d", delta_numpart, delta_numgas);

          /* now close gap (if present) */
          memmove(P + nstay, P + nstay + delta_numpart, (NumPart - (nstay + delta_numpart)) * sizeof(struct particle_data));
          memmove(PS + nstay, PS + nstay + delta_numpart, (NumPart - (nstay + delta_numpart)) * sizeof(struct subfind_data));

          if(delta_numgas > 0)
            if(NumGas - (nstay + delta_numgas) > 0)
              memmove(SphP + nstay, SphP + nstay + delta_numpart,
                      (NumGas - (nstay + delta_numgas)) * sizeof(struct sph_particle_data));

          NumPart -= delta_numpart;
          NumGas -= delta_numgas;

          /* do resize, but only increase arrays!! (otherwise data in ActiveParticleList etc. gets lost) */
          if(max_load > (1.0 - ALLOC_TOLERANCE) * All.MaxPart)
            {
              All.MaxPart = max_load / (1.0 - 2 * ALLOC_TOLERANCE);
              reallocate_memory_maxpart();
              PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data));
            }

          if(type == 0)
            {
              if(max_loadsph > (1.0 - ALLOC_TOLERANCE) * All.MaxPartSph)
                {
                  All.MaxPartSph = max_loadsph / (1.0 - 2 * ALLOC_TOLERANCE);
                  reallocate_memory_maxpartsph();
                }
            }

          /* create a gap behind the existing gas particles where we will insert the incoming particles */
          memmove(P + NumGas + nimport, P + NumGas, (NumPart - NumGas) * sizeof(struct particle_data));
          memmove(PS + NumGas + nimport, PS + NumGas, (NumPart - NumGas) * sizeof(struct subfind_data));

          /* incoming particles are received directly into that gap */
          for(i = 0; i < CommNTask; i++)
            Recv_offset[i] += NumGas;

          /* pairwise exchange: in every round this task talks to CommThisTask ^ ngrp */
          for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
            {
              target = CommThisTask ^ ngrp;

              if(target < CommNTask)
                {
                  if(Send_count[target] > 0 || Recv_count[target] > 0)
                    {
                      MPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(struct particle_data), MPI_BYTE, target,
                                   TAG_PDATA, P + Recv_offset[target], Recv_count[target] * sizeof(struct particle_data), MPI_BYTE,
                                   target, TAG_PDATA, Communicator, MPI_STATUS_IGNORE);

                      MPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(struct subfind_data), MPI_BYTE, target,
                                   TAG_KEY, PS + Recv_offset[target], Recv_count[target] * sizeof(struct subfind_data), MPI_BYTE,
                                   target, TAG_KEY, Communicator, MPI_STATUS_IGNORE);

                      if(type == 0)
                        MPI_Sendrecv(sphBuf + Send_offset[target], Send_count[target] * sizeof(struct sph_particle_data), MPI_BYTE,
                                     target, TAG_SPHDATA, SphP + Recv_offset[target],
                                     Recv_count[target] * sizeof(struct sph_particle_data), MPI_BYTE, target, TAG_SPHDATA,
                                     Communicator, MPI_STATUS_IGNORE);
                    }
                }
            }

          if(type == 0)
            NumGas += nimport;

          NumPart += nimport;

          /* buffers are released in reverse order of their allocation */
          if(type == 0)
            myfree_movable(sphBuf);

          myfree_movable(subBuf);
          myfree_movable(partBuf);

          /* a task that ran out of export space still has particles to send: repeat the exchange for this type */
          int loc_flag = 0;
          if(AvailableSpace < 0)
            loc_flag = 1;

          MPI_Allreduce(&loc_flag, &glob_flag, 1, MPI_INT, MPI_SUM, Communicator);
          if(glob_flag > 0 && CommThisTask == 0)
            {
              printf(
                  "FOF-DISTRIBUTE: Need to cycle in particle exchange due to memory shortage. type=%d glob_flag=%d ThisTask=%d "
                  "CommThisTask=%d PartSpace=%lld ExportSpace=%lld\n",
                  type, glob_flag, ThisTask, CommThisTask, (long long)PartSpace, (long long)ExportSpace);
              fflush(stdout);
            }
        }
      while(glob_flag);
    }

  /* if there was a temporary memory shortage during the exchange, we may had to increase the maximum allocations. Go back to smaller
   * values again if possible */

  load = NumPart;
  MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);
  max_load = max_load / (1.0 - 2 * ALLOC_TOLERANCE);
  if(max_load < old_AllMaxPart)
    max_load = old_AllMaxPart;
  if(max_load != All.MaxPart)
    {
      All.MaxPart = max_load;
      reallocate_memory_maxpart();
      PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data));
    }

  load = NumGas;
  MPI_Allreduce(&load, &max_loadsph, 1, MPI_INT, MPI_MAX, Communicator);
  max_loadsph = max_loadsph / (1.0 - 2 * ALLOC_TOLERANCE);
  if(max_loadsph < old_AllMaxPartSph)
    max_loadsph = old_AllMaxPartSph;
  if(max_loadsph != All.MaxPartSph)
    {
      All.MaxPartSph = max_loadsph;
      reallocate_memory_maxpartsph();
    }

  /* finally, let's also address the desired local order according to PS[].TargetIndex */

  struct fof_local_sort_data *mp;
  int *Id;

  if(NumGas)
    {
      mp = (struct fof_local_sort_data *)mymalloc("mp", sizeof(struct fof_local_sort_data) * NumGas);
      Id = (int *)mymalloc("Id", sizeof(int) * NumGas);

      for(i = 0; i < NumGas; i++)
        {
          mp[i].index       = i;
          mp[i].targetindex = PS[i].TargetIndex;
        }

      qsort(mp, NumGas, sizeof(struct fof_local_sort_data), fof_compare_local_sort_data_targetindex);

      /* Id[old index] = rank of that particle in the TargetIndex ordering */
      for(i = 0; i < NumGas; i++)
        Id[mp[i].index] = i;

      reorder_gas(Id);

      /* Id is rebuilt before it is used for PS: fof_reorder_PS() overwrites Id while it works,
       * and reorder_gas() presumably does the same - confirm against its implementation */
      for(i = 0; i < NumGas; i++)
        Id[mp[i].index] = i;

      fof_reorder_PS(Id, 0, NumGas);

      myfree(Id);
      myfree(mp);
    }

  if(NumPart - NumGas > 0)
    {
      /* both arrays hold NumPart - NumGas elements; the pointers are shifted so that they
       * can be indexed with particle indices in [NumGas, NumPart) */
      mp = (struct fof_local_sort_data *)mymalloc("mp", sizeof(struct fof_local_sort_data) * (NumPart - NumGas));
      mp -= NumGas;

      Id = (int *)mymalloc("Id", sizeof(int) * (NumPart - NumGas));
      Id -= NumGas;

      for(i = NumGas; i < NumPart; i++)
        {
          mp[i].index       = i;
          mp[i].targetindex = PS[i].TargetIndex;
        }

      qsort(mp + NumGas, NumPart - NumGas, sizeof(struct fof_local_sort_data), fof_compare_local_sort_data_targetindex);

      for(i = NumGas; i < NumPart; i++)
        Id[mp[i].index] = i;

      reorder_particles(Id);

      for(i = NumGas; i < NumPart; i++)
        Id[mp[i].index] = i;

      fof_reorder_PS(Id, NumGas, NumPart);

      /* undo the pointer shift before handing the blocks back to the allocator */
      Id += NumGas;
      myfree(Id);
      mp += NumGas;
      myfree(mp);
    }
}
+ * + * \return void + */ +void fof_reorder_PS(int *Id, int Nstart, int N) +{ + int i; + struct subfind_data PSsave, PSsource; + int idsource, idsave, dest; + + for(i = Nstart; i < N; i++) + { + if(Id[i] != i) + { + PSsource = PS[i]; + + idsource = Id[i]; + dest = Id[i]; + + do + { + PSsave = PS[dest]; + idsave = Id[dest]; + + PS[dest] = PSsource; + Id[dest] = idsource; + + if(dest == i) + break; + + PSsource = PSsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof_findgroups.c b/src/amuse/community/arepo/src/fof/fof_findgroups.c new file mode 100644 index 0000000000..55e2ae3d2e --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof_findgroups.c @@ -0,0 +1,720 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof_findgroups.c + * \date 05/2018 + * \brief Routine to identify friend of friends groups. 
+ * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * double fof_find_groups(MyIDType * vMinID, int *vHead, + * int *vLen, int *vNext, int *vTail, int *vMinIDTask) + * static int fof_find_dmparticles_evaluate(int target, + * int mode, int threadid) + * static int fof_treefind_fof_primary(MyDouble searchcenter[3], + * MyFloat hsml, int target, int numnodes, int *firstnode, + * int mode, int threadid) + * void fof_check_for_full_nodes_recursive(int no) + * int fof_return_a_particle_in_cell_recursive(int no) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +static int fof_find_dmparticles_evaluate(int target, int mode, int threadid); +static int fof_treefind_fof_primary(MyDouble searchcenter[3], MyFloat hsml, int target, int numnodes, int *firstnode, int mode, + int threadid); + +static int *Tree_Head; + +static MyIDType *MinID; +static int *Head, *Len, *Next, *Tail, *MinIDTask; + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + + MyIDType MinID; + int MinIDTask; + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. 
+ * \param[in] i Index of particle in P array. + * \param[in] firstnode First note of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + + in->MinID = MinID[Head[i]]; + in->MinIDTask = MinIDTask[Head[i]]; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + char link_count_flag; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? + * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + terminate("here not used"); + } + else /* combine */ + { + if(out->link_count_flag) + Flags[i].Marked = 1; + } +} + +#include "../utils/generic_comm_helpers2.h" + +static int link_across; +static int nprocessed; + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. 
+ * + * \return void + */ +static void kernel_local(void) +{ + int i; + /* do local particles */ + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= NumPart) + break; + + if(((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES))) + { + if(Flags[i].Nonlocal && Flags[i].Changed) + { + fof_find_dmparticles_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + + nprocessed++; + } + } + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + link_across += fof_find_dmparticles_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Links particles to groups. + * + * \param[in, out] vMinID Pointer to MinID array. + * \param[in, out] vHead Pointer to Head array. + * \param[in, out] vLen Pointer to Len array. + * \param[in, out] vNext Pointer to Next array. + * \param[in, out] vTail Pointer to Tail array. + * \param[in, out] vMinIDTask Pointer to MinIDTask array. + * + * \return Time spent in this function. 
+ */ +double fof_find_groups(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask) +{ + MinID = vMinID; + Head = vHead; + Len = vLen; + Next = vNext; + Tail = vTail; + MinIDTask = vMinIDTask; + + int i, npart, marked; + long long totmarked, totnpart; + long long link_across_tot, ntot; + double t0, t1, tstart, tend; + + tstart = second(); + + mpi_printf("FOF: Start linking particles (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + /* allocate a flag field that is used to mark nodes that are fully inside the linking length */ + flag_node_inside_linkinglength = (unsigned char *)mymalloc("flag_node_inside_linkinglength", Tree_MaxNodes * sizeof(unsigned char)); + memset(flag_node_inside_linkinglength, 0, Tree_MaxNodes * sizeof(unsigned char)); + flag_node_inside_linkinglength -= Tree_MaxPart; + + Flags = (struct bit_flags *)mymalloc("Flags", NumPart * sizeof(struct bit_flags)); + + generic_set_MaxNexport(); + + Tree_Head = mymalloc("Tree_Head", Tree_NumNodes * sizeof(int)); + Tree_Head -= Tree_MaxPart; + + /* allocate buffers to arrange communication */ + generic_alloc_partlist_nodelist_ngblist_threadbufs(); + + t0 = second(); + + /* first, link only among local particles */ + for(i = 0, marked = 0, npart = 0; i < NumPart; i++) + { + if(((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES))) + { + fof_find_dmparticles_evaluate(i, MODE_LOCAL_NO_EXPORT, 0); + + npart++; + + if(Flags[i].Nonlocal) + marked++; + } + } + + sumup_large_ints(1, &marked, &totmarked); + sumup_large_ints(1, &npart, &totnpart); + t1 = second(); + mpi_printf("FOF: links on local processor done (took %g sec).\nFOF: Marked=%lld out of the %lld primaries which are linked\n", + timediff(t0, t1), totmarked, totnpart); + + generic_free_partlist_nodelist_ngblist_threadbufs(); + + t0 = second(); + fof_check_for_full_nodes_recursive(Tree_MaxPart); + t1 = second(); + mpi_printf("FOF: fully linked nodes determined (took %g sec).\n", timediff(t0, t1)); + 
mpi_printf("FOF: begin linking across processors (presently allocated=%g MB) \n", AllocatedBytes / (1024.0 * 1024.0)); + + for(i = 0; i < NumPart; i++) + Flags[i].Marked = 1; + + do + { + t0 = second(); + + for(i = 0; i < NumPart; i++) + { + Flags[i].Changed = Flags[i].Marked; + Flags[i].Marked = 0; + Flags[i].MinIDChanged = 0; + } + + NextParticle = 0; /* begin with this index */ + + link_across = 0; + nprocessed = 0; + + generic_comm_pattern(NumPart, kernel_local, kernel_imported); + + sumup_large_ints(1, &link_across, &link_across_tot); + sumup_large_ints(1, &nprocessed, &ntot); + + t1 = second(); + + mpi_printf("FOF: have done %15lld cross links (processed %14lld, took %g sec)\n", link_across_tot, ntot, timediff(t0, t1)); + + /* let's check out which particles have changed their MinID */ + for(i = 0; i < NumPart; i++) + if(Flags[i].Nonlocal) + { + if(Flags[Head[i]].MinIDChanged) + Flags[i].Marked = 1; + } + } + while(link_across_tot > 0); + + Tree_Head += Tree_MaxPart; + myfree(Tree_Head); + myfree(Flags); + /* free flag */ + myfree(flag_node_inside_linkinglength + Tree_MaxPart); + + mpi_printf("FOF: Local groups found.\n"); + + tend = second(); + return timediff(tstart, tend); +} + +/*! \brief Links dark matter particles. + * + * \param[in] target Index of particle/cell. + * \param[in] mode Flag if it operates on local or imported data. + * \param[in] threadid ID of thread. + * + * \return Number of links. 
+ */ +static int fof_find_dmparticles_evaluate(int target, int mode, int threadid) +{ + int j, n, links, p, s, ss, numnodes, *firstnode; + int numngb; + MyDouble *pos; + data_in local, *target_data; + + links = 0; + + if(mode == MODE_LOCAL_NO_EXPORT || mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->Pos; + + numngb = fof_treefind_fof_primary(pos, LinkL, target, numnodes, firstnode, mode, threadid); + + if(mode == MODE_LOCAL_PARTICLES || mode == MODE_LOCAL_NO_EXPORT) + for(n = 0; n < numngb; n++) + { + j = Thread[threadid].Ngblist[n]; + + if(Head[target] != Head[j]) /* only if not yet linked */ + { + if(Len[Head[target]] > Len[Head[j]]) /* p group is longer */ + { + p = target; + s = j; + } + else + { + p = j; + s = target; + } + Next[Tail[Head[p]]] = Head[s]; + + Tail[Head[p]] = Tail[Head[s]]; + + Len[Head[p]] += Len[Head[s]]; + + if(MinID[Head[s]] < MinID[Head[p]]) + { + MinID[Head[p]] = MinID[Head[s]]; + MinIDTask[Head[p]] = MinIDTask[Head[s]]; + } + + ss = Head[s]; + do + Head[ss] = Head[p]; + while((ss = Next[ss]) >= 0); + } + } + + if(mode == MODE_IMPORTED_PARTICLES) + { + if(numngb > 0) + DataResult[target].link_count_flag = 1; + else + DataResult[target].link_count_flag = 0; + } + + links += numngb; + + return links; +} + +/*! \brief Finds the neighbors among the primary link types which are within a + * certain distance. + * + * \param[in] searchcenter Position of search center. + * \param[in] hsml Search radius. + * \param[in] target Index of partcle. + * \param[in] numnodes Number of nodes. + * \param[in] fistnode First node. + * \param[in] mode + * -1: only local particles should be found and no export occurs; + * 0: export occurs, but local particles are ignored; + * 1: particles are found for an imported point. 
+ * \param[in] threadid ID of thread. + * + * \return Number of particles found. + */ +static int fof_treefind_fof_primary(MyDouble searchcenter[3], MyFloat hsml, int target, int numnodes, int *firstnode, int mode, + int threadid) +{ + int k, numngb, no, p, nexport_flag = 0; + MyDouble dx, dy, dz, dist, r2; + +#define FACT2 0.866025403785 /* sqrt(3)/2 */ +#define FACT3 (2.0 * FACT2) /* sqrt(3) */ + + MyDouble xtmp, ytmp, ztmp; + + numngb = 0; + + for(k = 0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES || mode == MODE_LOCAL_NO_EXPORT) + { + no = Tree_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + if(no < Tree_MaxPart) /* single particle */ + { + p = no; + no = Nextnode[no]; + + if(!((1 << P[p].Type) & (FOF_PRIMARY_LINK_TYPES))) + continue; + + if(mode == MODE_LOCAL_PARTICLES) + continue; + + dist = hsml; + dx = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - searchcenter[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - searchcenter[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - searchcenter[2]); + if(dz > dist) + continue; + if(dx * dx + dy * dy + dz * dz > dist * dist) + continue; + + if(mode == MODE_IMPORTED_PARTICLES) + { + if(MinID[Head[p]] > DataGet[target].MinID) + { + MinID[Head[p]] = DataGet[target].MinID; + MinIDTask[Head[p]] = DataGet[target].MinIDTask; + Flags[Head[p]].MinIDChanged = 1; + numngb++; + } + } + else + { + /* this will only be done for MODE_LOCAL_NO_EXPORT */ + Thread[threadid].Ngblist[numngb++] = p; + } + } + else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + if(mode == MODE_IMPORTED_PARTICLES) + { + if(no < + Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + break; + + if(Tree_Head[no] >= 0) + if(MinID[Tree_Head[no]] <= DataGet[target].MinID) + { + no = Nodes[no].u.d.sibling; /* the node 
can be discarded */ + continue; + } + } + + struct NODE *current = &Nodes[no]; + int nocur = no; + no = current->u.d.sibling; /* in case the node can be discarded */ + + if(mode == MODE_LOCAL_PARTICLES) + { + if(nocur >= Tree_FirstNonTopLevelNode) + { + /* we have a node with only local particles, hence we can skip it for mode == 0 */ + continue; + } + } + + dist = hsml + 0.5 * current->len; + dx = FOF_NEAREST_LONG_X(current->center[0] - searchcenter[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(current->center[1] - searchcenter[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(current->center[2] - searchcenter[2]); + if(dz > dist) + continue; + + /* now test against the minimal sphere enclosing everything */ + dist += FACT1 * current->len; + r2 = dx * dx + dy * dy + dz * dz; + if(r2 > dist * dist) + continue; + + if(mode != MODE_LOCAL_PARTICLES) + { + /* test whether the node is contained within the sphere */ + dist = hsml - FACT2 * current->len; + if(dist > 0) + if(r2 < dist * dist && hsml > FACT3 * current->len) + { + if(flag_node_inside_linkinglength[nocur] & (1 << BITFLAG_INSIDE_LINKINGLENGTH)) /* already flagged */ + { + /* sufficient to return only one particle inside this cell */ + p = fof_return_a_particle_in_cell_recursive(nocur); + + if(p >= 0) + { + if(mode == MODE_IMPORTED_PARTICLES) + { + if(MinID[Head[p]] > DataGet[target].MinID) + { + MinID[Head[p]] = DataGet[target].MinID; + MinIDTask[Head[p]] = DataGet[target].MinIDTask; + Flags[Head[p]].MinIDChanged = 1; + numngb++; + } + } + else + Thread[threadid].Ngblist[numngb++] = p; + } + + continue; + } + else + { + /* flag it now */ + flag_node_inside_linkinglength[nocur] |= (1 << BITFLAG_INSIDE_LINKINGLENGTH); + } + } + } + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */ + { + terminate("do not expect imported points here"); + } + else + { + if(mode == MODE_LOCAL_PARTICLES) + { + 
if(target >= 0) + tree_treefind_export_node_threads(no, target, threadid); + } + else if(mode == MODE_LOCAL_NO_EXPORT) + { + nexport_flag = 1; + } + else if(mode == MODE_IMPORTED_PARTICLES) + terminate("stop no=%d Tree_MaxPart=%d Tree_MaxNodes=%d", no, Tree_MaxPart, Tree_MaxNodes); + + no = Nextnode[no - Tree_MaxNodes]; + continue; + } + } + } + + if(mode == MODE_LOCAL_NO_EXPORT) + { + if(nexport_flag == 0) + Flags[target].Nonlocal = 0; + else + Flags[target].Nonlocal = 1; + } + + return numngb; +} + +/*! \brief Walks a tree recursively and sets Tree_Head of node. + * + * \param[in] no Index of node we are in. + * + * \return void + */ +void fof_check_for_full_nodes_recursive(int no) +{ + if(no >= Tree_MaxPart && no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + int head = -1; /* no particle yet */ + + int p = Nodes[no].u.d.nextnode; + + while(p != Nodes[no].u.d.sibling) + { + if(p < Tree_MaxPart) /* a particle */ + { + if((1 << P[p].Type) & (FOF_PRIMARY_LINK_TYPES)) + { + if(head == -1) + head = Head[p]; + else if(head >= 0) + { + if(head != Head[p]) + head = -2; + } + } + + p = Nextnode[p]; + } + else if(p < Tree_MaxPart + Tree_MaxNodes) /* an internal node */ + { + fof_check_for_full_nodes_recursive(p); + + if(head == -1) + head = Tree_Head[p]; + else if(head >= 0) + { + if(head != Tree_Head[p]) + head = -2; + } + + p = Nodes[p].u.d.sibling; + } + else /* a pseudo particle */ + p = Nextnode[p - Tree_MaxNodes]; + } + + Tree_Head[no] = head; + } +} + +/*! \brief Finds a particle in node. + * + * \param[in] no Index of node. + * + * \return Particle index; -1 if no particle was found. 
+ */ +int fof_return_a_particle_in_cell_recursive(int no) +{ + if(no >= Tree_MaxPart && no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + int p = Nodes[no].u.d.nextnode; + + while(p != Nodes[no].u.d.sibling) + { + if(p < Tree_MaxPart) /* a particle */ + { + if((1 << P[p].Type) & (FOF_PRIMARY_LINK_TYPES)) + { + return p; + } + + p = Nextnode[p]; + } + else if(p < Tree_MaxPart + Tree_MaxNodes) /* an internal node */ + { + int ret = fof_return_a_particle_in_cell_recursive(p); + + if(ret >= 0) + return ret; + + p = Nodes[p].u.d.sibling; + } + else /* a pseudo particle */ + p = Nextnode[p - Tree_MaxNodes]; + } + } + + return -1; +} + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof_io.c b/src/amuse/community/arepo/src/fof/fof_io.c new file mode 100644 index 0000000000..3c0755ab69 --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof_io.c @@ -0,0 +1,3151 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof_io.c + * \date 05/2018 + * \brief Output functions for parallel FoF; also used by subfind. 
+ * \details contains functions: + * void fof_save_groups(int num) + * void fof_subfind_prepare_ID_list(void) + * void fof_subfind_write_file(char *fname, int writeTask, + * int lastTask) + * void fof_subfind_fill_write_buffer(enum fof_subfind_iofields + * blocknr, int *startindex, int pc) + * void fof_subfind_get_dataset_name(enum fof_subfind_iofields + * blocknr, char *label) + * int fof_subfind_get_dataset_group(enum fof_subfind_iofields + * blocknr) + * int fof_subfind_get_particles_in_block(enum + * fof_subfind_iofields blocknr) + * int fof_subfind_get_values_per_blockelement(enum + * fof_subfind_iofields blocknr) + * int fof_subfind_get_bytes_per_blockelement(enum + * fof_subfind_iofields blocknr) + * int fof_subfind_get_datatype(enum fof_subfind_iofields + * blocknr) + * int fof_subfind_blockpresent(enum fof_subfind_iofields + * blocknr) + * void fof_subfind_get_Tab_IO_Label(enum fof_subfind_iofields + * blocknr, char *label) + * void fof_subfind_write_header_attributes_in_hdf5(hid_t + * handle) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../gitversion/version.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef HAVE_HDF5 +#include +void fof_subfind_write_header_attributes_in_hdf5(hid_t handle); +void write_parameters_attributes_in_hdf5(hid_t handle); +void write_compile_time_options_in_hdf5(hid_t handle); +#endif /* #ifdef HAVE_HDF5 */ + +#ifdef FOF + +/*! \brief Make sure a position lies in the box in case of periodic boundaries. 
+ * + * \param[in] pos Single coordinate in one dimension to be wrapped + * \param[in] dim Index of coordinate [0/1/2] + * + * \return double: wrapped coordinate + */ +MyOutputFloat static wrap_position(MyOutputFloat pos, int dim) +{ +#if defined(REFLECTIVE_X) + if(dim == 0) + return pos; +#endif + +#if defined(REFLECTIVE_Y) + if(dim == 1) + return pos; +#endif + +#if defined(REFLECTIVE_Z) + if(dim == 2) + return pos; +#endif + + double boxsize = All.BoxSize; + +#ifdef LONG_X + if(dim == 0) + boxsize *= LONG_X; +#endif +#ifdef LONG_Y + if(dim == 1) + boxsize *= LONG_Y; +#endif +#ifdef LONG_Z + if(dim == 2) + boxsize *= LONG_Z; +#endif + + while(pos < 0) + pos += boxsize; + + while(pos >= boxsize) + pos -= boxsize; + + return pos; +} + +/*! \brief Main routine for group output. + * + * \param[in] num Index of group file (snapshot index for this output). + * + * \return void + */ +void fof_save_groups(int num) +{ + int filenr, gr, ngrps, masterTask, lastTask; + double t0, t1; + char buf[500]; + +#ifdef FOF_STOREIDS + fof_subfind_prepare_ID_list(); +#endif /* #ifdef FOF_STOREIDS */ + + t0 = second(); + + CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE); + + if(NTask < All.NumFilesPerSnapshot) + { + warn( + "Number of processors must be larger or equal than All.NumFilesPerSnapshot! Reducing All.NumFilesPerSnapshot " + "accordingly.\n"); + All.NumFilesPerSnapshot = NTask; + } + + if(All.SnapFormat < 1 || All.SnapFormat > 3) + mpi_printf("Unsupported File-Format. 
All.SnapFormat=%d\n", All.SnapFormat); + +#ifndef HAVE_HDF5 + if(All.SnapFormat == 3) + { + mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n"); + } +#endif /* #ifndef HAVE_HDF5 */ + + /* assign processors to output files */ + distribute_file(All.NumFilesPerSnapshot, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask); + + if(All.NumFilesPerSnapshot > 1) + { + if(ThisTask == 0) + { + sprintf(buf, "%s/groups_%03d", All.OutputDir, num); + mkdir(buf, 02755); + } + MPI_Barrier(MPI_COMM_WORLD); + } + + if(All.NumFilesPerSnapshot > 1) + sprintf(buf, "%s/groups_%03d/%s_%03d.%d", All.OutputDir, num, "fof_tab", num, filenr); + else + sprintf(buf, "%s%s_%03d", All.OutputDir, "fof_tab", num); + + ngrps = All.NumFilesPerSnapshot / All.NumFilesWrittenInParallel; + if((All.NumFilesPerSnapshot % All.NumFilesWrittenInParallel)) + ngrps++; + + for(gr = 0; gr < ngrps; gr++) + { + if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */ + fof_subfind_write_file(buf, masterTask, lastTask); + + MPI_Barrier(MPI_COMM_WORLD); + } + + myfree(CommBuffer); + +#ifdef FOF_STOREIDS + myfree(ID_list); +#endif /* #ifdef FOF_STOREIDS */ + + t1 = second(); + + mpi_printf("FOF: Group catalogues saved. took = %g sec\n", timediff(t0, t1)); +} + +/*! \brief Prepares ID list for option FOF_STOREIDS. 
+ * + * \return void + */ +void fof_subfind_prepare_ID_list(void) +{ + int i, nids; + long long totNids; + double t0, t1; + + t0 = second(); + + ID_list = mymalloc("ID_list", sizeof(struct id_list) * Nids); + + for(i = 0, nids = 0; i < NumPart; i++) + { + if(PS[i].GrNr < TotNgroups) + { + if(nids >= Nids) + terminate("nids >= Nids"); + + ID_list[nids].GrNr = PS[i].GrNr; + ID_list[nids].Type = P[i].Type; + ID_list[nids].ID = P[i].ID; +#ifdef SUBFIND + ID_list[nids].SubNr = PS[i].SubNr; + ID_list[nids].BindingEgy = PS[i].BindingEnergy; +#endif /* #ifdef SUBFIND */ + nids++; + } + } + + sumup_large_ints(1, &nids, &totNids); + if(totNids != TotNids) + { + char buf[1000]; + sprintf(buf, "Task=%d Nids=%d totNids=%lld TotNids=%lld\n", ThisTask, Nids, totNids, TotNids); + terminate(buf); + } + + /* sort the particle IDs according to group-number, and optionally subhalo number and binding energy */ +#ifdef SUBFIND + parallel_sort(ID_list, Nids, sizeof(struct id_list), subfind_compare_ID_list); +#else /* #ifdef SUBFIND */ + parallel_sort(ID_list, Nids, sizeof(struct id_list), fof_compare_ID_list_GrNrID); +#endif /* #ifdef SUBFIND #else */ + + t1 = second(); + mpi_printf("FOF/SUBFIND: Particle/cell IDs in groups globally sorted. took = %g sec\n", timediff(t0, t1)); +} + +/*! \brief Writes a file with name fname containing data from writeTask to + * lastTask. + * + * \param[in] fname Filename of the output file. + * \param[in] writeTask Task responsible for writing the file. + * \param[in] lastTask Last task whose data is still in this file. 
+ * + * \return void + */ +void fof_subfind_write_file(char *fname, int writeTask, int lastTask) +{ + int bytes_per_blockelement, npart, nextblock; + int n_for_this_task, n, p, pc, offset = 0, task; + int blockmaxlen, n_type[3], ntot_type[3], nn[3]; + enum fof_subfind_iofields blocknr; + char label[8]; + int bnr; + int blksize; + MPI_Status status; + FILE *fd = 0; +#ifdef HAVE_HDF5 + hid_t hdf5_file = 0, hdf5_grp[3], hdf5_headergrp = 0, hdf5_dataspace_memory; + hid_t hdf5_datatype = 0, hdf5_dataspace_in_file = 0, hdf5_dataset = 0; + hid_t hdf5_paramsgrp = 0, hdf5_configgrp = 0; + herr_t hdf5_status; + hsize_t dims[2], count[2], start[2]; + int rank = 0, pcsum = 0; + char buf[1000]; +#endif /* #ifdef HAVE_HDF5 */ + +#define SKIP \ + { \ + my_fwrite(&blksize, sizeof(int), 1, fd); \ + } + + /* determine group/id numbers of each type in file */ + n_type[0] = Ngroups; + n_type[1] = Nsubgroups; + n_type[2] = Nids; + + if(ThisTask == writeTask) + { + for(n = 0; n < 3; n++) + ntot_type[n] = n_type[n]; + + for(task = writeTask + 1; task <= lastTask; task++) + { + MPI_Recv(&nn[0], 3, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status); + for(n = 0; n < 3; n++) + ntot_type[n] += nn[n]; + } + + for(task = writeTask + 1; task <= lastTask; task++) + MPI_Send(&ntot_type[0], 3, MPI_INT, task, TAG_N, MPI_COMM_WORLD); + } + else + { + MPI_Send(&n_type[0], 3, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD); + MPI_Recv(&ntot_type[0], 3, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status); + } + + /* fill file header */ + catalogue_header.Ngroups = ntot_type[0]; + catalogue_header.Nsubgroups = ntot_type[1]; + catalogue_header.Nids = ntot_type[2]; + + catalogue_header.TotNgroups = TotNgroups; + catalogue_header.TotNsubgroups = TotNsubgroups; + catalogue_header.TotNids = TotNids; + + catalogue_header.num_files = All.NumFilesPerSnapshot; + + catalogue_header.time = All.Time; + if(All.ComovingIntegrationOn) + catalogue_header.redshift = 1.0 / All.Time - 1; + else + 
catalogue_header.redshift = 0; + catalogue_header.HubbleParam = All.HubbleParam; + catalogue_header.BoxSize = All.BoxSize; + catalogue_header.Omega0 = All.Omega0; + catalogue_header.OmegaLambda = All.OmegaLambda; + +#ifdef OUTPUT_IN_DOUBLEPRECISION + catalogue_header.flag_doubleprecision = 1; +#else /* #ifdef OUTPUT_IN_DOUBLEPRECISION */ + catalogue_header.flag_doubleprecision = 0; +#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */ + + /* open file and write header */ + + if(ThisTask == writeTask) + { + if(All.SnapFormat == 3) + { +#ifdef HAVE_HDF5 + sprintf(buf, "%s.hdf5", fname); + hdf5_file = my_H5Fcreate(buf, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + mpi_printf("FOF/SUBFIND: writing group catalogue: '%s' (file 1 of %d)\n", fname, All.NumFilesPerSnapshot); + hdf5_headergrp = my_H5Gcreate(hdf5_file, "/Header", 0); + + hdf5_grp[0] = my_H5Gcreate(hdf5_file, "/Group", 0); + hdf5_grp[1] = my_H5Gcreate(hdf5_file, "/Subhalo", 0); + hdf5_grp[2] = my_H5Gcreate(hdf5_file, "/IDs", 0); + + fof_subfind_write_header_attributes_in_hdf5(hdf5_headergrp); + + hdf5_paramsgrp = my_H5Gcreate(hdf5_file, "/Parameters", 0); + write_parameters_attributes_in_hdf5(hdf5_paramsgrp); + + hdf5_configgrp = my_H5Gcreate(hdf5_file, "/Config", 0); + write_compile_time_options_in_hdf5(hdf5_configgrp); + +#endif /* #ifdef HAVE_HDF5 */ + } + else + { + if(!(fd = fopen(fname, "w"))) + { + printf("can't open file `%s' for writing snapshot.\n", fname); + terminate("file open error"); + } + + mpi_printf("FOF/SUBFIND: writing group catalogue: '%s' (file 1 of %d)\n", fname, All.NumFilesPerSnapshot); + + if(All.SnapFormat == 2) + { + blksize = sizeof(int) + 4 * sizeof(char); + SKIP; + my_fwrite((void *)"HEAD", sizeof(char), 4, fd); + nextblock = sizeof(catalogue_header) + 2 * sizeof(int); + my_fwrite(&nextblock, sizeof(int), 1, fd); + SKIP; + } + + blksize = sizeof(catalogue_header); + + SKIP; + my_fwrite(&catalogue_header, sizeof(catalogue_header), 1, fd); + SKIP; + } + } + + for(bnr = 0; bnr < 1000; 
bnr++) + { + blocknr = (enum fof_subfind_iofields)bnr; + + if(blocknr == IO_FOF_LASTENTRY) + break; + + if(fof_subfind_blockpresent(blocknr)) + { + bytes_per_blockelement = fof_subfind_get_bytes_per_blockelement(blocknr); + + blockmaxlen = (int)(COMMBUFFERSIZE / bytes_per_blockelement); + + npart = fof_subfind_get_particles_in_block(blocknr); + int grp = fof_subfind_get_dataset_group(blocknr); + + if(npart > 0) + { + if(ThisTask == 0) + { + char buf[1000]; + + fof_subfind_get_dataset_name(blocknr, buf); + printf("FOF/SUBFIND: writing block %d (%s)...\n", blocknr, buf); + } + + if(ThisTask == writeTask) + { + if(All.SnapFormat == 1 || All.SnapFormat == 2) + { + if(All.SnapFormat == 2) + { + blksize = sizeof(int) + 4 * sizeof(char); + SKIP; + fof_subfind_get_Tab_IO_Label(blocknr, label); + my_fwrite(label, sizeof(char), 4, fd); + nextblock = npart * bytes_per_blockelement + 2 * sizeof(int); + my_fwrite(&nextblock, sizeof(int), 1, fd); + SKIP; + } + + blksize = npart * bytes_per_blockelement; + SKIP; + } + else if(All.SnapFormat == 3) + { +#ifdef HAVE_HDF5 + switch(fof_subfind_get_datatype(blocknr)) + { + case 0: + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_INT); + break; + case 1: +#ifdef OUTPUT_IN_DOUBLEPRECISION + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE); +#else /* #ifdef OUTPUT_IN_DOUBLEPRECISION */ + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT); +#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */ + break; + case 2: + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT64); + break; + } + + dims[0] = ntot_type[grp]; + dims[1] = fof_subfind_get_values_per_blockelement(blocknr); + if(dims[1] == 1) + rank = 1; + else + rank = 2; + + fof_subfind_get_dataset_name(blocknr, buf); + + hdf5_dataspace_in_file = my_H5Screate_simple(rank, dims, NULL); + + hdf5_dataset = my_H5Dcreate(hdf5_grp[grp], buf, hdf5_datatype, hdf5_dataspace_in_file, H5P_DEFAULT); + + pcsum = 0; +#endif /* #ifdef HAVE_HDF5 */ + } + } + + for(task = writeTask, offset = 0; task <= lastTask; task++) + { + if(task 
== ThisTask) + { + n_for_this_task = n_type[grp]; + + for(p = writeTask; p <= lastTask; p++) + if(p != ThisTask) + MPI_Send(&n_for_this_task, 1, MPI_INT, p, TAG_NFORTHISTASK, MPI_COMM_WORLD); + } + else + MPI_Recv(&n_for_this_task, 1, MPI_INT, task, TAG_NFORTHISTASK, MPI_COMM_WORLD, &status); + + while(n_for_this_task > 0) + { + pc = n_for_this_task; + + if(pc > blockmaxlen) + pc = blockmaxlen; + + if(ThisTask == task) + fof_subfind_fill_write_buffer(blocknr, &offset, pc); + + if(ThisTask == writeTask && task != writeTask) + MPI_Recv(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, task, TAG_PDATA, MPI_COMM_WORLD, &status); + + if(ThisTask != writeTask && task == ThisTask) + MPI_Ssend(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, writeTask, TAG_PDATA, MPI_COMM_WORLD); + + if(ThisTask == writeTask) + { + if(All.SnapFormat == 3) + { +#ifdef HAVE_HDF5 + start[0] = pcsum; + start[1] = 0; + + count[0] = pc; + count[1] = fof_subfind_get_values_per_blockelement(blocknr); + pcsum += pc; + + my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL); + + dims[0] = pc; + dims[1] = fof_subfind_get_values_per_blockelement(blocknr); + hdf5_dataspace_memory = my_H5Screate_simple(rank, dims, NULL); + + hdf5_status = my_H5Dwrite(hdf5_dataset, hdf5_datatype, hdf5_dataspace_memory, hdf5_dataspace_in_file, + H5P_DEFAULT, CommBuffer, buf); + + (void)hdf5_status; + + my_H5Sclose(hdf5_dataspace_memory, H5S_SIMPLE); +#endif /* #ifdef HAVE_HDF5 */ + } + else + { + my_fwrite(CommBuffer, bytes_per_blockelement, pc, fd); + } + } + + n_for_this_task -= pc; + } + } + + if(ThisTask == writeTask) + { + if(All.SnapFormat == 3) + { +#ifdef HAVE_HDF5 + my_H5Dclose(hdf5_dataset, buf); + my_H5Sclose(hdf5_dataspace_in_file, H5S_SIMPLE); + my_H5Tclose(hdf5_datatype); +#endif /* #ifdef HAVE_HDF5 */ + } + else + SKIP; + } + } + } + } + + if(ThisTask == writeTask) + { + if(All.SnapFormat == 3) + { +#ifdef HAVE_HDF5 + my_H5Gclose(hdf5_grp[0], "/Group"); + 
my_H5Gclose(hdf5_grp[1], "/Subhalo"); + my_H5Gclose(hdf5_grp[2], "/IDs"); + my_H5Gclose(hdf5_headergrp, "/Header"); + my_H5Gclose(hdf5_paramsgrp, "/Parameters"); + my_H5Gclose(hdf5_configgrp, "/Config"); + + my_H5Fclose(hdf5_file, fname); +#endif /* #ifdef HAVE_HDF5 */ + } + else + fclose(fd); + } +} + +/*! \brief Copies data from global group array to appropriate output buffer. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * \param[in] startindex First particle index to be included. + * \param[in] pc Particle count; number of particles to be written. + * + * \return void + */ +void fof_subfind_fill_write_buffer(enum fof_subfind_iofields blocknr, int *startindex, int pc) +{ + int n, k, pindex, *ip; + MyOutputFloat *fp; + MyIDType *idp; + + fp = (MyOutputFloat *)CommBuffer; + ip = (int *)CommBuffer; + idp = (MyIDType *)CommBuffer; + + pindex = *startindex; + + for(n = 0; n < pc; pindex++, n++) + { + switch(blocknr) + { + case IO_FOF_LEN: + *ip++ = Group[pindex].Len; + break; + case IO_FOF_MTOT: + *fp++ = Group[pindex].Mass; + break; + case IO_FOF_POS: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = wrap_position(Group[pindex].Pos[k] - All.GlobalDisplacementVector[k], k); +#else /* #ifdef SUBFIND */ + *fp++ = wrap_position(Group[pindex].CM[k] - All.GlobalDisplacementVector[k], k); +#endif /* #ifdef SUBFIND #else */ + break; + case IO_FOF_CM: + for(k = 0; k < 3; k++) + *fp++ = wrap_position(Group[pindex].CM[k] - All.GlobalDisplacementVector[k], k); + break; + case IO_FOF_VEL: + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].Vel[k]; + break; + case IO_FOF_LENTYPE: + for(k = 0; k < NTYPES; k++) + *ip++ = Group[pindex].LenType[k]; + break; + case IO_FOF_MASSTYPE: + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].MassType[k]; + break; + case IO_FOF_SFR: +#ifdef USE_SFR + *fp++ = Group[pindex].Sfr; +#endif /* #ifdef USE_SFR */ + break; + case IO_FOF_M_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].M_Mean200; +#endif /* #ifdef SUBFIND 
*/ + break; + case IO_FOF_R_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].R_Mean200; +#endif /* #ifdef SUBFIND */ + break; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].J_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JDM_MEAN200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JDM_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JGAS_MEAN200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JGas_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JSTARS_MEAN200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JStars_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_MASSTYPE_MEAN200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].MassType_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_LENTYPE_MEAN200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *ip++ = Group[pindex].LenType_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRAC_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].CMFrac_Mean200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRACTYPE_MEAN200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].CMFracType_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_J_CRIT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].J_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JDM_CRIT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JDM_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JGAS_CRIT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JGas_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JSTARS_CRIT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JStars_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_MASSTYPE_CRIT200: +#ifdef SUBFIND + 
for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].MassType_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_LENTYPE_CRIT200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *ip++ = Group[pindex].LenType_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRAC_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].CMFrac_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRACTYPE_CRIT200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].CMFracType_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_J_CRIT500: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].J_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JDM_CRIT500: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JDM_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JGAS_CRIT500: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JGas_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JSTARS_CRIT500: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JStars_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_MASSTYPE_CRIT500: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].MassType_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_LENTYPE_CRIT500: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *ip++ = Group[pindex].LenType_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRAC_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].CMFrac_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRACTYPE_CRIT500: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].CMFracType_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_J_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].J_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JDM_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = 
Group[pindex].JDM_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JGAS_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JGas_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JSTARS_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JStars_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_MASSTYPE_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].MassType_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_LENTYPE_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *ip++ = Group[pindex].LenType_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRAC_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].CMFrac_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRACTYPE_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].CMFracType_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EPOT_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Epot_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EKIN_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ekin_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_ETHR_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ethr_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EPOT_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].Epot_Mean200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EKIN_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ekin_Mean200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_ETHR_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ethr_Mean200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EPOT_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Epot_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EKIN_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ekin_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case 
IO_FOF_ETHR_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ethr_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EPOT_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].Epot_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EKIN_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].Ekin_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_ETHR_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].Ethr_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_J: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].J[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JDM: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JDM[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JGAS: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JGas[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JSTARS: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JStars[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRAC: +#ifdef SUBFIND + *fp++ = Group[pindex].CMFrac; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRACTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].CMFracType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EKIN: +#ifdef SUBFIND + *fp++ = Group[pindex].Ekin; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_ETHR: +#ifdef SUBFIND + *fp++ = Group[pindex].Ethr; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EPOT: +#ifdef SUBFIND + *fp++ = Group[pindex].Epot; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_EKIN: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Ekin; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_ETHR: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Ethr; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_EPOT: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Epot; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_J: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].J[k]; +#endif /* #ifdef 
SUBFIND */ + break; + case IO_SUB_JDM: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jdm[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JGAS: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jgas[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JSTARS: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jstars[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JINHALFRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].J_inHalfRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JDMINHALFRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jdm_inHalfRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JGASINHALFRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jgas_inHalfRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JSTARSINHALFRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jstars_inHalfRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JINRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].J_inRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JDMINRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jdm_inRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JGASINRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jgas_inRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JSTARSINRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jstars_inRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CMFRAC: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].CMFrac; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CMFRACTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].CMFracType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CMFRACINHALFRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].CMFrac_inHalfRad; +#endif /* #ifdef SUBFIND 
*/ + break; + case IO_SUB_CMFRACTYPEINHALFRAD: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].CMFracType_inHalfRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CMFRACINRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].CMFrac_inRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CMFRACTYPEINRAD: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].CMFracType_inRad[k]; +#endif /* #ifdef SUBFIND */ + break; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + break; + case IO_FOF_M_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].M_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_R_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].R_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_M_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].M_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_R_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].R_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_M_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].M_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_R_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].R_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_NSUBS: +#ifdef SUBFIND + *ip++ = Group[pindex].Nsubs; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_FIRSTSUB: +#ifdef SUBFIND + *ip++ = Group[pindex].FirstSub; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_FUZZOFFTYPE: + break; + case IO_SUB_LEN: +#ifdef SUBFIND + *ip++ = SubGroup[pindex].Len; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MTOT: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Mass; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_POS: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = wrap_position(SubGroup[pindex].Pos[k] - All.GlobalDisplacementVector[k], k); +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_VEL: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = SubGroup[pindex].Vel[k]; +#endif /* #ifdef SUBFIND */ + 
break; + case IO_SUB_LENTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *ip++ = SubGroup[pindex].LenType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].MassType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CM: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = wrap_position(SubGroup[pindex].CM[k] - All.GlobalDisplacementVector[k], k); +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_SPIN: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Spin[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_VELDISP: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubVelDisp; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_VMAX: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubVmax; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_VMAXRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubVmaxRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_HALFMASSRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubHalfMassRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_HALFMASSRADTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].SubHalfMassRadType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubMassInRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINRADTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].SubMassInRadType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINHALFRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubMassInHalfRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINHALFRADTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].SubMassInHalfRadType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINMAXRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubMassInMaxRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINMAXRADTYPE: +#ifdef 
SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].SubMassInMaxRadType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_IDMOSTBOUND: +#ifdef SUBFIND + *idp++ = SubGroup[pindex].SubMostBoundID; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_GRNR: +#ifdef SUBFIND + *ip++ = SubGroup[pindex].GrNr; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_PARENT: +#ifdef SUBFIND + *ip++ = SubGroup[pindex].SubParent; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_BFLD_HALO: +#if defined(MHD) && defined(SUBFIND) + *fp++ = SubGroup[pindex].Bfld_Halo * sqrt(4. * M_PI); +#endif /* #if defined(MHD) && defined(SUBFIND) */ + break; + case IO_SUB_BFLD_DISK: +#if defined(MHD) && defined(SUBFIND) + *fp++ = SubGroup[pindex].Bfld_Disk * sqrt(4. * M_PI); +#endif /* #if defined(MHD) && defined(SUBFIND) */ + break; + case IO_SUB_SFR: +#if defined(USE_SFR) && defined(SUBFIND) + *fp++ = SubGroup[pindex].Sfr; +#endif /* #if defined(USE_SFR) && defined(SUBFIND) */ + break; + case IO_SUB_SFRINRAD: +#if defined(USE_SFR) && defined(SUBFIND) + *fp++ = SubGroup[pindex].SfrInRad; +#endif /* #if defined(USE_SFR) && defined(SUBFIND) */ + break; + case IO_SUB_SFRINHALFRAD: +#if defined(USE_SFR) && defined(SUBFIND) + *fp++ = SubGroup[pindex].SfrInHalfRad; +#endif /* #if defined(USE_SFR) && defined(SUBFIND) */ + break; + case IO_SUB_SFRINMAXRAD: +#if defined(USE_SFR) && defined(SUBFIND) + *fp++ = SubGroup[pindex].SfrInMaxRad; +#endif /* #if defined(USE_SFR) && defined(SUBFIND) */ + break; + case IO_FOFSUB_IDS: +#ifdef FOF_STOREIDS + *idp++ = ID_list[pindex].ID; +#endif /* #ifdef FOF_STOREIDS */ + break; + + case IO_FOF_LASTENTRY: + terminate("should not be reached"); + break; + } + } +} + +/*! \brief Associates the output variable blocknumber with its name. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * \param[out] label Name of field. 
+ * + * \return void + */ +void fof_subfind_get_dataset_name(enum fof_subfind_iofields blocknr, char *label) +{ + switch(blocknr) + { + case IO_FOF_LEN: + strcpy(label, "GroupLen"); + break; + case IO_FOF_MTOT: + strcpy(label, "GroupMass"); + break; + case IO_FOF_POS: + strcpy(label, "GroupPos"); + break; + case IO_FOF_CM: + strcpy(label, "GroupCM"); + break; + case IO_FOF_VEL: + strcpy(label, "GroupVel"); + break; + case IO_FOF_LENTYPE: + strcpy(label, "GroupLenType"); + break; + case IO_FOF_MASSTYPE: + strcpy(label, "GroupMassType"); + break; + case IO_FOF_SFR: + strcpy(label, "GroupSFR"); + break; + case IO_FOF_M_MEAN200: + strcpy(label, "Group_M_Mean200"); + break; + case IO_FOF_R_MEAN200: + strcpy(label, "Group_R_Mean200"); + break; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: + strcpy(label, "Group_J_Mean200"); + break; + case IO_FOF_JDM_MEAN200: + strcpy(label, "Group_Jdm_Mean200"); + break; + case IO_FOF_JGAS_MEAN200: + strcpy(label, "Group_Jgas_Mean200"); + break; + case IO_FOF_JSTARS_MEAN200: + strcpy(label, "Group_Jstars_Mean200"); + break; + case IO_FOF_MASSTYPE_MEAN200: + strcpy(label, "Group_MassType_Mean200"); + break; + case IO_FOF_LENTYPE_MEAN200: + strcpy(label, "Group_LenType_Mean200"); + break; + case IO_FOF_CMFRAC_MEAN200: + strcpy(label, "Group_CMFrac_Mean200"); + break; + case IO_FOF_CMFRACTYPE_MEAN200: + strcpy(label, "Group_CMFracType_Mean200"); + break; + case IO_FOF_J_CRIT200: + strcpy(label, "Group_J_Crit200"); + break; + case IO_FOF_JDM_CRIT200: + strcpy(label, "Group_Jdm_Crit200"); + break; + case IO_FOF_JGAS_CRIT200: + strcpy(label, "Group_Jgas_Crit200"); + break; + case IO_FOF_JSTARS_CRIT200: + strcpy(label, "Group_Jstars_Crit200"); + break; + case IO_FOF_MASSTYPE_CRIT200: + strcpy(label, "Group_MassType_Crit200"); + break; + case IO_FOF_LENTYPE_CRIT200: + strcpy(label, "Group_LenType_Crit200"); + break; + case IO_FOF_CMFRAC_CRIT200: + strcpy(label, "Group_CMFrac_Crit200"); + break; + case IO_FOF_CMFRACTYPE_CRIT200: 
+ strcpy(label, "Group_CMFracType_Crit200"); + break; + case IO_FOF_J_CRIT500: + strcpy(label, "Group_J_Crit500"); + break; + case IO_FOF_JDM_CRIT500: + strcpy(label, "Group_Jdm_Crit500"); + break; + case IO_FOF_JGAS_CRIT500: + strcpy(label, "Group_Jgas_Crit500"); + break; + case IO_FOF_JSTARS_CRIT500: + strcpy(label, "Group_Jstars_Crit500"); + break; + case IO_FOF_MASSTYPE_CRIT500: + strcpy(label, "Group_MassType_Crit500"); + break; + case IO_FOF_LENTYPE_CRIT500: + strcpy(label, "Group_LenType_Crit500"); + break; + case IO_FOF_CMFRAC_CRIT500: + strcpy(label, "Group_CMFrac_Crit500"); + break; + case IO_FOF_CMFRACTYPE_CRIT500: + strcpy(label, "Group_CMFracType_Crit500"); + break; + case IO_FOF_J_TOPHAT200: + strcpy(label, "Group_J_TopHat200"); + break; + case IO_FOF_JDM_TOPHAT200: + strcpy(label, "Group_Jdm_TopHat200"); + break; + case IO_FOF_JGAS_TOPHAT200: + strcpy(label, "Group_Jgas_TopHat200"); + break; + case IO_FOF_JSTARS_TOPHAT200: + strcpy(label, "Group_Jstars_TopHat200"); + break; + case IO_FOF_MASSTYPE_TOPHAT200: + strcpy(label, "Group_MassType_TopHat200"); + break; + case IO_FOF_LENTYPE_TOPHAT200: + strcpy(label, "Group_LenType_TopHat200"); + break; + case IO_FOF_CMFRAC_TOPHAT200: + strcpy(label, "Group_CMFrac_TopHat200"); + break; + case IO_FOF_CMFRACTYPE_TOPHAT200: + strcpy(label, "Group_CMFracType_TopHat200"); + break; + case IO_FOF_EPOT_CRIT200: + strcpy(label, "Group_Epot_Crit200"); + break; + case IO_FOF_EKIN_CRIT200: + strcpy(label, "Group_Ekin_Crit200"); + break; + case IO_FOF_ETHR_CRIT200: + strcpy(label, "Group_Ethr_Crit200"); + break; + case IO_FOF_EPOT_MEAN200: + strcpy(label, "Group_Epot_Mean200"); + break; + case IO_FOF_EKIN_MEAN200: + strcpy(label, "Group_Ekin_Mean200"); + break; + case IO_FOF_ETHR_MEAN200: + strcpy(label, "Group_Ethr_Mean200"); + break; + case IO_FOF_EPOT_TOPHAT200: + strcpy(label, "Group_Epot_TopHat200"); + break; + case IO_FOF_EKIN_TOPHAT200: + strcpy(label, "Group_Ekin_TopHat200"); + break; + case IO_FOF_ETHR_TOPHAT200: 
+ strcpy(label, "Group_Ethr_TopHat200"); + break; + case IO_FOF_EPOT_CRIT500: + strcpy(label, "Group_Epot_Crit500"); + break; + case IO_FOF_EKIN_CRIT500: + strcpy(label, "Group_Ekin_Crit500"); + break; + case IO_FOF_ETHR_CRIT500: + strcpy(label, "Group_Ethr_Crit500"); + break; + case IO_FOF_J: + strcpy(label, "Group_J"); + break; + case IO_FOF_JDM: + strcpy(label, "Group_Jdm"); + break; + case IO_FOF_JGAS: + strcpy(label, "Group_Jgas"); + break; + case IO_FOF_JSTARS: + strcpy(label, "Group_Jstars"); + break; + case IO_FOF_CMFRAC: + strcpy(label, "Group_CMFrac"); + break; + case IO_FOF_CMFRACTYPE: + strcpy(label, "Group_CMFracType"); + break; + case IO_FOF_EKIN: + strcpy(label, "GroupEkin"); + break; + case IO_FOF_ETHR: + strcpy(label, "GroupEthr"); + break; + case IO_FOF_EPOT: + strcpy(label, "GroupEpot"); + break; + case IO_SUB_EKIN: + strcpy(label, "SubhaloEkin"); + break; + case IO_SUB_ETHR: + strcpy(label, "SubhaloEthr"); + break; + case IO_SUB_EPOT: + strcpy(label, "SubhaloEpot"); + break; + case IO_SUB_J: + strcpy(label, "Subhalo_J"); + break; + case IO_SUB_JDM: + strcpy(label, "Subhalo_Jdm"); + break; + case IO_SUB_JGAS: + strcpy(label, "Subhalo_Jgas"); + break; + case IO_SUB_JSTARS: + strcpy(label, "Subhalo_Jstars"); + break; + case IO_SUB_JINHALFRAD: + strcpy(label, "Subhalo_JInHalfRad"); + break; + case IO_SUB_JDMINHALFRAD: + strcpy(label, "Subhalo_JdmInHalfRad"); + break; + case IO_SUB_JGASINHALFRAD: + strcpy(label, "Subhalo_JgasInHalfRad"); + break; + case IO_SUB_JSTARSINHALFRAD: + strcpy(label, "Subhalo_JstarsInHalfRad"); + break; + case IO_SUB_JINRAD: + strcpy(label, "Subhalo_JInRad"); + break; + case IO_SUB_JDMINRAD: + strcpy(label, "Subhalo_JdmInRad"); + break; + case IO_SUB_JGASINRAD: + strcpy(label, "Subhalo_JgasInRad"); + break; + case IO_SUB_JSTARSINRAD: + strcpy(label, "Subhalo_JstarsInRad"); + break; + case IO_SUB_CMFRAC: + strcpy(label, "Subhalo_CMFrac"); + break; + case IO_SUB_CMFRACTYPE: + strcpy(label, "Subhalo_CMFracType"); + break; + 
case IO_SUB_CMFRACINHALFRAD: + strcpy(label, "Subhalo_CMFracInHalfRad"); + break; + case IO_SUB_CMFRACTYPEINHALFRAD: + strcpy(label, "Subhalo_CMFracTypeInHalfRad"); + break; + case IO_SUB_CMFRACINRAD: + strcpy(label, "Subhalo_CMFracInRad"); + break; + case IO_SUB_CMFRACTYPEINRAD: + strcpy(label, "Subhalo_CMFracTypeInRad"); + break; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + case IO_FOF_M_CRIT200: + strcpy(label, "Group_M_Crit200"); + break; + case IO_FOF_R_CRIT200: + strcpy(label, "Group_R_Crit200"); + break; + case IO_FOF_M_CRIT500: + strcpy(label, "Group_M_Crit500"); + break; + case IO_FOF_R_CRIT500: + strcpy(label, "Group_R_Crit500"); + break; + case IO_FOF_M_TOPHAT200: + strcpy(label, "Group_M_TopHat200"); + break; + case IO_FOF_R_TOPHAT200: + strcpy(label, "Group_R_TopHat200"); + break; + case IO_FOF_NSUBS: + strcpy(label, "GroupNsubs"); + break; + case IO_FOF_FIRSTSUB: + strcpy(label, "GroupFirstSub"); + break; + case IO_FOF_FUZZOFFTYPE: + strcpy(label, "GroupFuzzOffsetType"); + break; + case IO_SUB_LEN: + strcpy(label, "SubhaloLen"); + break; + case IO_SUB_MTOT: + strcpy(label, "SubhaloMass"); + break; + case IO_SUB_POS: + strcpy(label, "SubhaloPos"); + break; + case IO_SUB_VEL: + strcpy(label, "SubhaloVel"); + break; + case IO_SUB_LENTYPE: + strcpy(label, "SubhaloLenType"); + break; + case IO_SUB_MASSTYPE: + strcpy(label, "SubhaloMassType"); + break; + case IO_SUB_CM: + strcpy(label, "SubhaloCM"); + break; + case IO_SUB_SPIN: + strcpy(label, "SubhaloSpin"); + break; + case IO_SUB_VELDISP: + strcpy(label, "SubhaloVelDisp"); + break; + case IO_SUB_VMAX: + strcpy(label, "SubhaloVmax"); + break; + case IO_SUB_VMAXRAD: + strcpy(label, "SubhaloVmaxRad"); + break; + case IO_SUB_HALFMASSRAD: + strcpy(label, "SubhaloHalfmassRad"); + break; + case IO_SUB_HALFMASSRADTYPE: + strcpy(label, "SubhaloHalfmassRadType"); + break; + case IO_SUB_MASSINRAD: + strcpy(label, "SubhaloMassInRad"); + break; + case IO_SUB_MASSINHALFRAD: + strcpy(label, 
"SubhaloMassInHalfRad"); + break; + case IO_SUB_MASSINMAXRAD: + strcpy(label, "SubhaloMassInMaxRad"); + break; + case IO_SUB_MASSINRADTYPE: + strcpy(label, "SubhaloMassInRadType"); + break; + case IO_SUB_MASSINHALFRADTYPE: + strcpy(label, "SubhaloMassInHalfRadType"); + break; + case IO_SUB_MASSINMAXRADTYPE: + strcpy(label, "SubhaloMassInMaxRadType"); + break; + case IO_SUB_IDMOSTBOUND: + strcpy(label, "SubhaloIDMostbound"); + break; + case IO_SUB_GRNR: + strcpy(label, "SubhaloGrNr"); + break; + case IO_SUB_PARENT: + strcpy(label, "SubhaloParent"); + break; + case IO_SUB_BFLD_HALO: + strcpy(label, "SubhaloBfldHalo"); + break; + case IO_SUB_BFLD_DISK: + strcpy(label, "SubhaloBfldDisk"); + break; + case IO_SUB_SFR: + strcpy(label, "SubhaloSFR"); + break; + case IO_SUB_SFRINRAD: + strcpy(label, "SubhaloSFRinRad"); + break; + case IO_SUB_SFRINHALFRAD: + strcpy(label, "SubhaloSFRinHalfRad"); + break; + case IO_SUB_SFRINMAXRAD: + strcpy(label, "SubhaloSFRinMaxRad"); + break; + case IO_FOFSUB_IDS: + strcpy(label, "ID"); + break; + + case IO_FOF_LASTENTRY: + terminate("should not be reached"); + break; + } +} + +/*! \brief Is this output field a group or subhalo property? + * + * \param[in] blocknr Number (identifier) of the field to be written. 
+ * + * \return 0: group property; 1 subhalo property; 2: both (unused) + */ +int fof_subfind_get_dataset_group(enum fof_subfind_iofields blocknr) +{ + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_MTOT: + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_FOF_LENTYPE: + case IO_FOF_MASSTYPE: + case IO_FOF_SFR: + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + case IO_FOF_FUZZOFFTYPE: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_LENTYPE_TOPHAT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRACTYPE_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: + case IO_FOF_CMFRAC: + case IO_FOF_CMFRACTYPE: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case IO_FOF_EPOT_TOPHAT200: + case 
IO_FOF_EKIN_TOPHAT200: + case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + return 0; + + case IO_SUB_LEN: + case IO_SUB_MTOT: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_LENTYPE: + case IO_SUB_MASSTYPE: + case IO_SUB_CM: + case IO_SUB_SPIN: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_MASSINRADTYPE: + case IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: + case IO_SUB_IDMOSTBOUND: + case IO_SUB_GRNR: + case IO_SUB_PARENT: + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: + case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case IO_SUB_JSTARSINRAD: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACINRAD: + case IO_SUB_CMFRACTYPEINRAD: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + return 1; + + case IO_FOFSUB_IDS: + return 2; + + case IO_FOF_LASTENTRY: + terminate("reached last entry in switch - strange."); + break; + } + + terminate("reached end of function - this should not happen"); + return 0; +} + +/*! \brief Returns number of particles of specific field. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * + * \return Number of entries of this property. 
+ */ +int fof_subfind_get_particles_in_block(enum fof_subfind_iofields blocknr) +{ + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_MTOT: + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_FOF_LENTYPE: + case IO_FOF_MASSTYPE: + case IO_FOF_SFR: + case IO_FOF_FUZZOFFTYPE: + return catalogue_header.Ngroups; + + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_LENTYPE_TOPHAT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRACTYPE_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: + case IO_FOF_CMFRAC: + case IO_FOF_CMFRACTYPE: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case IO_FOF_EPOT_TOPHAT200: + case IO_FOF_EKIN_TOPHAT200: + 
case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef SUBFIND + return catalogue_header.Ngroups; +#else /* #ifdef SUBFIND */ + return 0; +#endif /* #ifdef SUBFIND #else */ + + case IO_SUB_LEN: + case IO_SUB_MTOT: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_LENTYPE: + case IO_SUB_MASSTYPE: + case IO_SUB_CM: + case IO_SUB_SPIN: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_MASSINRADTYPE: + case IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: + case IO_SUB_IDMOSTBOUND: + case IO_SUB_GRNR: + case IO_SUB_PARENT: + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: + case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case IO_SUB_JSTARSINRAD: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACINRAD: + case IO_SUB_CMFRACTYPEINRAD: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef SUBFIND + return catalogue_header.Nsubgroups; +#else /* #ifdef SUBFIND */ + return 0; +#endif /* #ifdef SUBFIND #else */ + + case IO_FOFSUB_IDS: + return catalogue_header.Nids; + + case IO_FOF_LASTENTRY: + terminate("reached last entry in switch - strange."); + break; + } + + terminate("reached end of function - this should not happen"); + return 0; +} + +/*! 
\brief Returns the number of elements per entry of a given property. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * + * \return Number of values per element of the specified property. + */ +int fof_subfind_get_values_per_blockelement(enum fof_subfind_iofields blocknr) +{ + int values = 0; + + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + case IO_SUB_LEN: + case IO_SUB_GRNR: + case IO_SUB_PARENT: + case IO_FOF_MTOT: + case IO_FOF_SFR: + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_SUB_MTOT: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_IDMOSTBOUND: + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: + case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: + case IO_FOFSUB_IDS: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case IO_FOF_EPOT_TOPHAT200: + case IO_FOF_EKIN_TOPHAT200: + case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACINRAD: + case IO_FOF_CMFRAC: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + values = 1; + break; + + case IO_FOF_LENTYPE: + case IO_SUB_LENTYPE: + case IO_FOF_MASSTYPE: + case IO_SUB_MASSTYPE: + 
case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRADTYPE: + case IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: + case IO_FOF_FUZZOFFTYPE: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_CMFRACTYPE: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACTYPEINRAD: + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_LENTYPE_TOPHAT200: + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_CMFRACTYPE_TOPHAT200: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + values = NTYPES; + break; + + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_CM: + case IO_SUB_SPIN: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case IO_SUB_JSTARSINRAD: + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + values = 3; + break; + + case IO_FOF_LASTENTRY: + terminate("reached last entry in switch - should not get here"); + break; + } + return values; +} + +/*! 
\brief Returns the number of bytes per element of a given property. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * + * \return Number of bytes per element for this property. + */ +int fof_subfind_get_bytes_per_blockelement(enum fof_subfind_iofields blocknr) +{ + int bytes_per_blockelement = 0; + + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + case IO_SUB_LEN: + case IO_SUB_GRNR: + case IO_SUB_PARENT: + bytes_per_blockelement = sizeof(int); + break; + + case IO_FOF_LENTYPE: + case IO_SUB_LENTYPE: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_LENTYPE_TOPHAT200: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + bytes_per_blockelement = NTYPES * sizeof(int); + break; + + case IO_FOF_MTOT: + case IO_FOF_SFR: + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_SUB_MTOT: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: + case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_CMFRAC: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACINRAD: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case 
IO_FOF_EPOT_TOPHAT200: + case IO_FOF_EKIN_TOPHAT200: + case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + bytes_per_blockelement = sizeof(MyOutputFloat); + break; + + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_CM: + case IO_SUB_SPIN: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case IO_SUB_JSTARSINRAD: + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + bytes_per_blockelement = 3 * sizeof(MyOutputFloat); + break; + + case IO_FOF_MASSTYPE: + case IO_SUB_MASSTYPE: + case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRADTYPE: + case IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_CMFRACTYPE_TOPHAT200: + case IO_FOF_CMFRACTYPE: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACTYPEINRAD: +#endif /* #ifdef 
SUBFIND_EXTENDED_PROPERTIES */ + bytes_per_blockelement = NTYPES * sizeof(MyOutputFloat); + break; + + case IO_SUB_IDMOSTBOUND: + case IO_FOFSUB_IDS: + bytes_per_blockelement = sizeof(MyIDType); + break; + + case IO_FOF_FUZZOFFTYPE: + bytes_per_blockelement = NTYPES * sizeof(long long); + break; + + case IO_FOF_LASTENTRY: + terminate("reached last entry in switch - should not get here"); + break; + } + return bytes_per_blockelement; +} + +/*! \brief Returns key for datatype of element of a given property. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * + * \return Key for datatype: 0: int, 1: (output)float, 2: long long. + */ +int fof_subfind_get_datatype(enum fof_subfind_iofields blocknr) +{ + int typekey = 0; + + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_LENTYPE: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + case IO_SUB_LEN: + case IO_SUB_LENTYPE: + case IO_SUB_GRNR: + case IO_SUB_PARENT: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_LENTYPE_TOPHAT200: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + typekey = 0; /* native int */ + break; + + case IO_FOF_MTOT: + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_FOF_MASSTYPE: + case IO_FOF_SFR: + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_SUB_MTOT: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_MASSTYPE: + case IO_SUB_CM: + case IO_SUB_SPIN: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_MASSINRADTYPE: + case IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: + 
case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRACTYPE_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: + case IO_FOF_CMFRAC: + case IO_FOF_CMFRACTYPE: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case IO_FOF_EPOT_TOPHAT200: + case IO_FOF_EKIN_TOPHAT200: + case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case IO_SUB_JSTARSINRAD: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACINRAD: + case 
IO_SUB_CMFRACTYPEINRAD: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + typekey = 1; /* native MyOutputFloat */ + break; + + case IO_SUB_IDMOSTBOUND: + case IO_FOFSUB_IDS: +#ifdef LONGIDS + typekey = 2; /* native long long */ +#else /* #ifdef LONGIDS */ + typekey = 0; /* native int */ +#endif /* #ifdef LONGIDS #else */ + break; + + case IO_FOF_FUZZOFFTYPE: + typekey = 2; /* native long long */ + break; + + case IO_FOF_LASTENTRY: + terminate("should not be reached"); + break; + } + + return typekey; +} + +/*! \brief Determines if block is present in the current code configuration. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * + * \return 0: not present; 1: present. + */ +int fof_subfind_blockpresent(enum fof_subfind_iofields blocknr) +{ + int present = 0; + + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_LENTYPE: + case IO_FOF_MTOT: + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_FOF_MASSTYPE: + present = 1; + break; + + case IO_FOF_SFR: + case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: +#ifdef USE_SFR + present = 1; +#endif /* #ifdef USE_SFR */ + break; + + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: +#ifdef MHD + present = 1; +#endif /* #ifdef MHD */ + break; + + case IO_FOF_FUZZOFFTYPE: + break; + + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + case IO_SUB_LEN: + case IO_SUB_LENTYPE: + case IO_SUB_MTOT: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_MASSTYPE: + case IO_SUB_CM: + case IO_SUB_SPIN: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_MASSINRADTYPE: + case 
IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: + case IO_SUB_IDMOSTBOUND: + case IO_SUB_GRNR: + case IO_SUB_PARENT: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRACTYPE_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: + case IO_FOF_CMFRAC: + case IO_FOF_CMFRACTYPE: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_LENTYPE_TOPHAT200: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case IO_FOF_EPOT_TOPHAT200: + case IO_FOF_EKIN_TOPHAT200: + case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case 
IO_SUB_JSTARSINRAD: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACINRAD: + case IO_SUB_CMFRACTYPEINRAD: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ +#ifdef SUBFIND + present = 1; +#else /* #ifdef SUBFIND */ + present = 0; +#endif /* #ifdef SUBFIND #else */ + break; + + case IO_FOFSUB_IDS: +#ifdef FOF_STOREIDS + present = 1; +#else /* #ifdef FOF_STOREIDS */ + present = 0; +#endif /* #ifdef FOF_STOREIDS #else */ + break; + + case IO_FOF_LASTENTRY: + terminate("should not be reached"); + break; + } + return present; +} + +/*! \brief Get the 4 letter IO label for a given output field. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * \param[out] label String with the label. + * + * \return void + */ +void fof_subfind_get_Tab_IO_Label(enum fof_subfind_iofields blocknr, char *label) +{ + switch(blocknr) + { + case IO_FOF_LEN: + strncpy(label, "FLEN", 4); + break; + case IO_FOF_MTOT: + strncpy(label, "FMAS", 4); + break; + case IO_FOF_POS: + strncpy(label, "FPOS", 4); + break; + case IO_FOF_CM: + strncpy(label, "FGCM", 4); + break; + case IO_FOF_VEL: + strncpy(label, "FVEL", 4); + break; + case IO_FOF_LENTYPE: + strncpy(label, "FLTY", 4); + break; + case IO_FOF_MASSTYPE: + strncpy(label, "FMTY", 4); + break; + case IO_FOF_SFR: + strncpy(label, "FSFR", 4); + break; + case IO_FOF_M_MEAN200: + strncpy(label, "FMM2", 4); + break; + case IO_FOF_R_MEAN200: + strncpy(label, "FRM2", 4); + break; + case IO_FOF_M_CRIT200: + strncpy(label, "FMC2", 4); + break; + case IO_FOF_R_CRIT200: + strncpy(label, "FRC2", 4); + break; + case IO_FOF_M_TOPHAT200: + strncpy(label, "FMT2", 4); + break; + case IO_FOF_R_TOPHAT200: + strncpy(label, "FRT2", 4); + break; + case IO_FOF_M_CRIT500: + strncpy(label, "FMC5", 4); + break; + case IO_FOF_R_CRIT500: + strncpy(label, "FRC5", 4); + break; + case IO_FOF_NSUBS: + strncpy(label, "FNSH", 4); + break; + case IO_FOF_FIRSTSUB: + 
strncpy(label, "FFSH", 4); + break; + case IO_FOF_FUZZOFFTYPE: + strncpy(label, "FUOF", 4); + break; + + case IO_SUB_LEN: + strncpy(label, "SLEN", 4); + break; + case IO_SUB_MTOT: + strncpy(label, "SMAS", 4); + break; + case IO_SUB_POS: + strncpy(label, "SPOS", 4); + break; + case IO_SUB_VEL: + strncpy(label, "SVEL", 4); + break; + case IO_SUB_LENTYPE: + strncpy(label, "SLTY", 4); + break; + case IO_SUB_MASSTYPE: + strncpy(label, "SMTY", 4); + break; + case IO_SUB_CM: + strncpy(label, "SCMP", 4); + break; + case IO_SUB_SPIN: + strncpy(label, "SSPI", 4); + break; + case IO_SUB_VELDISP: + strncpy(label, "SVDI", 4); + break; + case IO_SUB_VMAX: + strncpy(label, "SVMX", 4); + break; + case IO_SUB_VMAXRAD: + strncpy(label, "SVRX", 4); + break; + case IO_SUB_HALFMASSRAD: + strncpy(label, "SHMR", 4); + break; + case IO_SUB_HALFMASSRADTYPE: + strncpy(label, "SHMT", 4); + break; + case IO_SUB_MASSINRAD: + strncpy(label, "SMIR", 4); + break; + case IO_SUB_MASSINHALFRAD: + strncpy(label, "SMIH", 4); + break; + case IO_SUB_MASSINMAXRAD: + strncpy(label, "SMIM", 4); + break; + case IO_SUB_MASSINRADTYPE: + strncpy(label, "SMIT", 4); + break; + case IO_SUB_MASSINHALFRADTYPE: + strncpy(label, "SMHT", 4); + break; + case IO_SUB_MASSINMAXRADTYPE: + strncpy(label, "SMMT", 4); + break; + case IO_SUB_IDMOSTBOUND: + strncpy(label, "SIDM", 4); + break; + case IO_SUB_GRNR: + strncpy(label, "SGNR", 4); + break; + case IO_SUB_PARENT: + strncpy(label, "SPRT", 4); + break; + case IO_SUB_BFLD_HALO: + strncpy(label, "BFDH", 4); + break; + case IO_SUB_BFLD_DISK: + strncpy(label, "BFDD", 4); + break; + case IO_SUB_SFR: + strncpy(label, "SSFR", 4); + break; + case IO_SUB_SFRINRAD: + strncpy(label, "SSFI", 4); + break; + case IO_SUB_SFRINHALFRAD: + strncpy(label, "SSFH", 4); + break; + case IO_SUB_SFRINMAXRAD: + strncpy(label, "SSFM", 4); + break; + case IO_FOFSUB_IDS: + strncpy(label, "PIDS", 4); + break; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: + strncpy(label, "FJM2", 4); + 
break; + case IO_FOF_JDM_MEAN200: + strncpy(label, "JDM2", 4); + break; + case IO_FOF_JGAS_MEAN200: + strncpy(label, "JGM2", 4); + break; + case IO_FOF_JSTARS_MEAN200: + strncpy(label, "JSM2", 4); + break; + case IO_FOF_MASSTYPE_MEAN200: + strncpy(label, "MTM2", 4); + break; + case IO_FOF_LENTYPE_MEAN200: + strncpy(label, "LTM2", 4); + break; + case IO_FOF_CMFRAC_MEAN200: + strncpy(label, "CFM2", 4); + break; + case IO_FOF_CMFRACTYPE_MEAN200: + strncpy(label, "FTM2", 4); + break; + case IO_FOF_J_CRIT200: + strncpy(label, "FJC2", 4); + break; + case IO_FOF_JDM_CRIT200: + strncpy(label, "JDC2", 4); + break; + case IO_FOF_JGAS_CRIT200: + strncpy(label, "JGC2", 4); + break; + case IO_FOF_JSTARS_CRIT200: + strncpy(label, "JSC2", 4); + break; + case IO_FOF_MASSTYPE_CRIT200: + strncpy(label, "MTC2", 4); + break; + case IO_FOF_LENTYPE_CRIT200: + strncpy(label, "LTC2", 4); + break; + case IO_FOF_CMFRAC_CRIT200: + strncpy(label, "CFC2", 4); + break; + case IO_FOF_CMFRACTYPE_CRIT200: + strncpy(label, "FTC2", 4); + break; + case IO_FOF_J_TOPHAT200: + strncpy(label, "FJT2", 4); + break; + case IO_FOF_JDM_TOPHAT200: + strncpy(label, "JDT2", 4); + break; + case IO_FOF_JGAS_TOPHAT200: + strncpy(label, "JGT2", 4); + break; + case IO_FOF_JSTARS_TOPHAT200: + strncpy(label, "JST2", 4); + break; + case IO_FOF_MASSTYPE_TOPHAT200: + strncpy(label, "MTT2", 4); + break; + case IO_FOF_LENTYPE_TOPHAT200: + strncpy(label, "LTT2", 4); + break; + case IO_FOF_CMFRAC_TOPHAT200: + strncpy(label, "CFT2", 4); + break; + case IO_FOF_CMFRACTYPE_TOPHAT200: + strncpy(label, "FTT2", 4); + break; + case IO_FOF_J_CRIT500: + strncpy(label, "FJC5", 4); + break; + case IO_FOF_JDM_CRIT500: + strncpy(label, "JDC5", 4); + break; + case IO_FOF_JGAS_CRIT500: + strncpy(label, "JGC5", 4); + break; + case IO_FOF_JSTARS_CRIT500: + strncpy(label, "JSC5", 4); + break; + case IO_FOF_MASSTYPE_CRIT500: + strncpy(label, "MTC5", 4); + break; + case IO_FOF_LENTYPE_CRIT500: + strncpy(label, "LTC5", 4); + break; + case 
IO_FOF_CMFRAC_CRIT500: + strncpy(label, "CFC5", 4); + break; + case IO_FOF_CMFRACTYPE_CRIT500: + strncpy(label, "FTC5", 4); + break; + case IO_FOF_J: + strncpy(label, "FOFJ", 4); + break; + case IO_FOF_JDM: + strncpy(label, "FOJD", 4); + break; + case IO_FOF_JGAS: + strncpy(label, "FOJG", 4); + break; + case IO_FOF_JSTARS: + strncpy(label, "FOJS", 4); + break; + case IO_FOF_CMFRAC: + strncpy(label, "FOCF", 4); + break; + case IO_FOF_CMFRACTYPE: + strncpy(label, "FOFT", 4); + break; + case IO_FOF_EKIN: + strncpy(label, "EKIN", 4); + break; + case IO_FOF_ETHR: + strncpy(label, "ETHR", 4); + break; + case IO_FOF_EPOT: + strncpy(label, "EPOT", 4); + break; + + case IO_FOF_EPOT_CRIT200: + strncpy(label, "EPO1", 4); + break; + case IO_FOF_EKIN_CRIT200: + strncpy(label, "EKI1", 4); + break; + case IO_FOF_ETHR_CRIT200: + strncpy(label, "ETH1", 4); + break; + case IO_FOF_EPOT_MEAN200: + strncpy(label, "EPO2", 4); + break; + case IO_FOF_EKIN_MEAN200: + strncpy(label, "EKI2", 4); + break; + case IO_FOF_ETHR_MEAN200: + strncpy(label, "ETH2", 4); + break; + case IO_FOF_EPOT_TOPHAT200: + strncpy(label, "EPO3", 4); + break; + case IO_FOF_EKIN_TOPHAT200: + strncpy(label, "EKI3", 4); + break; + case IO_FOF_ETHR_TOPHAT200: + strncpy(label, "ETH3", 4); + break; + case IO_FOF_EPOT_CRIT500: + strncpy(label, "EPO4", 4); + break; + case IO_FOF_EKIN_CRIT500: + strncpy(label, "EKI4", 4); + break; + case IO_FOF_ETHR_CRIT500: + strncpy(label, "ETH4", 4); + break; + + case IO_SUB_EKIN: + strncpy(label, "SEKN", 4); + break; + case IO_SUB_ETHR: + strncpy(label, "SETH", 4); + break; + case IO_SUB_EPOT: + strncpy(label, "SEPT", 4); + break; + case IO_SUB_J: + strncpy(label, "SUBJ", 4); + break; + case IO_SUB_JDM: + strncpy(label, "SJDM", 4); + break; + case IO_SUB_JGAS: + strncpy(label, "SJGS", 4); + break; + case IO_SUB_JSTARS: + strncpy(label, "SJST", 4); + break; + case IO_SUB_JINHALFRAD: + strncpy(label, "SJHR", 4); + break; + case IO_SUB_JDMINHALFRAD: + strncpy(label, "SJDH", 4); + break; + 
case IO_SUB_JGASINHALFRAD: + strncpy(label, "SJGH", 4); + break; + case IO_SUB_JSTARSINHALFRAD: + strncpy(label, "SJSH", 4); + break; + case IO_SUB_JINRAD: + strncpy(label, "SJMR", 4); + break; + case IO_SUB_JDMINRAD: + strncpy(label, "SJDR", 4); + break; + case IO_SUB_JGASINRAD: + strncpy(label, "SJGR", 4); + break; + case IO_SUB_JSTARSINRAD: + strncpy(label, "SJSR", 4); + break; + case IO_SUB_CMFRAC: + strncpy(label, "SCMF", 4); + break; + case IO_SUB_CMFRACTYPE: + strncpy(label, "SCMT", 4); + break; + case IO_SUB_CMFRACINHALFRAD: + strncpy(label, "SCMH", 4); + break; + case IO_SUB_CMFRACTYPEINHALFRAD: + strncpy(label, "SCTH", 4); + break; + case IO_SUB_CMFRACINRAD: + strncpy(label, "SCMR", 4); + break; + case IO_SUB_CMFRACTYPEINRAD: + strncpy(label, "SCTR", 4); + break; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + case IO_FOF_LASTENTRY: + terminate("should not be reached"); + break; + } +} + +#ifdef HAVE_HDF5 +/*! \brief Function that handles writing hdf5 header. + * + * \param[in] handle Handle for header hdf5 group. 
+ * + * \return void + */ +void fof_subfind_write_header_attributes_in_hdf5(hid_t handle) +{ + hid_t hdf5_dataspace, hdf5_attribute; + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Ngroups_ThisFile", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.Ngroups, "Ngroups_ThisFile"); + my_H5Aclose(hdf5_attribute, "Ngroups_ThisFile"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Nsubgroups_ThisFile", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.Nsubgroups, "Nsubgroups_ThisFile"); + my_H5Aclose(hdf5_attribute, "Nsubgroups_ThisFile"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Nids_ThisFile", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.Nids, "Nids_ThisFile"); + my_H5Aclose(hdf5_attribute, "Nids_ThisFile"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Ngroups_Total", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.TotNgroups, "Ngroups_Total"); + my_H5Aclose(hdf5_attribute, "Ngroups_Total"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Nsubgroups_Total", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.TotNsubgroups, "Nsubgroups_Total"); + my_H5Aclose(hdf5_attribute, "Nsubgroups_Total"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Nids_Total", H5T_NATIVE_INT64, hdf5_dataspace, H5P_DEFAULT); + 
my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT64, &catalogue_header.TotNids, "Nids_Total"); + my_H5Aclose(hdf5_attribute, "Nids_Total"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "NumFiles", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.num_files, "NumFiles"); + my_H5Aclose(hdf5_attribute, "NumFiles"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Time", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &catalogue_header.time, "Time"); + my_H5Aclose(hdf5_attribute, "Time"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Redshift", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &catalogue_header.redshift, "Redshift"); + my_H5Aclose(hdf5_attribute, "Redshift"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "HubbleParam", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &catalogue_header.HubbleParam, "HubbleParam"); + my_H5Aclose(hdf5_attribute, "HubbleParam"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "BoxSize", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &catalogue_header.BoxSize, "BoxSize"); + my_H5Aclose(hdf5_attribute, "BoxSize"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Omega0", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, 
&catalogue_header.Omega0, "Omega0"); + my_H5Aclose(hdf5_attribute, "Omega0"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "OmegaLambda", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &catalogue_header.OmegaLambda, "OmegaLambda"); + my_H5Aclose(hdf5_attribute, "OmegaLambda"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "FlagDoubleprecision", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.flag_doubleprecision, "FlagDoubleprecision"); + my_H5Aclose(hdf5_attribute, "FlagDoubleprecision"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hid_t atype = my_H5Tcopy(H5T_C_S1); + + my_H5Tset_size(atype, strlen(GIT_COMMIT)); + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Git_commit", atype, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, atype, GIT_COMMIT, "Git_commit"); + my_H5Aclose(hdf5_attribute, "Git_commit"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + my_H5Tset_size(atype, strlen(GIT_DATE)); + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Git_date", atype, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, atype, GIT_DATE, "Git_date"); + my_H5Aclose(hdf5_attribute, "Git_date"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); +} +#endif /* #ifdef HAVE_HDF5 */ + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof_nearest.c b/src/amuse/community/arepo/src/fof/fof_nearest.c new file mode 100644 index 0000000000..c21badf579 --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof_nearest.c @@ -0,0 +1,473 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof_nearest.c + * \date 05/2018 + * \brief Routine to find nearest primary link type particle to link + * secondary link type to FoF groups. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * double fof_find_nearest_dmparticle(MyIDType * vMinID, int + * *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask) + * static int fof_find_nearest_dmparticle_evaluate(int target, + * int mode, int threadid) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +static MyFloat *fof_nearest_distance; +static MyFloat *fof_nearest_hsml; + +static MyIDType *MinID; +static int *Head, *Len, *Next, *Tail, *MinIDTask; + 
+static int fof_find_nearest_dmparticle_evaluate(int target, int mode, int threadid); + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Hsml; + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First node of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + in->Hsml = fof_nearest_hsml[i]; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyFloat Distance; + MyIDType MinID; + int MinIDTask; +#if defined(SUBFIND) + MyFloat DM_Hsml; +#endif /* #if defined(SUBFIND) */ +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (PS) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally?
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(out->Distance < fof_nearest_distance[i]) + { + fof_nearest_distance[i] = out->Distance; + MinID[i] = out->MinID; + MinIDTask[i] = out->MinIDTask; +#if defined(SUBFIND) + PS[i].Hsml = out->DM_Hsml; +#endif /* #if defined(SUBFIND) */ + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int i; + + /* do local particles */ + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= NumPart) + break; + + if((1 << P[i].Type) & (FOF_SECONDARY_LINK_TYPES)) + { + if(fof_nearest_distance[i] > 1.0e29) /* we haven't found any neighbor yet */ + { + fof_find_nearest_dmparticle_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + fof_find_nearest_dmparticle_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Finds nearest dark matter particle for secondary link types + * + * \param[out] vMinID Pointer to MinID array. + * \param[in] vHead Pointer to Head array. + * \param[in] vLen Pointer to Len array. + * \param[in] vNext Pointer to Next array. + * \param[in] vTail Pointer to Tail array. + * \param[out] vMinIDTask Pointer to MinIDTask array. + * + * \return Time spent in this function. 
+ */ +double fof_find_nearest_dmparticle(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask) +{ + MinID = vMinID; + Head = vHead; + Len = vLen; + Next = vNext; + Tail = vTail; + MinIDTask = vMinIDTask; + + int i, n, npleft, iter; + long long ntot; + double tstart = second(); + + mpi_printf("FOF: Start finding nearest dm-particle (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + fof_nearest_distance = (MyFloat *)mymalloc("fof_nearest_distance", sizeof(MyFloat) * NumPart); + fof_nearest_hsml = (MyFloat *)mymalloc("fof_nearest_hsml", sizeof(MyFloat) * NumPart); + + for(n = 0; n < NumPart; n++) + { + if((1 << P[n].Type) & (FOF_SECONDARY_LINK_TYPES)) + { + fof_nearest_distance[n] = 1.0e30; + if(P[n].Type == 0) +#ifdef USE_AREPO_FOF_WITH_GADGET_FIX + fof_nearest_hsml[n] = SphP[n].Hsml; +#else /* #ifdef USE_AREPO_FOF_WITH_GADGET_FIX */ + fof_nearest_hsml[n] = get_cell_radius(n); +#endif /* #ifdef USE_AREPO_FOF_WITH_GADGET_FIX #else */ + else + fof_nearest_hsml[n] = 0.1 * LinkL; + } + } + + generic_set_MaxNexport(); + + iter = 0; + /* we will repeat the whole thing for those particles where we didn't find enough neighbours */ + do + { + double t0 = second(); + + generic_comm_pattern(NumPart, kernel_local, kernel_imported); + + /* do final operations on results */ + for(i = 0, npleft = 0; i < NumPart; i++) + { + if((1 << P[i].Type) & (FOF_SECONDARY_LINK_TYPES)) + { + if(fof_nearest_distance[i] > 1.0e29) + { + if(fof_nearest_hsml[i] < 4 * LinkL) /* we only search out to a maximum distance */ + { + /* need to redo this particle */ + npleft++; + fof_nearest_hsml[i] *= 2.0; + if(iter >= MAXITER - 10) + { + printf("FOF: i=%d task=%d ID=%d P[i].Type=%d Hsml=%g LinkL=%g nearest=%g pos=(%g|%g|%g)\n", i, ThisTask, + (int)P[i].ID, P[i].Type, fof_nearest_hsml[i], LinkL, fof_nearest_distance[i], P[i].Pos[0], + P[i].Pos[1], P[i].Pos[2]); + myflush(stdout); + } + } + else + { + fof_nearest_distance[i] = 0; /* we do not continue to 
search for this particle */ + } + } + } + } + + sumup_large_ints(1, &npleft, &ntot); + + double t1 = second(); + if(ntot > 0) + { + iter++; + if(iter > 0) + mpi_printf("FOF: fof-nearest iteration %d: need to repeat for %lld particles. (took = %g sec)\n", iter, ntot, + timediff(t0, t1)); + + if(iter > MAXITER) + terminate("FOF: failed to converge in fof-nearest\n"); + } + } + while(ntot > 0); + + myfree(fof_nearest_hsml); + myfree(fof_nearest_distance); + + mpi_printf("FOF: done finding nearest dm-particle\n"); + + double tend = second(); + return timediff(tstart, tend); +} + +/*! \brief Evaluate function to finding nearest dark matter particle for + * secondary link types. + * + * \param[in] target Index of particle/cell. + * \param[in] mode Flag if it operates on local or imported data. + * \param[in] threadid ID of thread. + * + * \return 0 + */ +static int fof_find_nearest_dmparticle_evaluate(int target, int mode, int threadid) +{ + int k, no, index, numnodes, *firstnode; + double h, r2max, dist; + double dx, dy, dz, r2; + MyDouble *pos; + data_in local, *target_data; + data_out out; + + double xtmp, ytmp, ztmp; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->Pos; + h = target_data->Hsml; + + index = -1; + r2max = 1.0e30; + + /* Now start the actual tree-walk computation for this particle */ + + for(k = 0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = Tree_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + if(no < Tree_MaxPart) /* single particle */ + { + int p = no; + no = Nextnode[no]; + + if(!((1 << P[p].Type) & (FOF_SECONDARY_LINK_TARGET_TYPES))) + continue; + + dist = h; + dx = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - pos[0]); + 
if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - pos[2]); + if(dz > dist) + continue; + + r2 = dx * dx + dy * dy + dz * dz; + if(r2 < r2max && r2 < h * h) + { + index = p; + r2max = r2; + } + } + else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + if(mode == MODE_IMPORTED_PARTICLES) + { + if(no < + Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + break; + } + + struct NODE *current = &Nodes[no]; + + no = current->u.d.sibling; /* in case the node can be discarded */ + + dist = h + 0.5 * current->len; + dx = FOF_NEAREST_LONG_X(current->center[0] - pos[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]); + if(dz > dist) + continue; + + /* now test against the minimal sphere enclosing everything */ + dist += FACT1 * current->len; + if(dx * dx + dy * dy + dz * dz > dist * dist) + continue; + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */ + { + terminate("do not expect imported points here"); + } + else /* pseudo particle */ + { + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == MODE_IMPORTED_PARTICLES"); + + if(target >= 0) + tree_treefind_export_node_threads(no, target, threadid); + + no = Nextnode[no - Tree_MaxNodes]; + } + } + } + + if(index >= 0) + { + out.Distance = sqrt(r2max); + out.MinID = MinID[Head[index]]; + out.MinIDTask = MinIDTask[Head[index]]; +#if defined(SUBFIND) + out.DM_Hsml = PS[index].Hsml; +#endif /* #if defined(SUBFIND) */ + } + else + { + out.Distance = 2.0e30; + out.MinID = 0; + out.MinIDTask = -1; +#if defined(SUBFIND) + out.DM_Hsml = 0; +#endif /* #if defined(SUBFIND) */ + } + + /* Now collect the result at the right place 
*/ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof_sort_kernels.c b/src/amuse/community/arepo/src/fof/fof_sort_kernels.c new file mode 100644 index 0000000000..e10627ca7f --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof_sort_kernels.c @@ -0,0 +1,495 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof_sort_kernels.c + * \date 05/2018 + * \brief Various sort kernels used by the parallel FoF group finder. 
+ * \details contains functions: + * int fof_compare_local_sort_data_targetindex(const void *a, + * const void *b) + * int fof_compare_aux_sort_Type(const void *a, const void *b) + * int fof_compare_aux_sort_FileOrder(const void *a, + * const void *b) + * int fof_compare_aux_sort_GrNr(const void *a, const void *b) + * int fof_compare_aux_sort_OriginTask_OriginIndex(const void + * *a, const void *b) + * int fof_compare_FOF_PList_MinID(const void *a, const void *b) + * int fof_compare_FOF_GList_MinID(const void *a, const void *b) + * int fof_compare_FOF_GList_MinIDTask(const void *a, + * const void *b) + * int fof_compare_FOF_GList_MinIDTask_MinID(const void *a, + * const void *b) + * int fof_compare_FOF_GList_LocCountTaskDiffMinID(const void + * *a, const void *b) + * int fof_compare_FOF_GList_ExtCountMinID(const void *a, + * const void *b) + * int fof_compare_Group_MinID(const void *a, const void *b) + * int fof_compare_Group_GrNr(const void *a, const void *b) + * int fof_compare_Group_MinIDTask(const void *a, const void *b) + * int fof_compare_Group_MinIDTask_MinID(const void *a, + * const void *b) + * int fof_compare_Group_Len(const void *a, const void *b) + * int fof_compare_ID_list_GrNrID(const void *a, const void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +/*! \brief Comparison function for fof_local_sort_data objects. + * + * Sorting kernel comparing element targetindex. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_local_sort_data_targetindex(const void *a, const void *b) +{ + if(((struct fof_local_sort_data *)a)->targetindex < ((struct fof_local_sort_data *)b)->targetindex) + return -1; + + if(((struct fof_local_sort_data *)a)->targetindex > ((struct fof_local_sort_data *)b)->targetindex) + return +1; + + return 0; +} + +/*! \brief Comparison function for data_aux_sort objects. + * + * Sorting kernel comparing element Type. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_aux_sort_Type(const void *a, const void *b) +{ + if(((struct data_aux_sort *)a)->Type < ((struct data_aux_sort *)b)->Type) + return -1; + + if(((struct data_aux_sort *)a)->Type > ((struct data_aux_sort *)b)->Type) + return +1; + + return 0; +} + +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) +/*! \brief Comparison function for data_aux_sort objects. + * + * Sorting kernel comparing element FileOrder. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_aux_sort_FileOrder(const void *a, const void *b) +{ + if(((struct data_aux_sort *)a)->FileOrder < ((struct data_aux_sort *)b)->FileOrder) + return -1; + + if(((struct data_aux_sort *)a)->FileOrder > ((struct data_aux_sort *)b)->FileOrder) + return +1; + + return 0; +} +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + +/*! \brief Comparison function for data_aux_sort objects. + * + * Sorting kernel comparing elements (most important first): + * GrNr, SubNr and DM_BindingEnergy (SUBFIND only), ID + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b.
+ */ +int fof_compare_aux_sort_GrNr(const void *a, const void *b) +{ + if(((struct data_aux_sort *)a)->GrNr < ((struct data_aux_sort *)b)->GrNr) + return -1; + + if(((struct data_aux_sort *)a)->GrNr > ((struct data_aux_sort *)b)->GrNr) + return +1; + +#ifdef SUBFIND + if(((struct data_aux_sort *)a)->SubNr < ((struct data_aux_sort *)b)->SubNr) + return -1; + + if(((struct data_aux_sort *)a)->SubNr > ((struct data_aux_sort *)b)->SubNr) + return +1; + + if(((struct data_aux_sort *)a)->DM_BindingEnergy < ((struct data_aux_sort *)b)->DM_BindingEnergy) + return -1; + + if(((struct data_aux_sort *)a)->DM_BindingEnergy > ((struct data_aux_sort *)b)->DM_BindingEnergy) + return +1; +#endif /* #ifdef SUBFIND */ + + if(((struct data_aux_sort *)a)->ID < ((struct data_aux_sort *)b)->ID) + return -1; + + if(((struct data_aux_sort *)a)->ID > ((struct data_aux_sort *)b)->ID) + return +1; + + return 0; +} + +/*! \brief Comparison function for data_aux_sort objects. + * + * Sorting kernel comparing elements (most important first): + * OriginTask, OriginIndex + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_aux_sort_OriginTask_OriginIndex(const void *a, const void *b) +{ + if(((struct data_aux_sort *)a)->OriginTask < ((struct data_aux_sort *)b)->OriginTask) + return -1; + + if(((struct data_aux_sort *)a)->OriginTask > ((struct data_aux_sort *)b)->OriginTask) + return +1; + + if(((struct data_aux_sort *)a)->OriginIndex < ((struct data_aux_sort *)b)->OriginIndex) + return -1; + + if(((struct data_aux_sort *)a)->OriginIndex > ((struct data_aux_sort *)b)->OriginIndex) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_particle_list objects. + * + * Sorting kernel comparing element MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_FOF_PList_MinID(const void *a, const void *b) +{ + if(((struct fof_particle_list *)a)->MinID < ((struct fof_particle_list *)b)->MinID) + return -1; + + if(((struct fof_particle_list *)a)->MinID > ((struct fof_particle_list *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_group_list objects. + * + * Sorting kernel comparing element MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_FOF_GList_MinID(const void *a, const void *b) +{ + if(((struct fof_group_list *)a)->MinID < ((struct fof_group_list *)b)->MinID) + return -1; + + if(((struct fof_group_list *)a)->MinID > ((struct fof_group_list *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_group_list objects. + * + * Sorting kernel comparing element MinIDTask. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_FOF_GList_MinIDTask(const void *a, const void *b) +{ + if(((struct fof_group_list *)a)->MinIDTask < ((struct fof_group_list *)b)->MinIDTask) + return -1; + + if(((struct fof_group_list *)a)->MinIDTask > ((struct fof_group_list *)b)->MinIDTask) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_group_list objects. + * + * Sorting kernel comparing elements (most important first): + * MinIDTask, MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_FOF_GList_MinIDTask_MinID(const void *a, const void *b) +{ + if(((struct fof_group_list *)a)->MinIDTask < ((struct fof_group_list *)b)->MinIDTask) + return -1; + + if(((struct fof_group_list *)a)->MinIDTask > ((struct fof_group_list *)b)->MinIDTask) + return +1; + + if(((struct fof_group_list *)a)->MinID < ((struct fof_group_list *)b)->MinID) + return -1; + + if(((struct fof_group_list *)a)->MinID > ((struct fof_group_list *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_group_list objects. + * + * Sorting kernel comparing elements (most important first): + * LocCount (descending), MinID, |ExtCount - MinIDTask|. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b, except for LocCount where -1 if a > b + */ +int fof_compare_FOF_GList_LocCountTaskDiffMinID(const void *a, const void *b) +{ + if(((struct fof_group_list *)a)->LocCount > ((struct fof_group_list *)b)->LocCount) + return -1; + + if(((struct fof_group_list *)a)->LocCount < ((struct fof_group_list *)b)->LocCount) + return +1; + + if(((struct fof_group_list *)a)->MinID < ((struct fof_group_list *)b)->MinID) + return -1; + + if(((struct fof_group_list *)a)->MinID > ((struct fof_group_list *)b)->MinID) + return +1; + + if(labs(((struct fof_group_list *)a)->ExtCount - ((struct fof_group_list *)a)->MinIDTask) < + labs(((struct fof_group_list *)b)->ExtCount - ((struct fof_group_list *)b)->MinIDTask)) + return -1; + + if(labs(((struct fof_group_list *)a)->ExtCount - ((struct fof_group_list *)a)->MinIDTask) > + labs(((struct fof_group_list *)b)->ExtCount - ((struct fof_group_list *)b)->MinIDTask)) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_group_list objects. + * + * Sorting kernel comparing elements (most important first): + * ExtCount, MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b.
+ */ +int fof_compare_FOF_GList_ExtCountMinID(const void *a, const void *b) +{ + if(((struct fof_group_list *)a)->ExtCount < ((struct fof_group_list *)b)->ExtCount) + return -1; + + if(((struct fof_group_list *)a)->ExtCount > ((struct fof_group_list *)b)->ExtCount) + return +1; + + if(((struct fof_group_list *)a)->MinID < ((struct fof_group_list *)b)->MinID) + return -1; + + if(((struct fof_group_list *)a)->MinID > ((struct fof_group_list *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for group_properties objects. + * + * Sorting kernel comparing element MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_Group_MinID(const void *a, const void *b) +{ + if(((struct group_properties *)a)->MinID < ((struct group_properties *)b)->MinID) + return -1; + + if(((struct group_properties *)a)->MinID > ((struct group_properties *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for group_properties objects. + * + * Sorting kernel comparing element GrNr. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_Group_GrNr(const void *a, const void *b) +{ + if(((struct group_properties *)a)->GrNr < ((struct group_properties *)b)->GrNr) + return -1; + + if(((struct group_properties *)a)->GrNr > ((struct group_properties *)b)->GrNr) + return +1; + + return 0; +} + +/*! \brief Comparison function for group_properties objects. + * + * Sorting kernel comparing element MinIDTask. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_Group_MinIDTask(const void *a, const void *b) +{ + if(((struct group_properties *)a)->MinIDTask < ((struct group_properties *)b)->MinIDTask) + return -1; + + if(((struct group_properties *)a)->MinIDTask > ((struct group_properties *)b)->MinIDTask) + return +1; + + return 0; +} + +/*! \brief Comparison function for group_properties objects. + * + * Sorting kernel comparing elements (most important first): + * MinIDTask, MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_Group_MinIDTask_MinID(const void *a, const void *b) +{ + if(((struct group_properties *)a)->MinIDTask < ((struct group_properties *)b)->MinIDTask) + return -1; + + if(((struct group_properties *)a)->MinIDTask > ((struct group_properties *)b)->MinIDTask) + return +1; + + if(((struct group_properties *)a)->MinID < ((struct group_properties *)b)->MinID) + return -1; + + if(((struct group_properties *)a)->MinID > ((struct group_properties *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for group_properties objects. + * + * Sorting kernel comparing element Len. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a > b. + */ +int fof_compare_Group_Len(const void *a, const void *b) +{ + if(((struct group_properties *)a)->Len > ((struct group_properties *)b)->Len) + return -1; + + if(((struct group_properties *)a)->Len < ((struct group_properties *)b)->Len) + return +1; + + return 0; +} + +/*! \brief Comparison function for id_list objects. + * + * Sorting kernel comparing elements (most important first): + * GrNr, Type, ID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b.
+ */ +int fof_compare_ID_list_GrNrID(const void *a, const void *b) +{ + if(((struct id_list *)a)->GrNr < ((struct id_list *)b)->GrNr) + return -1; + + if(((struct id_list *)a)->GrNr > ((struct id_list *)b)->GrNr) + return +1; + + if(((struct id_list *)a)->Type < ((struct id_list *)b)->Type) + return -1; + + if(((struct id_list *)a)->Type > ((struct id_list *)b)->Type) + return +1; + + if(((struct id_list *)a)->ID < ((struct id_list *)b)->ID) + return -1; + + if(((struct id_list *)a)->ID > ((struct id_list *)b)->ID) + return +1; + + return 0; +} + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof_vars.c b/src/amuse/community/arepo/src/fof/fof_vars.c new file mode 100644 index 0000000000..2df2856c66 --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof_vars.c @@ -0,0 +1,79 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/fof/fof_vars.c + * \date 05/2018 + * \brief Instances for the global variables used by FOF, which are + * declared in fof.h + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +int Ngroups, NgroupsExt, MaxNgroups, TotNgroups, Nsubgroups, TotNsubgroups; +int Nids; +long long TotNids; + +double LinkL = 0; + +int fof_OldMaxPart; +int fof_OldMaxPartSph; + +unsigned char *flag_node_inside_linkinglength; + +struct group_properties *Group; + +struct fofdata_in *FoFDataIn, *FoFDataGet; + +struct fofdata_out *FoFDataResult, *FoFDataOut; + +struct fof_particle_list *FOF_PList; + +struct fof_group_list *FOF_GList; + +struct id_list *ID_list; + +struct bit_flags *Flags; + +struct fof_subfind_header catalogue_header; + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/gitversion/version b/src/amuse/community/arepo/src/gitversion/version new file mode 100644 index 0000000000..9cd3dc25eb --- /dev/null +++ b/src/amuse/community/arepo/src/gitversion/version @@ -0,0 +1,7 @@ +#ifndef VERSION_H +#define VERSION_H + +const char* GIT_DATE = "_DATE_"; +const char* GIT_COMMIT = "_COMMIT_"; + +#endif diff --git a/src/amuse/community/arepo/src/gitversion/version.h b/src/amuse/community/arepo/src/gitversion/version.h new file mode 100644 index 0000000000..7d33b0889a --- /dev/null +++ b/src/amuse/community/arepo/src/gitversion/version.h @@ -0,0 +1,38 @@ +/*! + * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gitversion/version.h + * \date 05/2018 + * \brief Header for git-version variables. + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 27.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef VERSION_H +#define VERSION_H + +extern const char* GIT_DATE; +extern const char* GIT_COMMIT; + +#endif /* #ifndef VERSION_H */ diff --git a/src/amuse/community/arepo/src/gravity/accel.c b/src/amuse/community/arepo/src/gravity/accel.c new file mode 100644 index 0000000000..493216c347 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/accel.c @@ -0,0 +1,347 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <http://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/accel.c
+ * \date        05/2018
+ * \brief       Routines to carry out gravity force computation.
+ * \details     contains functions:
+ *                void compute_grav_accelerations(int timebin, int fullflag)
+ *                void gravity(int timebin, int fullflag)
+ *                void gravity_force_finalize(int timebin)
+ * NOTE(review): the six system #include directives below carry no header name in this copy (presumably lost in extraction); restore them from upstream before building.
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 03.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Computes the gravitational accelerations for all active particles.
+ *
+ *  Does nothing unless TimeBinsGravity.GlobalNActiveParticles > 0. On the
+ *  first step (All.Ti_Current == 0) with All.TypeOfOpeningCriterion == 1 and
+ *  All.ErrTolTheta > 0, an extra gravity() pass is done up front, preceded by
+ *  long_range_force() if PMGRID is set; gravity() is then called (again) in any case.
+ *
+ *  \param[in] timebin Current timebin for which gravity is calculated
+ *             (positive integer).
+ *  \param[in] fullflag Flag whether this is a global timestep
+ *             (passed on to gravity(), which compares it against FLAG_FULL_TREE).
+ *
+ *  \return void
+ */
+void compute_grav_accelerations(int timebin, int fullflag)
+{
+  if(TimeBinsGravity.GlobalNActiveParticles > 0)
+    {
+      if(All.TypeOfOpeningCriterion == 1 && All.Ti_Current == 0 && All.ErrTolTheta > 0)
+        {
+          /* For the first timestep, we do one gravity calculation up front
+           * with the Barnes & Hut Criterion to allow usage of relative opening
+           * criterion with consistent accuracy.
+           */
+#ifdef PMGRID
+          long_range_force();
+#endif /* #ifdef PMGRID */
+          gravity(timebin, fullflag);
+        }
+
+      gravity(timebin, fullflag); /* computes (short-range) gravity accel. */
+
+#ifdef FORCETEST
+      gravity_forcetest();
+#endif /* #ifdef FORCETEST */
+    }
+}
+
+/*! \brief Main routine for tree force calculation.
+ *
+ *  With SELFGRAVITY, the force comes either from gravity_direct() (with
+ *  ALLOW_DIRECT_SUMMATION, below DIRECT_SUMMATION_THRESHOLD globally active particles) or from
+ *  a force tree newly built by construct_forcetree() followed by gravity_tree().
+ *  Without SELFGRAVITY, GravAccel of the active particles is set to zero instead.
+ *
+ *  \param[in] timebin Current timebin for which gravity is calculated.
+ *  \param[in] fullflag Flag whether this is a global timestep (compared against FLAG_FULL_TREE).
+ *
+ *  \return void
+ */
+void gravity(int timebin, int fullflag)
+{
+  double tstart = second();
+
+#if defined(SELFGRAVITY)
+  /* set new softening lengths on global steps to take into account possible cosmological time variation */
+  if(timebin == All.HighestOccupiedGravTimeBin)
+    set_softenings();
+
+#ifdef ALLOW_DIRECT_SUMMATION
+  if(TimeBinsGravity.GlobalNActiveParticles < DIRECT_SUMMATION_THRESHOLD)
+    {
+      gravity_direct(timebin);
+
+#ifndef ONEDIMS_SPHERICAL
+      gravity_force_finalize(timebin);
+#endif /* #ifndef ONEDIMS_SPHERICAL */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      calc_exact_gravity_for_particle_type();
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+#ifdef EXTERNALGRAVITY
+      gravity_external();
+#endif /* #ifdef EXTERNALGRAVITY */
+    }
+  else
+#endif /* #ifdef ALLOW_DIRECT_SUMMATION */
+    {
+#ifdef ONEDIMS_SPHERICAL
+      gravity_monopole_1d_spherical();
+#else /* #ifdef ONEDIMS_SPHERICAL */
+
+      if(TimeBinsGravity.GlobalNActiveParticles >= 10 * NTask)
+        construct_forcetree(0, 1, 0, timebin); /* mode 0: all particles; optimized_domain_mapping = 1 */
+      else
+        construct_forcetree(0, 0, 0, timebin); /* mode 0: all particles; optimized_domain_mapping = 0 (fewer than 10 * NTask active particles) */
+
+      gravity_tree(timebin);
+
+      gravity_force_finalize(timebin);
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      calc_exact_gravity_for_particle_type();
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+#ifdef EXTERNALGRAVITY
+      gravity_external();
+#endif /* #ifdef EXTERNALGRAVITY */
+
+      /* note: we here moved 'gravity_force_finalize' in front of the non-standard physics;
+       * reminder: restart flag 18: post-processing calculation potential without running simulation
+       */
+      if(fullflag == FLAG_FULL_TREE && RestartFlag != 18)
+        calculate_non_standard_physics_with_valid_gravity_tree();
+
+      /* this is for runs which have the full tree at each time step; no HIERARCHICAL_GRAVITY */
+      calculate_non_standard_physics_with_valid_gravity_tree_always();
+
+      myfree(Father);      /* allocated in force_treebuild(), after Nextnode */
+      myfree(Nextnode);    /* allocated in force_treebuild() */
+      myfree(Tree_Points); /* allocated in force_treebuild_construct() */
+      force_treefree();
+#endif /* #ifdef ONEDIMS_SPHERICAL #else */
+    }
+
+#else /* defined(SELFGRAVITY) */
+
+  /* self-gravity is switched off: zero the acceleration (and, with EVALPOTENTIAL, the potential) of every active particle */
+  int idx, i, j;
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+
+      if(i < 0)
+        continue;
+
+#ifdef EVALPOTENTIAL
+      P[i].Potential = 0;
+#endif /* #ifdef EVALPOTENTIAL */
+
+      for(j = 0; j < 3; j++)
+        P[i].GravAccel[j] = 0;
+    }
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+  calc_exact_gravity_for_particle_type();
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+#ifdef EXTERNALGRAVITY
+  gravity_external();
+#endif /* #ifdef EXTERNALGRAVITY */
+
+#endif /* defined(SELFGRAVITY) #else */
+
+  double tend = second();
+  mpi_printf("GRAVITY: done for timebin %d, %lld particles (took %g sec)\n", timebin, TimeBinsGravity.GlobalNActiveParticles,
+             timediff(tstart, tend));
+}
+
+/*! \brief Adds individual gravity contribution and appropriate factors.
+ *
+ *  Routine combines accelerations of particle mesh and tree and applies
+ *  the required physical constants and scaling factors e.g. for a cosmological
+ *  simulation with nonperiodic gravity.
+ *
+ *  \param[in] timebin Current timebin for which gravity is calculated.
+ *
+ *  \return void (updates P[i].GravAccel, P[i].OldAcc and, with EVALPOTENTIAL, P[i].Potential of the active particles in place)
+ */
+void gravity_force_finalize(int timebin)
+{
+  int i, j, idx;
+  double ax, ay, az;
+
+  TIMER_START(CPU_TREE);
+
+  /* now add things for comoving integration (GravAccel is not yet multiplied by G here, hence the division by All.G in fac) */
+#ifdef GRAVITY_NOT_PERIODIC
+#ifndef PMGRID
+  if(All.ComovingIntegrationOn)
+    {
+      double fac = 0.5 * All.Hubble * All.Hubble * All.Omega0 / All.G;
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          for(j = 0; j < 3; j++)
+            P[i].GravAccel[j] += fac * P[i].Pos[j];
+        }
+    }
+#endif /* #ifndef PMGRID */
+#endif /* #ifdef GRAVITY_NOT_PERIODIC */
+
+#ifdef HIERARCHICAL_GRAVITY
+  if(timebin == All.HighestOccupiedGravTimeBin)
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+    {
+      mpi_printf("GRAVTREE: Setting OldAcc!\n");
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+#ifdef PMGRID
+          ax = P[i].GravAccel[0] + P[i].GravPM[0] / All.G;
+          ay = P[i].GravAccel[1] + P[i].GravPM[1] / All.G;
+          az = P[i].GravAccel[2] + P[i].GravPM[2] / All.G;
+#else /* #ifdef PMGRID */
+          ax = P[i].GravAccel[0];
+          ay = P[i].GravAccel[1];
+          az = P[i].GravAccel[2];
+#endif /* #ifdef PMGRID #else */
+
+          P[i].OldAcc = sqrt(ax * ax + ay * ay + az * az);
+        }
+    }
+
+  /* multiply by G (accelerations here, the potential further below) */
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      for(j = 0; j < 3; j++)
+        P[i].GravAccel[j] *= All.G;
+
+#ifdef EVALPOTENTIAL
+
+#if defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC)
+      P[i].Potential += All.MassPMregions[0] * M_PI / (All.Asmth[0] * All.Asmth[0] * boxSize_X * boxSize_Y * boxSize_Z);
+#ifdef PLACEHIGHRESREGION
+      P[i].Potential += All.MassPMregions[1] * M_PI / (All.Asmth[1] * All.Asmth[1] * boxSize_X * boxSize_Y * boxSize_Z);
+#endif /* #ifdef PLACEHIGHRESREGION */
+#endif /* #if defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) */
+
+      /* It's better to not remove the self-potential here to get a smooth potential field for co-spatial particles with varying mass
+       * or softening. For calculating the binding energy of a particle, the self-energy should then be removed as
+       *
+       * P[i].Potential += P[i].Mass / (All.ForceSoftening[P[i].SofteningType] / 2.8);
+       */
+
+      P[i].Potential *= All.G;
+
+#ifdef PMGRID
+#ifndef FORCETEST_TESTFORCELAW
+      P[i].Potential += P[i].PM_Potential; /* add in long-range potential */
+#endif /* #ifndef FORCETEST_TESTFORCELAW */
+#endif /* #ifdef PMGRID */
+#endif /* #ifdef EVALPOTENTIAL */
+      if(All.ComovingIntegrationOn)
+        {
+#ifdef GRAVITY_NOT_PERIODIC
+          double fac, r2;
+          int k;
+
+          fac = -0.5 * All.Omega0 * All.Hubble * All.Hubble;
+
+          for(k = 0, r2 = 0; k < 3; k++)
+            r2 += P[i].Pos[k] * P[i].Pos[k];
+
+#ifdef EVALPOTENTIAL
+          P[i].Potential += fac * r2;
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef GRAVITY_NOT_PERIODIC */
+        }
+      else
+        {
+          double fac, r2;
+          int k;
+
+          fac = -0.5 * All.OmegaLambda * All.Hubble * All.Hubble;
+
+          if(fac != 0)
+            {
+              for(k = 0, r2 = 0; k < 3; k++)
+                r2 += P[i].Pos[k] * P[i].Pos[k];
+#ifdef EVALPOTENTIAL
+              P[i].Potential += fac * r2;
+#endif /* #ifdef EVALPOTENTIAL */
+            }
+        }
+    }
+
+  /* Finally, the following factor allows a computation of a cosmological
+   * simulation with vacuum energy in physical coordinates
+   */
+#ifdef GRAVITY_NOT_PERIODIC
+#ifndef PMGRID
+  if(All.ComovingIntegrationOn == 0)
+    {
+      double fac = All.OmegaLambda * All.Hubble * All.Hubble;
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          for(j = 0; j < 3; j++)
+            P[i].GravAccel[j] += fac * P[i].Pos[j];
+        }
+    }
+#endif /* #ifndef PMGRID */
+#endif /* #ifdef GRAVITY_NOT_PERIODIC */
+
+  TIMER_STOP(CPU_TREE);
+}
diff --git a/src/amuse/community/arepo/src/gravity/forcetree.c b/src/amuse/community/arepo/src/gravity/forcetree.c
new file mode 100644
index 0000000000..c659a75e97
--- /dev/null
+++
b/src/amuse/community/arepo/src/gravity/forcetree.c @@ -0,0 +1,1827 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/forcetree.c + * \date 05/2018 + * \brief Gravitational tree build. + * \details This file contains the construction of the tree used for + * calculating the gravitational force. The type tree implemented + * is a geometrical oct-tree, starting from a cube encompassing + * all particles. This cube is automatically found in the domain + * decomposition, which also splits up the global "top-level" + * tree along node boundaries, moving the particles of different + * parts of the tree to separate processors. In this version of + * the code, the tree construction may be repeated every timestep + * without a renewed domain decomposition. If particles are on + * the "wrong" processor because a new domain decomposition has + * not been carried out, they are sent as temporary points to the + * right insertion processor according to the layout of the + * top-level nodes. 
In addition, the mapping of the top-level + * nodes to processors may be readjusted in order to improve + * work-load balance for the current time step. + * contains functions: + * int construct_forcetree(int mode, int + * optimized_domain_mapping, int insert_only_primary, + * int timebin) + * int force_treebuild(int npart, int optimized_domain_mapping, + * int insert_only_primary, int timebin) + * int force_treebuild_construct(int npart, int + * optimized_domain_mapping, int insert_only_primary, + * int timebin) + * int force_treebuild_insert_single_point(int i, unsigned + * long long *intpos, int th, unsigned char levels) + * void force_assign_cost_values(void) + * int force_create_empty_nodes(int no, int topnode, int bits, + * int x, int y, int z) + * void force_insert_pseudo_particles(void) + * void force_update_node_recursive(int no, int sib, int father, + * int *last) + * void force_exchange_topleafdata(void) + * void force_treeupdate_toplevel(int no, int topnode, int bits, + * int x, int y, int z) + * void force_treeallocate(int maxpart, int maxindex) + * void force_treefree(void) + * void dump_particles(void) + * int force_add_empty_nodes(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 17.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" + +static int *th_list; +static unsigned char *level_list; +int NTreeInsert; + +#ifdef FOF +#ifndef FOF_SECONDARY_LINK_TARGET_TYPES +#define FOF_SECONDARY_LINK_TARGET_TYPES FOF_PRIMARY_LINK_TYPES +#endif /* #ifndef FOF_SECONDARY_LINK_TARGET_TYPES */ +#endif /* #ifdef FOF */ + +#ifdef HIERARCHICAL_GRAVITY +#define INDEX(idx) (TimeBinsGravity.ActiveParticleList[idx]) +#else /* #ifdef HIERARCHICAL_GRAVITY */ +#define INDEX(idx) (idx) +#endif /* #ifdef HIERARCHICAL_GRAVITY #else */ + +/*! 
\brief Triggers forcetree construction until successful.
+ *
+ *  Allocates memory and constructs forcetree until successful, i.e. it loops
+ *  until force_treebuild() returns a non-negative value; currently, there are
+ *  two valid modes: forcetree only for gas or for all particles.
+ *
+ *  \param[in] mode Mode: 0 = all particles (npart = NumPart), 1 = just gas cells (npart = NumGas); any other value calls mpi_terminate().
+ *  \param[in] optimized_domain_mapping Handed over to force_treebuild.
+ *  \param[in] insert_only_primary Handed over to force_treebuild; the value 2 additionally skips force_treeallocate() on the first pass.
+ *  \param[in] timebin Handed over to force_treebuild.
+ *
+ *  \return Number of nodes in tree (the first non-negative value returned by force_treebuild).
+ */
+int construct_forcetree(int mode, int optimized_domain_mapping, int insert_only_primary, int timebin)
+{
+  int npart, Tree_NumNodes = 0; /* note: this local Tree_NumNodes shadows the file-external Tree_NumNodes used by force_treebuild() */
+
+  do
+    {
+      /* Note: force_treebuild will call force_treefree if it is about to return a negative value!
+       * Therefore, this has to be allocated within the loop! The only exception is when
+       * insert_only_primary == 2, in which case the code assumes that the forcetree is already
+       * allocated (this happens only in fof.c). In this case, force_treeallocate is not called
+       * during the first loop.
+       */
+      if(insert_only_primary != 2 || Tree_NumNodes < 0)
+        force_treeallocate(NumPart, All.MaxPart); /* reallocate force tree structure */
+
+      /* prepare variables for force_treebuild call */
+      switch(mode)
+        {
+          case 0: /* all particles */
+            {
+              npart = NumPart;
+              break;
+            }
+          case 1: /* only gas particles */
+            {
+              npart = NumGas;
+              break;
+            }
+          default:
+            {
+              mpi_terminate("FORCETREE: construct_forcetree: invalid mode!\n");
+            }
+        }
+
+      Tree_NumNodes = force_treebuild(npart, optimized_domain_mapping, insert_only_primary, timebin);
+    }
+  while(Tree_NumNodes < 0);
+
+  return Tree_NumNodes;
+}
+
+/*! \brief Constructs the gravitational oct-tree and handles errors.
+ *
+ *  \param[in] npart Number of particles on local task.
+ *  \param[in] optimized_domain_mapping Specifies if mapping of the top-level
+ *             nodes to processors may be readjusted.
+ *  \param[in] insert_only_primary If 1, only particles of the types set in FOF_PRIMARY_LINK_TYPES are inserted;
+ *             if 2, only those in FOF_SECONDARY_LINK_TARGET_TYPES (both only with FOF or SUBFIND).
+ *  \param[in] timebin Current timebin; needed for HIERARCHICAL_GRAVITY.
+ *
+ *  \return number of local+top nodes of the constructed tree, or -1 if the construction failed on any task and has to be repeated by the caller.
+ */
+int force_treebuild(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin)
+{
+  int i, flag;
+
+#ifdef HIERARCHICAL_GRAVITY
+  NTreeInsert = TimeBinsGravity.NActiveParticles;
+  optimized_domain_mapping = 0;
+#else /* #ifdef HIERARCHICAL_GRAVITY */
+  NTreeInsert = npart;
+#endif /* #ifdef HIERARCHICAL_GRAVITY #else */
+
+  TIMER_START(CPU_TREEBUILD);
+
+  long long loc_insert = NTreeInsert, tot_insert;
+  MPI_Reduce(&loc_insert, &tot_insert, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); /* sum is only delivered to the root, task 0 */
+
+  mpi_printf("FORCETREE: Tree construction. (inserting %lld points)\n", tot_insert);
+
+  TIMER_STOPSTART(CPU_TREEBUILD, CPU_TREEBUILD_INSERT);
+
+  int flag_single = force_treebuild_construct(npart, optimized_domain_mapping, insert_only_primary, timebin);
+
+  TIMER_STOPSTART(CPU_TREEBUILD_INSERT, CPU_TREEBUILD);
+
+  MPI_Allreduce(&flag_single, &flag, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); /* MPI_MIN: a negative code on any task makes flag negative on all tasks */
+  if(flag < 0)
+    {
+      /* tree construction was not successful and needs to be repeated */
+      if(flag_single != -2)
+        {
+          myfree(Tree_Points);
+        }
+
+      force_treefree();
+
+      if(flag == -3)
+        {
+          /* we need to do an extra domain decomposition to recover from an out-of-box condition for a particle,
+             which can happen if GRAVITY_NOT_PERIODIC is used */
+          ngb_treefree();
+          domain_free();
+
+          domain_Decomposition();
+
+          ngb_treeallocate();
+          ngb_treebuild(NumGas);
+        }
+      else
+        {
+          All.TreeAllocFactor *= 1.15;
+          mpi_printf("FORCETREE: Increasing TreeAllocFactor, new value=%g\n", All.TreeAllocFactor);
+
+          if(All.TreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
+            {
+              char buf[500];
+              sprintf(buf,
+                      "task %d: looks like a serious problem in tree construction, stopping with particle dump. Tree_NumNodes=%d "
+                      "Tree_MaxNodes=%d Tree_NumPartImported=%d NumPart=%d\n",
+                      ThisTask, Tree_NumNodes, Tree_MaxNodes, Tree_NumPartImported, NumPart);
+              dump_particles();
+              terminate(buf);
+            }
+        }
+
+      TIMER_STOP(CPU_TREEBUILD); /* stop timer before returning */
+      return -1; /* stop right here with error code to invoke a new call of this function, possibly with changed values for npart */
+    } /* if(flag < 0) */
+
+  Nextnode = (int *)mymalloc_movable(&Nextnode, "Nextnode", (Tree_MaxPart + NTopleaves + Tree_NumPartImported) * sizeof(int));
+  Father   = (int *)mymalloc_movable(&Father, "Father", (Tree_MaxPart + Tree_NumPartImported) * sizeof(int));
+
+  for(i = 0; i < Tree_MaxPart + Tree_NumPartImported; i++)
+    Father[i] = -1;
+
+  TIMER_STOPSTART(CPU_TREEBUILD, CPU_TREEBUILD_BRANCHES);
+
+  /* insert the pseudo particles that represent the mass distribution of other domains */
+  force_insert_pseudo_particles();
+
+  /* now compute the multipole moments recursively */
+  int last = -1;
+
+  force_update_node_recursive(Tree_MaxPart, -1, -1, &last);
+
+  if(last >= Tree_MaxPart)
+    {
+      if(last >= Tree_MaxPart + Tree_MaxNodes) /* a pseudo-particle or imported particle: Nextnode[] has no slots for tree nodes, so the index is shifted down by Tree_MaxNodes */
+        Nextnode[last - Tree_MaxNodes] = -1;
+      else
+        Nodes[last].u.d.nextnode = -1;
+    }
+  else
+    Nextnode[last] = -1;
+
+  TIMER_STOPSTART(CPU_TREEBUILD_BRANCHES, CPU_TREEBUILD_TOPLEVEL);
+
+  force_exchange_topleafdata();
+
+  Tree_NextFreeNode = Tree_MaxPart + 1;
+  force_treeupdate_toplevel(Tree_MaxPart, 0, 1, 0, 0, 0);
+
+  TIMER_STOPSTART(CPU_TREEBUILD_TOPLEVEL, CPU_LOGS);
+
+#ifdef HIERARCHICAL_GRAVITY
+  if(timebin == All.HighestOccupiedGravTimeBin)
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+    {
+      double locdata[2] = {Tree_NumPartImported, Tree_NumNodes}, sumdata[2];
+      MPI_Reduce(locdata, sumdata, 2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+      double tot_imported = sumdata[0];
+      double tot_numnodes = sumdata[1];
+
+      mpi_printf(
+          "FORCETREE: Tree construction done. =%g =%g NTopnodes=%d NTopleaves=%d "
+          "tree-build-scalability=%g\n",
+          tot_imported / (All.TotNumPart + 1.0e-60), tot_numnodes / NTask, NTopnodes, NTopleaves,
+          ((double)((tot_numnodes - NTask * ((double)NTopnodes)) + NTopnodes)) / (tot_numnodes + 1.0e-60));
+    }
+#ifdef HIERARCHICAL_GRAVITY
+  else
+    mpi_printf("FORCETREE: Tree construction done.\n");
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+
+  TIMER_STOP(CPU_LOGS);
+
+  return Tree_NumNodes;
+}
+
+/*! \brief Constructs the gravitational oct-tree.
+ *
+ *  The index convention for accessing tree nodes is the following:
+ *  node index
+ *  [0...            Tree_MaxPart-1]  references single particles,
+ *  the indices
+ *  [Tree_MaxPart... Tree_MaxPart+Tree_MaxNodes-1]  references tree nodes
+ *  [Tree_MaxPart+Tree_MaxNodes...  Tree_MaxPart+Tree_MaxNodes+NTopleaves-1]
+ *  references "pseudo particles", i.e. mark branches on foreign CPUs
+ *  [Tree_MaxPart+Tree_MaxNodes+NTopleaves...
+ *  Tree_MaxPart+Tree_MaxNodes+NTopleaves+Tree_NumPartImported-1]
+ *  references imported points.
+ *
+ *  the pointer `Nodes' is shifted such that Nodes[Tree_MaxPart] gives the
+ *  first tree node (i.e. the root node).
+ *
+ *  \param[in] npart Number of particles on local task.
+ *  \param[in] optimized_domain_mapping Specifies if mapping of the top-level
+ *             nodes to processors may be readjusted.
+ *  \param[in] insert_only_primary If this is set, only particles of the types
+ *             set in FOF_PRIMARY_LINK_TYPES are inserted.
+ *  \param[in] timebin (unused).
+ *
+ *  \return if successful returns the number of local+top nodes of the
+ *          constructed tree;
+ *          -1 if the number of allocated tree nodes is too small;
+ *          -2 if the number of allocated tree nodes is even too small to fit
+ *          the top nodes;
+ *          -3 if a particle out of domain box condition was encountered.
+ */ +int force_treebuild_construct(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin) +{ + int idx, i, j, no, flag = 0; + int ngrp, recvTask, count_ListNoData, *no_place = NULL; + unsigned long long *intposp; + MyDouble *posp; + +#ifdef DISABLE_OPTIMIZE_DOMAIN_MAPPING + optimized_domain_mapping = 0; +#endif /* #ifdef DISABLE_OPTIMIZE_DOMAIN_MAPPING */ + +#if !defined(GRAVITY_NOT_PERIODIC) + double boxsize[3]; + boxsize[0] = boxSize_X; + boxsize[1] = boxSize_Y; + boxsize[2] = boxSize_Z; +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */ + + /* create an empty root node */ + Tree_NextFreeNode = Tree_MaxPart; /* index of first free node */ + struct NODE *nfreep = &Nodes[Tree_NextFreeNode]; /* select first node */ + + for(j = 0; j < 8; j++) + nfreep->u.suns[j] = -1; + + nfreep->len = DomainLen; + for(j = 0; j < 3; j++) + nfreep->center[j] = DomainCenter[j]; + + Tree_NumNodes = 1; + Tree_NextFreeNode++; + + /* create a set of empty nodes corresponding to the top-level domain + * grid. We need to generate these nodes first to make sure that we have a + * complete top-level tree which allows the easy insertion of the + * pseudo-particles at the right place + */ + if(force_create_empty_nodes(Tree_MaxPart, 0, 1, 0, 0, 0) < 0) + return -2; + + Tree_FirstNonTopLevelNode = Tree_NextFreeNode; + + /* if a high-resolution region in a global tree is used, we need to generate + * an additional set of empty nodes to make sure that we have a complete + * top-level tree for the high-resolution inset + */ + + /* we first do a dummy allocation here that we'll resize later if needed, in which case the following arrays will have to be moved + * once. 
*/ + int guess_nimported = 1.2 * NumPart; + + Tree_Points = + (struct treepoint_data *)mymalloc_movable(&Tree_Points, "Tree_Points", guess_nimported * sizeof(struct treepoint_data)); + + th_list = (int *)mymalloc_movable(&th_list, "th_list", NumPart * sizeof(int)); + level_list = (unsigned char *)mymalloc_movable(&level_list, "level_list", NumPart * sizeof(unsigned char)); + Tree_IntPos_list = + (unsigned long long *)mymalloc_movable(&Tree_IntPos_list, "Tree_IntPos_list", 3 * NumPart * sizeof(unsigned long long)); + + if(NumPart < NTreeInsert) + { + terminate("ERROR: NumPart %d, NTreeInsert %d! This should not happen!", NumPart, NTreeInsert); + } + + /* first check whether particles are still in domain box */ + for(idx = 0; idx < NTreeInsert; idx++) + { + i = INDEX(idx); + if(i < 0) + continue; + + if(P[i].Ti_Current != All.Ti_Current) + drift_particle(i, All.Ti_Current); + + posp = &Tree_Pos_list[i * 3]; + + for(j = 0; j < 3; j++, posp++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + *posp = SphP[i].Center[j]; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + *posp = P[i].Pos[j]; + +#if !defined(GRAVITY_NOT_PERIODIC) + if(*posp < 0) + *posp += boxsize[j]; + if(*posp >= boxsize[j]) + *posp -= boxsize[j]; +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */ + if(*posp < DomainCorner[j] || *posp >= DomainCorner[j] + DomainLen) + { + flag = 1; + break; + } + } + } + +#if defined(GRAVITY_NOT_PERIODIC) + int flag_sum; + MPI_Allreduce(&flag, &flag_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + if(flag_sum) + { + mpi_printf( + "FORCETREE: Particle out of domain box condition was triggered. Need to do an (unplanned) new domain decomposition.\n"); + myfree(Tree_IntPos_list); + myfree(level_list); + myfree(th_list); + return -3; + } +#else /* #if defined(GRAVITY_NOT_PERIODIC) */ + if(flag) + { + char buf[1000]; + sprintf(buf, "i=%d ID=%lld type=%d moved out of box. 
Pos[j=%d]=%g DomainCorner[%d]=%g DomainLen=%g", i, (long long)P[i].ID, + P[i].Type, j, P[i].Pos[j], j, DomainCorner[j], DomainLen); + terminate(buf); + } +#endif /* #if defined(GRAVITY_NOT_PERIODIC) #else */ + +#if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) + double mass_highres = 0, mass_lowres = 0; + for(int idx = 0; idx < NTreeInsert; idx++) + { + int i = INDEX(idx); + if(i < 0) + continue; + +#ifdef PLACEHIGHRESREGION + if(pmforce_is_particle_high_res(P[i].Type, &Tree_Pos_list[3 * i])) + mass_highres += P[i].Mass; + else +#endif /* #ifdef PLACEHIGHRESREGION */ + mass_lowres += P[i].Mass; + } + double mass_pmregions[2] = {mass_lowres, mass_highres}; + MPI_Allreduce(mass_pmregions, All.MassPMregions, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); +#endif /* #if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) */ + + /* now we determine for each point the insertion top-level node, and the task on which this lies */ + if(optimized_domain_mapping) + { + TaskCost = mymalloc("TaskCost", NTask * sizeof(double)); + TaskCount = mymalloc("TaskCount", NTask * sizeof(int)); + DomainCost = mymalloc("DomainCost", NTopleaves * sizeof(double)); + DomainCount = mymalloc("DomainCount", NTopleaves * sizeof(int)); + ListNoData = mymalloc("ListNoData", NTopleaves * sizeof(struct no_list_data)); + no_place = mymalloc("no_place", NTopleaves * sizeof(int)); + + memset(no_place, -1, NTopleaves * sizeof(int)); + + for(j = 0; j < NTopleaves; j++) + DomainCost[j] = 0; + for(j = 0; j < NTopleaves; j++) + DomainCount[j] = 0; + for(j = 0; j < NTask; j++) + TaskCost[j] = 0; + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + count_ListNoData = 0; + } + + for(idx = 0; idx < NTreeInsert; idx++) + { + i = INDEX(idx); + if(i < 0) + continue; + + posp = &Tree_Pos_list[i * 3]; + + unsigned long long xxb = force_double_to_int(((*posp++ - DomainCorner[0]) * DomainInverseLen) + 1.0); + unsigned long long yyb = force_double_to_int(((*posp++ - 
DomainCorner[1]) * DomainInverseLen) + 1.0); + unsigned long long zzb = force_double_to_int(((*posp++ - DomainCorner[2]) * DomainInverseLen) + 1.0); + unsigned long long mask = ((unsigned long long)1) << (52 - 1); + unsigned char shiftx = (52 - 1); + unsigned char shifty = (52 - 2); + unsigned char shiftz = (52 - 3); + unsigned char levels = 0; + + intposp = &Tree_IntPos_list[i * 3]; + *intposp++ = xxb; + *intposp++ = yyb; + *intposp++ = zzb; + + no = 0; + while(TopNodes[no].Daughter >= 0) /* walk down top tree to find correct leaf */ + { + unsigned char subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + mask >>= 1; + levels++; + + no = TopNodes[no].Daughter + TopNodes[no].MortonToPeanoSubnode[subnode]; + } + + no = TopNodes[no].Leaf; + + th_list[i] = no; + level_list[i] = levels; + + if(optimized_domain_mapping) + { + /* find costs for all top leaves */ + + int bin = All.HighestActiveTimeBin; + double cost; + + if(domain_bintolevel[bin] >= 0) + cost = MIN_FLOAT_NUMBER + P[i].GravCost[domain_bintolevel[bin]] * domain_grav_weight[bin]; + else + { + if(domain_refbin[bin] >= 0) + cost = MIN_FLOAT_NUMBER + P[i].GravCost[domain_bintolevel[domain_refbin[bin]]] * domain_grav_weight[bin]; + else + cost = 1.0; + } + + int task = DomainTask[no]; + TaskCost[task] += cost; + + if(task == ThisTask) + { + DomainCost[no] += cost; + DomainCount[no]++; + } + else + { + int p = no_place[no]; + if(p >= 0) + { + ListNoData[p].domainCost += cost; + ListNoData[p].domainCount++; + } + else + { + Send_count[task]++; + p = count_ListNoData++; + no_place[no] = p; + ListNoData[p].task = task; + ListNoData[p].no = no; + ListNoData[p].domainCost = cost; + ListNoData[p].domainCount = 1; + } + } + } + } + + if(optimized_domain_mapping) + { + /* if necessary, re-adjust the mapping of the top-level nodes to the processors */ + + if(All.Ti_Current > 0) + { + double current_balance, impact; 
+ current_balance = force_get_current_balance(&impact); + + mpi_printf("FORCETREE: current balance= %g | %g\n", current_balance, impact); + + if(All.HighestActiveTimeBin < + All.SmallestTimeBinWithDomainDecomposition) /* only do this for steps which did not do a domain decomposition */ + { + if(impact > MAX_IMPACT_BEFORE_OPTIMIZATION) + { + force_get_global_cost_for_leavenodes(count_ListNoData); + force_optimize_domain_mapping(); + } + else + { + mpi_printf( + "FORCETREE: we're not trying to optimize further because overall imbalance impact is only %g (threshold is " + "%g)\n", + impact, MAX_IMPACT_BEFORE_OPTIMIZATION); + memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); + } + } + else + { + mpi_printf("FORCETREE: we're not trying to optimize futher because we just did a domain decomposition\n"); + memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); + } + } + else + memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); + } + else + memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); + + if(optimized_domain_mapping) + { + myfree(no_place); + myfree(ListNoData); + myfree(DomainCount); + myfree(DomainCost); + myfree(TaskCount); + myfree(TaskCost); + } + + for(j = 0; j < NTask; j++) + { + Force_Send_count[j] = 0; + } + + for(idx = 0; idx < NTreeInsert; idx++) /* make list of insertion top leaf and task for all particles */ + { + i = INDEX(idx); + if(i < 0) + continue; + + no = th_list[i]; + th_list[i] = DomainNodeIndex[no]; + + int task = DomainNewTask[no]; + + Tree_Task_list[i] = task; + + if(task != ThisTask) + { + Force_Send_count[task]++; + } + } + + MPI_Alltoall(Force_Send_count, 1, MPI_INT, Force_Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, Tree_NumPartImported = 0, Tree_NumPartExported = 0, Force_Recv_offset[0] = 0, Force_Send_offset[0] = 0; j < NTask; j++) + { + Tree_NumPartImported += Force_Recv_count[j]; + Tree_NumPartExported += Force_Send_count[j]; + if(j > 0) + { + Force_Send_offset[j] = Force_Send_offset[j - 
1] + Force_Send_count[j - 1]; + Force_Recv_offset[j] = Force_Recv_offset[j - 1] + Force_Recv_count[j - 1]; + } + } + + if(Tree_NumPartImported > guess_nimported) + { + printf("ThisTask=%d: Tree_NumPartImported=%d NumPart=%d\n", ThisTask, Tree_NumPartImported, NumPart); + Tree_Points = (struct treepoint_data *)myrealloc_movable(Tree_Points, Tree_NumPartImported * sizeof(struct treepoint_data)); + } + + if(Tree_NumPartImported > 0.25 * NumPart) + { + Tree_MaxNodes = (int)(All.TreeAllocFactor * (NumPart + Tree_NumPartImported)) + NTopnodes; + + Nodes += Tree_MaxPart; + Nodes = (struct NODE *)myrealloc_movable(Nodes, (Tree_MaxNodes + 1) * sizeof(struct NODE)); + Nodes -= Tree_MaxPart; + +#ifdef MULTIPLE_NODE_SOFTENING + ExtNodes += Tree_MaxPart; + ExtNodes = (struct ExtNODE *)myrealloc_movable(ExtNodes, (Tree_MaxNodes + 1) * sizeof(struct ExtNODE)); + ExtNodes -= Tree_MaxPart; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + + struct treepoint_data *export_Tree_Points = + (struct treepoint_data *)mymalloc("export_Tree_Points", Tree_NumPartExported * sizeof(struct treepoint_data)); + + for(j = 0; j < NTask; j++) + { + Force_Send_count[j] = 0; + } + + for(idx = 0; idx < NTreeInsert; idx++) /* prepare particle data to be copied to other tasks */ + { + i = INDEX(idx); + if(i < 0) + continue; + + int task = Tree_Task_list[i]; + + if(task != ThisTask) + { + int n = Force_Send_offset[task] + Force_Send_count[task]++; + + /* this point has to go to another task */ + export_Tree_Points[n].Pos[0] = Tree_Pos_list[3 * i + 0]; + export_Tree_Points[n].Pos[1] = Tree_Pos_list[3 * i + 1]; + export_Tree_Points[n].Pos[2] = Tree_Pos_list[3 * i + 2]; + export_Tree_Points[n].IntPos[0] = Tree_IntPos_list[3 * i + 0]; + export_Tree_Points[n].IntPos[1] = Tree_IntPos_list[3 * i + 1]; + export_Tree_Points[n].IntPos[2] = Tree_IntPos_list[3 * i + 2]; + export_Tree_Points[n].Mass = P[i].Mass; + export_Tree_Points[n].OldAcc = P[i].OldAcc; + export_Tree_Points[n].SofteningType = P[i].SofteningType; 
+ export_Tree_Points[n].index = i; + export_Tree_Points[n].Type = P[i].Type; + export_Tree_Points[n].th = th_list[i]; + export_Tree_Points[n].level = level_list[i]; +#ifndef HIERARCHICAL_GRAVITY + if(TimeBinSynchronized[P[i].TimeBinGrav]) + export_Tree_Points[n].ActiveFlag = 1; + else + export_Tree_Points[n].ActiveFlag = 0; +#endif /* #ifndef HIERARCHICAL_GRAVITY */ + } + } + + /* exchange data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + if(recvTask < NTask) + if(Force_Send_count[recvTask] > 0 || Force_Recv_count[recvTask] > 0) + MPI_Sendrecv(&export_Tree_Points[Force_Send_offset[recvTask]], Force_Send_count[recvTask] * sizeof(struct treepoint_data), + MPI_BYTE, recvTask, TAG_DENS_A, &Tree_Points[Force_Recv_offset[recvTask]], + Force_Recv_count[recvTask] * sizeof(struct treepoint_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + myfree(export_Tree_Points); + + Tree_ImportedNodeOffset = Tree_MaxPart + Tree_MaxNodes + NTopleaves; + + int full_flag = 0; + + /* now we insert all particles */ + for(idx = 0; idx < NTreeInsert; idx++) + { + i = INDEX(idx); + if(i < 0) + continue; + +#ifdef NO_GAS_SELFGRAVITY + if(P[i].Type == 0) + continue; +#endif /* #ifdef NO_GAS_SELFGRAVITY */ +#ifdef NO_SELFGRAVITY_TYPE + if(P[i].Type == NO_SELFGRAVITY_TYPE) + continue; +#endif /* #ifdef NO_SELFGRAVITY_TYPE */ +#if defined(FOF) || defined(SUBFIND) + if(insert_only_primary == 1) + { + if(!((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES))) + continue; + } + else if(insert_only_primary == 2) + { + if(!((1 << P[i].Type) & (FOF_SECONDARY_LINK_TARGET_TYPES))) + continue; + } +#endif /* #if defined(FOF) || defined(SUBFIND) */ + if(Tree_Task_list[i] == ThisTask) + { + if(force_treebuild_insert_single_point(i, &Tree_IntPos_list[3 * i], th_list[i], level_list[i]) < 0) + { + full_flag = 1; + break; + } + } + } + + if(full_flag == 0) /* only continue if previous step was successful */ + { + for(i = 0; i < Tree_NumPartImported; 
i++) + { +#ifdef NO_GAS_SELFGRAVITY + if(Tree_Points[i].Type == 0) + continue; +#endif /* #ifdef NO_GAS_SELFGRAVITY */ +#ifdef NO_SELFGRAVITY_TYPE + if(Tree_Points[i].Type == NO_SELFGRAVITY_TYPE) + continue; +#endif /* #ifdef NO_SELFGRAVITY_TYPE */ +#if defined(FOF) || defined(SUBFIND) + if(insert_only_primary == 1) + { + if(!((1 << Tree_Points[i].Type) & (FOF_PRIMARY_LINK_TYPES))) + continue; + } + else if(insert_only_primary == 2) + { + if(!((1 << Tree_Points[i].Type) & (FOF_SECONDARY_LINK_TARGET_TYPES))) + continue; + } +#endif /* #if defined(FOF) || defined(SUBFIND) */ + if(force_treebuild_insert_single_point(i + Tree_ImportedNodeOffset, Tree_Points[i].IntPos, Tree_Points[i].th, + Tree_Points[i].level) < 0) + { + full_flag = 1; + break; + } + } + } + + myfree_movable(Tree_IntPos_list); + myfree_movable(level_list); + myfree_movable(th_list); + + if(full_flag) + return -1; + +#ifdef ADDBACKGROUNDGRID + if(force_add_empty_nodes()) + return -1; +#endif /* #ifdef ADDBACKGROUNDGRID */ + + return Tree_NumNodes; +} + +/*! \brief Inserts a single particle into the gravitational tree. + * + * \param[in] i Index of particle. + * \param[in] intpos Integer representation of particle position. + * \param[in] th Target node. + * \param[in] levels Level of target node. 
+ *
+ * \return 0 if successful;
+ *         -1 if too few nodes have been allocated in the Nodes array
+ *
+ * The walk starts at node th. An index in
+ * [Tree_MaxPart, Tree_ImportedNodeOffset) is treated as an internal node;
+ * any other index is treated as a leaf whose integer position is read from
+ * Tree_Points (th >= Tree_ImportedNodeOffset) or from Tree_IntPos_list
+ * (otherwise). At every internal node one bit of each integer coordinate
+ * selects the daughter slot (x -> bit 0, y -> bit 1, z -> bit 2). An empty
+ * slot receives index i and the walk ends. When a leaf is hit, a new
+ * internal node is taken from Tree_NextFreeNode, the old leaf is attached
+ * below it, and the walk resumes from the new node.
+ *
+ * NOTE(review): parent is still -1 if the very first th is a leaf;
+ * presumably callers always pass an internal node -- confirm.
+ */
+int force_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char levels)
+{
+  int j, parent = -1;
+  unsigned char subnode = 0;
+  unsigned long long xxb  = intpos[0];
+  unsigned long long yyb  = intpos[1];
+  unsigned long long zzb  = intpos[2];
+  unsigned long long mask = ((unsigned long long)1) << ((52 - 1) - levels); /* single bit tested at the current level */
+  unsigned char shiftx    = (52 - 1) - levels; /* moves the tested x bit to bit 0 */
+  unsigned char shifty    = (52 - 2) - levels; /* moves the tested y bit to bit 1 */
+  unsigned char shiftz    = (52 - 3) - levels; /* moves the tested z bit to bit 2 */
+  signed long long centermask = (0xFFF0000000000000llu);
+  unsigned long long *intppos;
+  centermask >>= levels; /* NOTE(review): presumably relies on an arithmetic (sign-extending) right shift of a
+                          * negative value, which is implementation-defined in C -- confirm for the target compilers */
+
+  while(1)
+    {
+      if(th >= Tree_MaxPart && th < Tree_ImportedNodeOffset) /* we are dealing with an internal node */
+        {
+          subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                     ((unsigned char)((zzb & mask) >> (shiftz--))));
+
+          centermask >>= 1;
+          mask >>= 1;
+          levels++;
+
+          if(levels > MAX_TREE_LEVEL)
+            {
+              /* seems like we're dealing with particles at identical (or extremely close)
+               * locations. Shift subnode index to allow tree construction. Note: Multipole moments
+               * of tree are still correct, but one should choose MAX_TREE_LEVEL large enough to have
+               * DomainLen/2^MAX_TREE_LEVEL < gravitational softening length.
+               * The loop moves to the next higher slot until an empty one is found and saturates at slot 7.
+               */
+              for(j = 0; j < 8; j++)
+                {
+                  if(Nodes[th].u.suns[subnode] < 0)
+                    break;
+
+                  subnode++;
+                  if(subnode >= 8)
+                    subnode = 7;
+                }
+            }
+
+          int nn = Nodes[th].u.suns[subnode];
+
+          if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */
+            {
+              parent = th;
+              th     = nn;
+            }
+          else
+            {
+              /* here we have found an empty slot where we can attach
+               * the new particle as a leaf.
+               */
+              Nodes[th].u.suns[subnode] = i;
+              break; /* done for this particle */
+            }
+        }
+      else
+        {
+          /* We try to insert into a leaf with a single particle. Need
+           * to generate a new internal node at this point. It replaces the
+           * leaf in the slot of the parent that was visited last.
+           */
+          Nodes[parent].u.suns[subnode] = Tree_NextFreeNode;
+          struct NODE *nfreep           = &Nodes[Tree_NextFreeNode];
+
+          double len = ((double)(mask << 1)) * DomainBigFac;
+          double cx  = ((double)((xxb & centermask) | mask)) * DomainBigFac + DomainCorner[0];
+          double cy  = ((double)((yyb & centermask) | mask)) * DomainBigFac + DomainCorner[1];
+          double cz  = ((double)((zzb & centermask) | mask)) * DomainBigFac + DomainCorner[2];
+
+          nfreep->len       = len;
+          nfreep->center[0] = cx;
+          nfreep->center[1] = cy;
+          nfreep->center[2] = cz;
+
+          for(j = 0; j < 8; j++)
+            nfreep->u.suns[j] = -1;
+
+          if(th >= Tree_ImportedNodeOffset)
+            intppos = Tree_Points[th - Tree_ImportedNodeOffset].IntPos;
+          else
+            intppos = &Tree_IntPos_list[3 * th];
+
+          subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) |
+                     ((unsigned char)((intppos[2] & mask) >> shiftz))); /* slot of the displaced leaf in the new node */
+
+          nfreep->u.suns[subnode] = th;
+
+          th = Tree_NextFreeNode; /* resume trying to insert the new particle into the newly created internal node */
+          Tree_NumNodes++;
+          Tree_NextFreeNode++;
+
+          if(Tree_NumNodes >= Tree_MaxNodes)
+            {
+              return -1;
+            }
+        }
+    }
+
+  return 0;
+}
+
+/*! \brief Distributes the gravity costs of each node among the particles it
+ *         contains.
+ *
+ * Nothing is done unless TakeLevel >= 0. The per-thread counters are first
+ * summed into Thread[0], and the counts of the first NTopnodes nodes are
+ * summed over all tasks. Every inserted local particle then receives its
+ * own count plus, for each ancestor node with positive mass, the count of
+ * that node weighted by (particle mass / node mass). The same sum is formed
+ * for imported tree points; those values are sent back to the exporting
+ * task, which stores them using the transmitted particle index.
+ *
+ * \return void
+ */
+void force_assign_cost_values(void)
+{
+  int idx, i, ngrp, recvTask;
+
+  if(TakeLevel >= 0)
+    {
+      int thread;
+
+      /* consolidate the cost measurements done by the different threads */
+      for(thread = 1; thread < NUM_THREADS; thread++)
+        for(i = 0; i < NumPart; i++)
+          Thread[0].P_CostCount[i] += Thread[thread].P_CostCount[i];
+
+      for(thread = 1; thread < NUM_THREADS; thread++)
+        for(i = 0; i < Tree_NumNodes; i++)
+          Thread[0].Node_CostCount[i + Tree_MaxPart] += Thread[thread].Node_CostCount[i + Tree_MaxPart];
+
+      for(thread = 1; thread < NUM_THREADS; thread++)
+        for(i = 0; i < Tree_NumPartImported; i++)
+          Thread[0].TreePoints_CostCount[i] += Thread[thread].TreePoints_CostCount[i];
+
+#ifdef VERBOSE
+      /* calculate some check sums to validate the total cost assignment */
+      double sumbefore = 0, sumbeforetot;
+      for(i = 0; i < NumPart; i++)
+        sumbefore += P[i].GravCost[TakeLevel];
+      MPI_Allreduce(&sumbefore, &sumbeforetot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      double nodecost = 0, nodecosttot;
+      for(i = 0; i < Tree_NumNodes; i++)
+        nodecost += Thread[0].Node_CostCount[i + Tree_MaxPart];
+      MPI_Allreduce(&nodecost, &nodecosttot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      double importedcost = 0, importedcosttot;
+      for(i = 0; i < Tree_NumPartImported; i++)
+        importedcost += Thread[0].TreePoints_CostCount[i];
+      MPI_Allreduce(&importedcost, &importedcosttot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      double partcost = 0, partcosttot;
+      for(idx = 0; idx < NTreeInsert; idx++)
+        {
+          i = INDEX(idx);
+          if(i < 0)
+            continue;
+
+          {
+            int no = Father[i];
+
+            if(no >= 0)
+              partcost += Thread[0].P_CostCount[i];
+          }
+        }
+      MPI_Allreduce(&partcost, &partcosttot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+#endif /* #ifdef VERBOSE */
+
+      double *loc_cost  = mymalloc("loc_cost", NTopnodes * sizeof(double));
+      double *glob_cost = mymalloc("glob_cost", NTopnodes * sizeof(double));
+
+      for(i = 0; i < NTopnodes; i++)
+        loc_cost[i] = Thread[0].Node_CostCount[i + Tree_MaxPart];
+
+      MPI_Allreduce(loc_cost, glob_cost, NTopnodes, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      for(i = 0; i < NTopnodes; i++)
+        Thread[0].Node_CostCount[i + Tree_MaxPart] = glob_cost[i];
+
+      myfree(glob_cost); /* released in reverse order of allocation */
+      myfree(loc_cost);
+
+      for(i = 0; i < NumPart; i++)
+        P[i].GravCost[TakeLevel] = 0;
+
+      /* distribute costs of parent nodes to particles */
+      for(idx = 0; idx < NTreeInsert; idx++)
+        {
+          i = INDEX(idx);
+          if(i < 0)
+            continue;
+
+          {
+            double sum = Thread[0].P_CostCount[i];
+
+            int no = Father[i];
+
+            while(no >= 0) /* climb through all ancestors until the father index turns negative */
+              {
+                if(Nodes[no].u.d.mass > 0)
+                  sum += Thread[0].Node_CostCount[no] * (P[i].Mass / Nodes[no].u.d.mass);
+
+                no = Nodes[no].u.d.father;
+              }
+
+            P[i].GravCost[TakeLevel] = sum;
+          }
+        }
+
+      /* Now, if we moved points to other CPUs, we need to collect these cost values */
+      struct gravcost_data
+      {
+        float GravCost; /* the double-precision sum below is narrowed to float for transport */
+        int index;      /* copied from Tree_Points[].index; used on the receiving side as an index into P[] */
+      } * gdata_export, *gdata_import;
+
+      gdata_export = mymalloc("grav_data_export", Tree_NumPartExported * sizeof(struct gravcost_data));
+      gdata_import = mymalloc("grav_data_import", Tree_NumPartImported * sizeof(struct gravcost_data));
+
+      for(i = 0; i < Tree_NumPartImported; i++)
+        {
+          double sum = Thread[0].TreePoints_CostCount[i];
+
+          int no = Father[i + Tree_MaxPart]; /* Father[] entries of imported points are stored behind the Tree_MaxPart local slots */
+
+          while(no >= 0)
+            {
+              if(Nodes[no].u.d.mass > 0)
+                sum += Thread[0].Node_CostCount[no] * Tree_Points[i].Mass / Nodes[no].u.d.mass;
+
+              no = Nodes[no].u.d.father;
+            }
+
+          gdata_import[i].GravCost = sum;
+          gdata_import[i].index    = Tree_Points[i].index;
+        }
+
+      /* exchange data: the send side uses the Force_Recv_* layout and the receive side the Force_Send_* layout,
+       * so the costs travel in the opposite direction to the one described by these tables */
+      for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+        {
+          recvTask = ThisTask ^ ngrp;
+
+          if(recvTask < NTask)
+            {
+              if(Force_Send_count[recvTask] > 0 || Force_Recv_count[recvTask] > 0)
+                {
+                  MPI_Sendrecv(&gdata_import[Force_Recv_offset[recvTask]], Force_Recv_count[recvTask] * sizeof(struct gravcost_data),
+                               MPI_BYTE, recvTask, TAG_DENS_A, &gdata_export[Force_Send_offset[recvTask]],
+                               Force_Send_count[recvTask] * sizeof(struct gravcost_data), MPI_BYTE, recvTask, TAG_DENS_A,
+                               MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                }
+            }
+        }
+
+      for(i = 0; i < Tree_NumPartExported; i++)
+        P[gdata_export[i].index].GravCost[TakeLevel] = gdata_export[i].GravCost;
+
+      myfree(gdata_import);
+      myfree(gdata_export);
+
+#ifdef VERBOSE
+      double sum = 0, sumtot;
+      for(i = 0; i < NumPart; i++)
+        sum += P[i].GravCost[TakeLevel];
+      MPI_Allreduce(&sum, &sumtot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+      mpi_printf(
+          "FORCETREE: Cost assignment for TakeLevel=%d, highest active-TimeBin=%d yields cost=%g|%g (before %g) nodecosttot=%g "
+          "partcosttot=%g importedcosttot=%g\n",
+          TakeLevel, All.HighestActiveTimeBin, sumtot, nodecosttot + partcosttot + importedcosttot, sumbeforetot, nodecosttot,
+          partcosttot, importedcosttot);
+#else  /* #ifdef VERBOSE */
+      mpi_printf("FORCETREE: Cost assignment for TakeLevel=%d, highest active-TimeBin=%d\n", TakeLevel, All.HighestActiveTimeBin);
+#endif /* #ifdef VERBOSE #else */
+    }
+}
+
+/*! \brief Recursively creates a set of empty tree nodes which
+ *         corresponds to the top-level tree for the domain grid.
+ *
+ * This is done to ensure that this top-level tree is always "complete" so
+ * that we can easily associate the pseudo-particles of other CPUs with
+ * tree-nodes at a given level in the tree, even when the particle population
+ * is so sparse that some of these nodes are actually empty.
+ *
+ * \param[in] no Parent node for which daughter nodes shall be created.
+ * \param[in] topnode Index of the parent node in the 'TopNodes' array.
+ * \param[in] bits 2^bits is the number of nodes per dimension at the level of
+ *            the daughter nodes.
+ * \param[in] x Position of the parent node in the x direction, falls in the
+ *            range [0,2^(bits-1) - 1].
+ * \param[in] y Position of the parent node in the y direction, falls in the
+ *            range [0,2^(bits-1) - 1].
+ * \param[in] z Position of the parent node in the z direction, falls in the
+ *            range [0,2^(bits-1) - 1].
+ * + * \return 0 if successful; + * -1 if number of allocated tree nodes is too small to fit the newly + * created nodes. + */ +int force_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z) +{ + if(TopNodes[topnode].Daughter >= 0) + { + for(int i = 0; i < 2; i++) /* loop over daughter nodes */ + for(int j = 0; j < 2; j++) + for(int k = 0; k < 2; k++) + { + if(Tree_NumNodes >= Tree_MaxNodes) + { + if(All.TreeAllocFactor > MAX_TREE_ALLOC_FACTOR) + { + char buf[500]; + sprintf(buf, "task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", ThisTask, + NTopnodes); + dump_particles(); + terminate(buf); + } + return -1; + } + + int sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits); + + int count = i + 2 * j + 4 * k; + + Nodes[no].u.suns[count] = Tree_NextFreeNode; + + double lenhalf = 0.25 * Nodes[no].len; + Nodes[Tree_NextFreeNode].len = 0.5 * Nodes[no].len; + Nodes[Tree_NextFreeNode].center[0] = Nodes[no].center[0] + (2 * i - 1) * lenhalf; + Nodes[Tree_NextFreeNode].center[1] = Nodes[no].center[1] + (2 * j - 1) * lenhalf; + Nodes[Tree_NextFreeNode].center[2] = Nodes[no].center[2] + (2 * k - 1) * lenhalf; + + for(int n = 0; n < 8; n++) + Nodes[Tree_NextFreeNode].u.suns[n] = -1; + + if(TopNodes[TopNodes[topnode].Daughter + sub].Daughter == -1) + DomainNodeIndex[TopNodes[TopNodes[topnode].Daughter + sub].Leaf] = Tree_NextFreeNode; + + Tree_NextFreeNode++; + Tree_NumNodes++; + + if(force_create_empty_nodes(Tree_NextFreeNode - 1, TopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i, 2 * y + j, + 2 * z + k) < 0) + return -1; /* create granddaughter nodes for current daughter node */ + } + } + + return 0; +} + +/*! \brief Inserts pseudo particles. + * + * This function inserts pseudo-particles which will represent the mass + * distribution of the other CPUs. 
Initially, the mass of the
+ * pseudo-particles is set to zero, and their coordinate is set to the
+ * center of the domain-cell they correspond to. These quantities will be
+ * updated later on.
+ *
+ * The routine itself only stores the pseudo-particle index
+ * Tree_MaxPart + Tree_MaxNodes + i in the first daughter slot of every
+ * top-level leaf that belongs to another task.
+ *
+ * \return void
+ */
+void force_insert_pseudo_particles(void)
+{
+  for(int i = 0; i < NTopleaves; i++)
+    {
+      int index = DomainNodeIndex[i];
+
+      if(DomainNewTask[i] != ThisTask)
+        Nodes[index].u.suns[0] = Tree_MaxPart + Tree_MaxNodes + i;
+    }
+}
+
+/*! \brief Determines multipole moments.
+ *
+ * This routine determines the multipole moments for a given internal node
+ * and all its subnodes using a recursive computation. The result is
+ * stored in the Nodes[] structure in the sequence of this tree-walk.
+ *
+ * Index ranges distinguished below: [0, Tree_MaxPart) local particles,
+ * [Tree_MaxPart, Tree_MaxPart + Tree_MaxNodes) internal nodes, the next
+ * NTopleaves indices pseudo particles, and everything above imported points.
+ *
+ * \param[in] no Node for which the moments shall be found.
+ * \param[in] sib Sibling of node no.
+ * \param[in] father Father node of node no.
+ * \param[in, out] last Last node for which this function was called, or -1
+ *                 when called for root node.
+ *
+ * \return void
+ */
+void force_update_node_recursive(int no, int sib, int father, int *last)
+{
+  int j, jj, p, pp, nextsib, suns[8];
+  double s[3], mass;
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  if(no >= Tree_MaxPart && no < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+    {
+      for(j = 0; j < 8; j++)
+        suns[j] = Nodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
+                                          overwrite one element (union!) */
+      if(*last >= 0)
+        {
+          if(*last >= Tree_MaxPart)
+            {
+              if(*last >= Tree_MaxPart + Tree_MaxNodes)
+                Nextnode[*last - Tree_MaxNodes] = no; /* a pseudo-particle or imported point */
+              else
+                Nodes[*last].u.d.nextnode = no;
+            }
+          else
+            Nextnode[*last] = no;
+        }
+
+      *last = no;
+
+      mass        = 0;
+      s[0]        = 0;
+      s[1]        = 0;
+      s[2]        = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* NOTE(review): presumably an extra table entry with the smallest
+                                                    * softening, so that any real type replaces it -- confirm */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      for(j = 0; j < 8; j++)
+        {
+          if((p = suns[j]) >= 0)
+            {
+              /* check if we have a sibling on the same level */
+              for(jj = j + 1; jj < 8; jj++)
+                if((pp = suns[jj]) >= 0)
+                  break;
+
+              if(jj < 8) /* yes, we do */
+                nextsib = pp;
+              else
+                nextsib = sib;
+
+              force_update_node_recursive(p, nextsib, no, last);
+
+              if(p < Tree_MaxPart) /* a particle */
+                {
+                  MyDouble *pos = &Tree_Pos_list[3 * p];
+
+                  mass += P[p].Mass;
+                  s[0] += P[p].Mass * pos[0];
+                  s[1] += P[p].Mass * pos[1];
+                  s[2] += P[p].Mass * pos[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[P[p].SofteningType])
+                    maxsofttype = P[p].SofteningType;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  mass_per_type[P[p].Type == 0 ? 0 : P[p].SofteningType] += P[p].Mass;
+
+                  if(P[p].Type == 0)
+                    {
+                      if(maxhydrosofttype < P[p].SofteningType)
+                        maxhydrosofttype = P[p].SofteningType;
+                      if(minhydrosofttype > P[p].SofteningType)
+                        minhydrosofttype = P[p].SofteningType;
+                    }
+#else  /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  mass_per_type[P[p].SofteningType] += P[p].Mass;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else if(p < Tree_MaxPart + Tree_MaxNodes) /* an internal node */
+                {
+                  mass += Nodes[p].u.d.mass;
+                  s[0] += Nodes[p].u.d.mass * Nodes[p].u.d.s[0];
+                  s[1] += Nodes[p].u.d.mass * Nodes[p].u.d.s[1];
+                  s[2] += Nodes[p].u.d.mass * Nodes[p].u.d.s[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[Nodes[p].u.d.maxsofttype])
+                    maxsofttype = Nodes[p].u.d.maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                  int k;
+                  for(k = 0; k < NSOFTTYPES; k++)
+                    mass_per_type[k] += ExtNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(maxhydrosofttype < Nodes[p].u.d.maxhydrosofttype)
+                    maxhydrosofttype = Nodes[p].u.d.maxhydrosofttype;
+                  if(minhydrosofttype > Nodes[p].u.d.minhydrosofttype)
+                    minhydrosofttype = Nodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else if(p < Tree_MaxPart + Tree_MaxNodes + NTopleaves) /* a pseudo particle */
+                {
+                  /* nothing to be done here because the mass of the
+                   * pseudo-particle is still zero. This will be changed
+                   * later.
+                   */
+                }
+              else
+                { /* an imported point */
+                  int n = p - (Tree_MaxPart + Tree_MaxNodes + NTopleaves);
+
+                  if(n >= Tree_NumPartImported)
+                    terminate("n >= Tree_NumPartImported");
+
+                  mass += Tree_Points[n].Mass;
+                  s[0] += Tree_Points[n].Mass * Tree_Points[n].Pos[0];
+                  s[1] += Tree_Points[n].Mass * Tree_Points[n].Pos[1];
+                  s[2] += Tree_Points[n].Mass * Tree_Points[n].Pos[2];
+
+                  /* Might not need the following routine */
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[Tree_Points[n].SofteningType])
+                    maxsofttype = Tree_Points[n].SofteningType;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  mass_per_type[Tree_Points[n].Type == 0 ? 0 : Tree_Points[n].SofteningType] += Tree_Points[n].Mass;
+
+                  if(Tree_Points[n].Type == 0)
+                    {
+                      if(maxhydrosofttype < Tree_Points[n].SofteningType)
+                        maxhydrosofttype = Tree_Points[n].SofteningType;
+                      if(minhydrosofttype > Tree_Points[n].SofteningType)
+                        minhydrosofttype = Tree_Points[n].SofteningType;
+                    }
+#else  /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  mass_per_type[Tree_Points[n].SofteningType] += Tree_Points[n].Mass;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+            }
+        }
+
+      if(mass)
+        {
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else
+        {
+          /* an empty node gets its geometric center as center of mass */
+          s[0] = Nodes[no].center[0];
+          s[1] = Nodes[no].center[1];
+          s[2] = Nodes[no].center[2];
+        }
+
+      Nodes[no].u.d.mass        = mass;
+      Nodes[no].u.d.s[0]        = s[0];
+      Nodes[no].u.d.s[1]        = s[1];
+      Nodes[no].u.d.s[2]        = s[2];
+      Nodes[no].u.d.maxsofttype = maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+      int k;
+      for(k = 0; k < NSOFTTYPES; k++)
+        ExtNodes[no].mass_per_type[k] = mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      Nodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      Nodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      Nodes[no].u.d.sibling = sib;
+      Nodes[no].u.d.father  = father;
+    }
+  else /* single particle or pseudo particle */
+    {
+      if(*last >= 0)
+        {
+          if(*last >= Tree_MaxPart)
+            {
+              if(*last >= Tree_MaxPart + Tree_MaxNodes)
+                Nextnode[*last - Tree_MaxNodes] = no; /* a pseudo-particle or an imported point */
+              else
+                Nodes[*last].u.d.nextnode = no;
+            }
+          else
+            Nextnode[*last] = no;
+        }
+
+      *last = no;
+
+      if(no < Tree_MaxPart) /* only set it for single particles... */
+        Father[no] = father;
+      if(no >= Tree_MaxPart + Tree_MaxNodes + NTopleaves) /* ...or for imported points */
+        Father[no - Tree_MaxNodes - NTopleaves] = father;
+    }
+}
+
+/*! \brief Communicates the values of the multipole moments of the
+ *         top-level tree-nodes of the domain grid.
+ *
+ * This data can then be used to update the pseudo-particles on each CPU
+ * accordingly.
+ *
+ * Every task packs the moments of the top-level leaves it owns (in
+ * increasing leaf order), the packed arrays are gathered on all tasks with
+ * MPI_Allgatherv, and the entries of foreign leaves are copied into the
+ * corresponding local tree nodes.
+ *
+ * \return void
+ */
+void force_exchange_topleafdata(void)
+{
+  struct DomainNODE
+  {
+    MyDouble s[3];
+    MyDouble mass;
+#ifdef MULTIPLE_NODE_SOFTENING
+    MyDouble mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+    unsigned char maxhydrosofttype;
+    unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    unsigned char maxsofttype;
+#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES)
+    int NodeGrNr; /* declared for the transfer record but neither filled nor read in this function */
+#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */
+  };
+
+  struct DomainNODE *DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE));
+
+  /* share the pseudo-particle data across CPUs */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  for(int task = 0; task < NTask; task++)
+    recvcounts[task] = 0;
+
+  for(int n = 0; n < NTopleaves; n++)
+    recvcounts[DomainNewTask[n]]++;
+
+  for(int task = 0; task < NTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  recvoffset[0] = 0, byteoffset[0] = 0;
+  for(int task = 1; task < NTask; task++)
+    {
+      recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1];
+      byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+    }
+
+  struct DomainNODE *loc_DomainMoment =
+      (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE));
+
+  int idx = 0;
+  for(int n = 0; n < NTopleaves; n++)
+    {
+      if(DomainNewTask[n] == ThisTask)
+        {
+          int no = DomainNodeIndex[n];
+
+          /* read out the multipole moments from the local base cells */
+          loc_DomainMoment[idx].s[0]        = Nodes[no].u.d.s[0];
+          loc_DomainMoment[idx].s[1]        = Nodes[no].u.d.s[1];
+          loc_DomainMoment[idx].s[2]        = Nodes[no].u.d.s[2];
+          loc_DomainMoment[idx].mass        = Nodes[no].u.d.mass;
+          loc_DomainMoment[idx].maxsofttype = Nodes[no].u.d.maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+          for(int k = 0; k < NSOFTTYPES; k++)
+            loc_DomainMoment[idx].mass_per_type[k] = ExtNodes[no].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+          loc_DomainMoment[idx].maxhydrosofttype = Nodes[no].u.d.maxhydrosofttype;
+          loc_DomainMoment[idx].minhydrosofttype = Nodes[no].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+          idx++;
+        }
+    }
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  for(int task = 0; task < NTask; task++)
+    recvcounts[task] = 0; /* reused as a per-task read cursor into the gathered array */
+
+  for(int n = 0; n < NTopleaves; n++)
+    {
+      int task = DomainNewTask[n];
+      if(task != ThisTask)
+        {
+          int no  = DomainNodeIndex[n];
+          int idx = recvoffset[task] + recvcounts[task]++; /* shadows the outer idx; entries of one task arrive in leaf order */
+
+          Nodes[no].u.d.s[0]        = DomainMoment[idx].s[0];
+          Nodes[no].u.d.s[1]        = DomainMoment[idx].s[1];
+          Nodes[no].u.d.s[2]        = DomainMoment[idx].s[2];
+          Nodes[no].u.d.mass        = DomainMoment[idx].mass;
+          Nodes[no].u.d.maxsofttype = DomainMoment[idx].maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+          for(int k = 0; k < NSOFTTYPES; k++)
+            ExtNodes[no].mass_per_type[k] = DomainMoment[idx].mass_per_type[k];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+          Nodes[no].u.d.maxhydrosofttype = DomainMoment[idx].maxhydrosofttype;
+          Nodes[no].u.d.minhydrosofttype = DomainMoment[idx].minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+
+  myfree(loc_DomainMoment); /* released in reverse order of allocation */
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvoffset);
+  myfree(recvcounts);
+  myfree(DomainMoment);
+}
+
+/*! \brief Updates the top-level tree after the multipole moments of the
+ *         pseudo-particles have been updated.
+ *
+ * \param[in] no Node to be updated.
+ * \param[in] topnode Index of the node no in the 'TopNodes' array.
+ * \param[in] bits 2^bits is the number of nodes per dimension at the level of
+ *            the daughter nodes of node no.
+ * \param[in] x Position of the node no in the x direction, falls in the
+ *            range [0,2^(bits-1) - 1].
+ * \param[in] y Position of the node no in the y direction, falls in the
+ *            range [0,2^(bits-1) - 1].
+ * \param[in] z Position of the node no in the z direction, falls in the
+ *            range [0,2^(bits-1) - 1].
+ *
+ * The recursion visits the top-level nodes in the same order in which
+ * force_create_empty_nodes() allocated them and advances Tree_NextFreeNode
+ * once per visited daughter. After all daughters of a node have been
+ * handled, the mass, center of mass and softening information of the node
+ * are rebuilt from its eight daughters, which are reached through the
+ * nextnode link of the node and the sibling links of the daughters.
+ *
+ * \return void
+ */
+void force_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z)
+{
+  double s[3], mass;
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  if(TopNodes[topnode].Daughter >= 0)
+    {
+      for(int i = 0; i < 2; i++)
+        for(int j = 0; j < 2; j++)
+          for(int k = 0; k < 2; k++)
+            {
+              int sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits);
+
+              Tree_NextFreeNode++; /* the node index passed down is the value before this increment */
+              force_treeupdate_toplevel(Tree_NextFreeNode - 1, TopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i, 2 * y + j,
+                                        2 * z + k);
+            }
+
+      mass        = 0;
+      s[0]        = 0;
+      s[1]        = 0;
+      s[2]        = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO;
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(int j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      int p = Nodes[no].u.d.nextnode;
+
+      for(int j = 0; j < 8; j++) /* since we are dealing with top-level nodes, we know that there are 8 consecutive daughter nodes */
+        {
+          if(p >= Tree_MaxPart && p < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+            {
+              mass += Nodes[p].u.d.mass;
+              s[0] += Nodes[p].u.d.mass * Nodes[p].u.d.s[0];
+              s[1] += Nodes[p].u.d.mass * Nodes[p].u.d.s[1];
+              s[2] += Nodes[p].u.d.mass * Nodes[p].u.d.s[2];
+
+              if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[Nodes[p].u.d.maxsofttype])
+                maxsofttype = Nodes[p].u.d.maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+              for(int k = 0; k < NSOFTTYPES; k++)
+                mass_per_type[k] += ExtNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(maxhydrosofttype < Nodes[p].u.d.maxhydrosofttype)
+                maxhydrosofttype = Nodes[p].u.d.maxhydrosofttype;
+              if(minhydrosofttype > Nodes[p].u.d.minhydrosofttype)
+                minhydrosofttype = Nodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+            }
+          else
+            terminate("may not happen");
+
+          p = Nodes[p].u.d.sibling;
+        }
+
+      if(mass)
+        {
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else
+        {
+          /* an empty node gets its geometric center as center of mass */
+          s[0] = Nodes[no].center[0];
+          s[1] = Nodes[no].center[1];
+          s[2] = Nodes[no].center[2];
+        }
+
+      Nodes[no].u.d.s[0]        = s[0];
+      Nodes[no].u.d.s[1]        = s[1];
+      Nodes[no].u.d.s[2]        = s[2];
+      Nodes[no].u.d.mass        = mass;
+      Nodes[no].u.d.maxsofttype = maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(int k = 0; k < NSOFTTYPES; k++)
+        ExtNodes[no].mass_per_type[k] = mass_per_type[k];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      Nodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      Nodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    }
+}
+
+/*! \brief Allocates the memory used for storage of the tree nodes.
+ *
+ * Usually, the number of required nodes is of order 0.7*maxpart, but if this
+ * is insufficient, the code will try to allocate more space.
+ *
+ * \param[in] maxpart Number of particles on the current task.
+ * \param[in] maxindex The Nodes pointer will be shifted such that the index
+ *            of the first element is maxindex.
+ * + * \return void + */ +void force_treeallocate(int maxpart, int maxindex) +{ + if(Nodes) + terminate("already allocated"); + + Tree_MaxPart = maxindex; + Tree_MaxNodes = (int)(All.TreeAllocFactor * maxpart) + NTopnodes; + + DomainNewTask = (int *)mymalloc_movable(&DomainNewTask, "DomainNewTask", NTopleaves * sizeof(int)); + DomainNodeIndex = (int *)mymalloc_movable(&DomainNodeIndex, "DomainNodeIndex", NTopleaves * sizeof(int)); + Tree_Task_list = (int *)mymalloc_movable(&Tree_Task_list, "Tree_Task_list", maxpart * sizeof(int)); + Tree_Pos_list = (MyDouble *)mymalloc_movable(&Tree_Pos_list, "Tree_Pos_list", 3 * maxpart * sizeof(MyDouble)); + + Nodes = (struct NODE *)mymalloc_movable(&Nodes, "Nodes", (Tree_MaxNodes + 1) * sizeof(struct NODE)); + Nodes -= Tree_MaxPart; +#ifdef MULTIPLE_NODE_SOFTENING + ExtNodes = (struct ExtNODE *)mymalloc_movable(&ExtNodes, "ExtNodes", (Tree_MaxNodes + 1) * sizeof(struct ExtNODE)); + ExtNodes -= Tree_MaxPart; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ +} + +/*! \brief Frees the memory allocated for the tree. + * + * I.e. it frees the space allocated by the function force_treeallocate(). + * + * \return void + */ +void force_treefree(void) +{ + if(Nodes) + { +#ifdef MULTIPLE_NODE_SOFTENING + myfree(ExtNodes + Tree_MaxPart); + ExtNodes = NULL; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + myfree(Nodes + Tree_MaxPart); + myfree(Tree_Pos_list); + myfree(Tree_Task_list); + myfree(DomainNodeIndex); + myfree(DomainNewTask); + + Nodes = NULL; + DomainNodeIndex = NULL; + DomainNewTask = NULL; + Tree_Task_list = NULL; + Nextnode = NULL; + Father = NULL; + } + else + terminate("trying to free the tree even though it's not allocated"); +} + +/*! \brief Dump particle data into file. + * + * This function dumps some of the basic particle data to a file. In case + * the tree construction fails, it is called just before the run + * terminates with an error message. 
+ *  Examination of the generated file may
+ *  then give clues to what caused the problem.
+ *
+ *  The file is named particles<ThisTask>.dat and contains, in this order,
+ *  NumPart, all positions, all velocities and all IDs as raw binary data.
+ *  If the file cannot be opened, a message is printed and the function
+ *  returns without writing anything.
+ *
+ *  \return void
+ */
+void dump_particles(void)
+{
+  char buffer[200];
+  snprintf(buffer, sizeof(buffer), "particles%d.dat", ThisTask);
+
+  FILE *fd = fopen(buffer, "w");
+  if(fd == NULL)
+    {
+      /* only report here: force_add_empty_nodes() terminates with its own, more specific message
+         right after calling this function */
+      printf("FORCETREE: Task %d could not open `%s' for the particle dump\n", ThisTask, buffer);
+      return;
+    }
+
+  my_fwrite(&NumPart, 1, sizeof(int), fd);
+  for(int i = 0; i < NumPart; i++)
+    my_fwrite(&P[i].Pos[0], 3, sizeof(MyDouble), fd);
+  for(int i = 0; i < NumPart; i++)
+    my_fwrite(&P[i].Vel[0], 3, sizeof(MyFloat), fd);
+  /* NOTE(review): the ID is written with sizeof(int); confirm this matches the type of P[i].ID */
+  for(int i = 0; i < NumPart; i++)
+    my_fwrite(&P[i].ID, 1, sizeof(int), fd);
+  fclose(fd);
+}
+
+#ifdef ADDBACKGROUNDGRID
+/*! \brief Add additional empty nodes.
+ *
+ *  Called during tree construction if ADDBACKGROUNDGRID is active.
+ *
+ *  For every node in [Tree_MaxPart, Tree_MaxPart + Tree_NumNodes) that has
+ *  at least one daughter, each missing daughter slot (value -1) is filled
+ *  with a newly created node that has half the side length of its parent, is
+ *  centred in the corresponding octant and has no daughters of its own.
+ *
+ *  \return 0: default; 1: number of nodes > max number of nodes.
+ */
+int force_add_empty_nodes(void)
+{
+  int nempty = 0;
+  int no, j, subnode;
+
+  for(no = Tree_MaxPart; no < Tree_MaxPart + Tree_NumNodes; no++)
+    {
+      int nmissing = 0;
+
+      for(subnode = 0; subnode < 8; subnode++)
+        if(Nodes[no].u.suns[subnode] == -1)
+          nmissing++;
+
+      /* nodes without any daughter are left alone */
+      if(nmissing == 8)
+        continue;
+
+      for(subnode = 0; subnode < 8; subnode++)
+        {
+          if(Nodes[no].u.suns[subnode] != -1)
+            continue;
+
+          Nodes[no].u.suns[subnode] = Tree_NextFreeNode;
+          struct NODE *nfreep = &Nodes[Tree_NextFreeNode];
+
+          nfreep->len = 0.5 * Nodes[no].len;
+          double lenhalf = 0.25 * Nodes[no].len;
+
+          /* bit j of the octant index selects the upper (bit set) or lower (bit clear) half along axis j */
+          for(j = 0; j < 3; j++)
+            {
+              if(subnode & (1 << j))
+                nfreep->center[j] = Nodes[no].center[j] + lenhalf;
+              else
+                nfreep->center[j] = Nodes[no].center[j] - lenhalf;
+            }
+
+          for(j = 0; j < 8; j++)
+            nfreep->u.suns[j] = -1;
+
+          Tree_NumNodes++;
+          Tree_NextFreeNode++;
+
+          if(Tree_NumNodes >= Tree_MaxNodes)
+            {
+              /* report failure to the caller, unless the allocation factor is already so large that
+                 running out of nodes points to a real problem */
+              if(All.TreeAllocFactor > 5.0)
+                {
+                  char buf[500];
+                  snprintf(buf, sizeof(buf),
+                           "task %d: looks like a serious problem, stopping with particle dump. Tree_NumNodes=%d Tree_MaxNodes=%d\n",
+                           ThisTask, Tree_NumNodes, Tree_MaxNodes);
+                  dump_particles();
+                  terminate(buf);
+                }
+              return 1;
+            }
+          nempty++;
+        }
+    }
+
+  printf("FORCETREE: Task %d has added %d empty nodes\n", ThisTask, nempty);
+  return 0;
+}
+#endif /* #ifdef ADDBACKGROUNDGRID */
diff --git a/src/amuse/community/arepo/src/gravity/forcetree.h b/src/amuse/community/arepo/src/gravity/forcetree.h
new file mode 100644
index 0000000000..0371e7e9f7
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/forcetree.h
@@ -0,0 +1,168 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <https://www.gnu.org/licenses/>.
+ *
+ * \file src/gravity/forcetree.h
+ * \date 05/2018
+ * \brief Functions and data structures for forcetree.
+ * \details + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 28.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef FORCETREE_H +#define FORCETREE_H + +#ifndef INLINE_FUNC +#define INLINE_FUNC +#endif /* #ifndef INLINE_FUNC */ + +typedef struct +{ + MyDouble Pos[3]; + float OldAcc; + unsigned char Type; + unsigned char SofteningType; + + int Firstnode; +} gravdata_in; + +typedef struct +{ + MyFloat Acc[3]; +#ifdef EVALPOTENTIAL + MyFloat Potential; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef OUTPUTGRAVINTERACTIONS + int GravInteractions; +#endif /* #ifdef OUTPUTGRAVINTERACTIONS */ + +} gravdata_out; + +#ifdef LONG_X +#define STRETCHX (LONG_X) +#else /* #ifdef LONG_X */ +#define STRETCHX 1 +#endif /* #ifdef LONG_X #else */ + +#ifdef LONG_Y +#define STRETCHY (LONG_Y) +#else /* #ifdef LONG_Y */ +#define STRETCHY 1 +#endif /* #ifdef LONG_Y #else */ + +#ifdef LONG_Z +#define STRETCHZ (LONG_Z) +#else /* #ifdef LONG_Z */ +#define STRETCHZ 1 +#endif /* #ifdef LONG_Z #else */ + +#define DBX 1 +#define DBY 1 +#define DBZ 1 +#define DBX_EXTRA 0 +#define DBY_EXTRA 0 +#define DBZ_EXTRA 0 + +/*! 
length of lock-up table for short-range force kernel in TreePM algorithm */ +#define NTAB 127 + +#if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) + +#define EN 64 + +#define ENX (DBX * STRETCHX * EN) +#define ENY (DBY * STRETCHY * EN) +#define ENZ (DBZ * STRETCHZ * EN) + +extern MyFloat Ewd_fcorrx[ENX + 1][ENY + 1][ENZ + 1]; +extern MyFloat Ewd_fcorry[ENX + 1][ENY + 1][ENZ + 1]; +extern MyFloat Ewd_fcorrz[ENX + 1][ENY + 1][ENZ + 1]; +extern MyFloat Ewd_potcorr[ENX + 1][ENY + 1][ENZ + 1]; +extern double Ewd_fac_intp; + +extern int NTreeInsert; + +#endif /* #if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) */ + +#define MAX_TREE_LEVEL 30 +#define MAX_TREE_ALLOC_FACTOR 30.0 + +#define TAKE_NSLOTS_IN_ONE_GO 32 + +#define MAX_IMPACT_BEFORE_OPTIMIZATION 1.03 + +#define BITFLAG_TOPLEVEL 0 +#define BITFLAG_DEPENDS_ON_LOCAL_MASS 1 +#define BITFLAG_DEPENDS_ON_EXTERN_MASS 2 +#define BITFLAG_INTERNAL_TOPLEVEL 6 +#define BITFLAG_MULTIPLEPARTICLES 7 +#define BITFLAG_CONTAINS_GAS 10 + +#define BITFLAG_MASK ((1 << BITFLAG_CONTAINS_GAS) + (1 << BITFLAG_MULTIPLEPARTICLES)) + +static inline unsigned long long force_double_to_int(double d) +{ + union + { + double d; + unsigned long long ull; + } u; + u.d = d; + return (u.ull & 0xFFFFFFFFFFFFFllu); +} + +static inline double force_int_to_double(unsigned long long x) +{ + union + { + double d; + unsigned long long ull; + } u; + u.d = 1.0; + u.ull |= x; + return u.d; +} + +int tree_treefind_export_node_threads(int no, int target, int thread_id); +int construct_forcetree(int mode, int optimized_domain_mapping, int insert_only_primary, int timebin); +int force_treebuild(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin); +int force_treebuild_construct(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin); +int force_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char level); +int force_create_empty_nodes(int no, int topnode, int bits, 
int x, int y, int z); +void force_insert_pseudo_particles(void); +void force_update_node_recursive(int no, int sib, int father, int *last); +void force_exchange_topleafdata(void); +void force_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z); +void force_treeallocate(int maxpart, int maxindex); +void force_treefree(void); +void dump_particles(void); +int force_add_empty_nodes(void); +void force_short_range_init(void); +int force_treeevaluate(gravdata_in *in, gravdata_out *out, int target, int mode, int thread_id, int numnodes, int *firstnode, + int measure_cost_flag); +void force_assign_cost_values(void); +void force_optimize_domain_mapping(void); +double force_get_current_balance(double *impact); +void force_get_global_cost_for_leavenodes(int nexport); +void forcetest_ewald_init(void); + +#endif /* #ifndef FORCETREE_H */ diff --git a/src/amuse/community/arepo/src/gravity/forcetree_ewald.c b/src/amuse/community/arepo/src/gravity/forcetree_ewald.c new file mode 100644 index 0000000000..f1b73fb5f6 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/forcetree_ewald.c @@ -0,0 +1,529 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/forcetree_ewald.c + * \date 05/2018 + * \brief Code for Ewald correction (i.e. tree force with periodic + * boundary conditions. + * \details This file contains the computation of the Ewald correction + * table. + * contains functins: + * void ewald_init(void) + * void ewald_corr(double dx, double dy, double dz, double + * *fper) + * double ewald_pot_corr(double dx, double dy, double dz) + * double ewald_psi(double x, double y, double z) + * void ewald_force(double x, double y, double z, double + * force[3]) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 20.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) +#include + +/* variables for Ewald correction lookup table */ +MyFloat Ewd_fcorrx[ENX + 1][ENY + 1][ENZ + 1]; +MyFloat Ewd_fcorry[ENX + 1][ENY + 1][ENZ + 1]; +MyFloat Ewd_fcorrz[ENX + 1][ENY + 1][ENZ + 1]; +MyFloat Ewd_potcorr[ENX + 1][ENY + 1][ENZ + 1]; +double Ewd_fac_intp; + +/*! \brief Structure that holds information of Ewald correction table. + */ +typedef struct +{ + int resx, resy, resz, varsize, ewaldtype; +} ewald_header; + +/*! \brief This function initializes tables with the correction force and the + * correction potential due to the periodic images of a point mass located + * at the origin. + * + * These corrections are obtained by Ewald summation. (See for example + * Hernquist, Bouchet, Suto, ApJS, 1991, 75, 231) The correction fields + * are used to obtain the full periodic force if periodic boundaries + * combined with the pure tree algorithm are used. 
+ *  For the TreePM
+ *  algorithm, the Ewald correction is not used.
+ *
+ *  The correction terms are computed by ewald_psi() and ewald_force() and
+ *  stored in the arrays Ewd_fcorrx, Ewd_fcorry, Ewd_fcorrz and Ewd_potcorr.
+ *
+ *  The correction fields are stored on disk once they are computed. If a
+ *  corresponding file is found, they are loaded from disk to speed up the
+ *  initialization. The Ewald summation is done in parallel, i.e. the
+ *  processors share the work to compute the tables if needed.
+ *
+ *  Task 0 reads the file ewald_table_<ENX>_<ENY>_<ENZ>.dat and checks its
+ *  header; the outcome is broadcast, so all tasks take the same branch.
+ *  Finally the tables are rescaled to the box size actually used.
+ *
+ *  \return void
+ */
+void ewald_init(void)
+{
+  int recomputeflag = 0;
+  double force[3];
+  char buf[200];
+  FILE *fd;
+
+  mpi_printf("EWALD: initialize Ewald correction...\n");
+
+#ifdef LONG_X
+  if(LONG_X != (int)(LONG_X))
+    terminate("LONG_X must be an integer");
+#endif /* #ifdef LONG_X */
+
+#ifdef LONG_Y
+  if(LONG_Y != (int)(LONG_Y))
+    terminate("LONG_Y must be an integer");
+#endif /* #ifdef LONG_Y */
+
+#ifdef LONG_Z
+  if(LONG_Z != (int)(LONG_Z))
+    terminate("LONG_Z must be an integer");
+#endif /* #ifdef LONG_Z */
+
+  sprintf(buf, "ewald_table_%d_%d_%d.dat", ENX, ENY, ENZ);
+
+  if(ThisTask == 0)
+    {
+      if((fd = fopen(buf, "r")))
+        {
+          mpi_printf("\nEWALD: reading Ewald tables from file `%s'\n", buf);
+
+          ewald_header tabh;
+          my_fread(&tabh, sizeof(ewald_header), 1, fd);
+
+          int ewaldtype = -1;
+
+          /* the table is only usable if it was written for the same resolution and float size */
+          if(tabh.resx != ENX || tabh.resy != ENY || tabh.resz != ENZ || tabh.varsize != sizeof(MyFloat) ||
+             tabh.ewaldtype != ewaldtype)
+            {
+              mpi_printf("\nEWALD: something's wrong with this table file. Discarding it.\n");
+              recomputeflag = 1;
+            }
+          else
+            {
+              my_fread(Ewd_fcorrx, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fread(Ewd_fcorry, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fread(Ewd_fcorrz, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fread(Ewd_potcorr, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+
+              recomputeflag = 0;
+            }
+          fclose(fd);
+        }
+      else
+        recomputeflag = 1;
+    }
+
+  MPI_Bcast(&recomputeflag, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+  if(recomputeflag)
+    {
+      mpi_printf("\nEWALD: No usable Ewald tables in file `%s' found. Recomputing them...\n", buf);
+
+      /* ok, let's recompute things. Actually, we do that in parallel. */
+      int size = (ENX + 1) * (ENY + 1) * (ENZ + 1);
+      int first, count;
+
+      subdivide_evenly(size, NTask, ThisTask, &first, &count);
+
+      /* progress is reported about every 5 percent of the local work; the step is kept >= 1 because
+         count / 20 is zero for count < 20, which made the modulo below a division by zero */
+      int progress_step = count / 20;
+      if(progress_step < 1)
+        progress_step = 1;
+
+      for(int n = first; n < first + count; n++)
+        {
+          /* decompose the flat table index n into the three grid indices */
+          int i = n / ((ENY + 1) * (ENZ + 1));
+          int j = (n - i * (ENY + 1) * (ENZ + 1)) / (ENZ + 1);
+          int k = (n - i * (ENY + 1) * (ENZ + 1) - j * (ENZ + 1));
+
+          if(ThisTask == 0)
+            {
+              if(((n - first) % progress_step) == 0)
+                {
+                  printf("%4.1f percent done\n", (n - first) / (count / 100.0));
+                  myflush(stdout);
+                }
+            }
+
+          double xx = 0.5 * DBX * STRETCHX * ((double)i) / ENX;
+          double yy = 0.5 * DBY * STRETCHY * ((double)j) / ENY;
+          double zz = 0.5 * DBZ * STRETCHZ * ((double)k) / ENZ;
+
+          Ewd_potcorr[i][j][k] = ewald_psi(xx, yy, zz);
+
+          ewald_force(xx, yy, zz, force);
+
+          Ewd_fcorrx[i][j][k] = force[0];
+          Ewd_fcorry[i][j][k] = force[1];
+          Ewd_fcorrz[i][j][k] = force[2];
+        }
+
+      int *recvcnts = (int *)mymalloc("recvcnts", NTask * sizeof(int));
+      int *recvoffs = (int *)mymalloc("recvoffs", NTask * sizeof(int));
+
+      /* byte counts and byte offsets of the table slice computed by each task */
+      for(int i = 0; i < NTask; i++)
+        {
+          int off, cnt;
+          subdivide_evenly(size, NTask, i, &off, &cnt);
+          recvcnts[i] = cnt * sizeof(MyFloat);
+          recvoffs[i] = off * sizeof(MyFloat);
+        }
+
+      MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_fcorrx, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD);
+      MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_fcorry, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD);
+      MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_fcorrz, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD);
+      MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_potcorr, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD);
+
+      myfree(recvoffs);
+      myfree(recvcnts);
+
+      mpi_printf("\nEWALD: writing Ewald tables to file `%s'\n", buf);
+      if(ThisTask == 0)
+        {
+          if((fd = fopen(buf, "w")))
+            {
+              ewald_header tabh;
+              tabh.resx = ENX;
+              tabh.resy = ENY;
+              tabh.resz = ENZ;
+              tabh.varsize = sizeof(MyFloat);
+              tabh.ewaldtype = -1;
+
+              my_fwrite(&tabh, sizeof(ewald_header), 1, fd);
+              my_fwrite(Ewd_fcorrx, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fwrite(Ewd_fcorry, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fwrite(Ewd_fcorrz, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fwrite(Ewd_potcorr, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              fclose(fd);
+            }
+          else
+            {
+              /* not fatal: the tables are in memory, they just have to be recomputed in the next run */
+              mpi_printf("EWALD: could not open file `%s' for writing, Ewald tables are not stored\n", buf);
+            }
+        }
+    }
+  else
+    {
+      /* here we got them from disk */
+      int len = (ENX + 1) * (ENY + 1) * (ENZ + 1) * sizeof(MyFloat);
+
+      MPI_Bcast(Ewd_fcorrx, len, MPI_BYTE, 0, MPI_COMM_WORLD);
+      MPI_Bcast(Ewd_fcorry, len, MPI_BYTE, 0, MPI_COMM_WORLD);
+      MPI_Bcast(Ewd_fcorrz, len, MPI_BYTE, 0, MPI_COMM_WORLD);
+      MPI_Bcast(Ewd_potcorr, len, MPI_BYTE, 0, MPI_COMM_WORLD);
+    }
+
+  /* now scale things to the boxsize that is actually used */
+  Ewd_fac_intp = 2 * EN / All.BoxSize;
+
+  for(int i = 0; i <= ENX; i++)
+    for(int j = 0; j <= ENY; j++)
+      for(int k = 0; k <= ENZ; k++)
+        {
+          Ewd_potcorr[i][j][k] /= All.BoxSize;
+          Ewd_fcorrx[i][j][k] /= All.BoxSize * All.BoxSize;
+          Ewd_fcorry[i][j][k] /= All.BoxSize * All.BoxSize;
+          Ewd_fcorrz[i][j][k] /= All.BoxSize * All.BoxSize;
+        }
+
+  mpi_printf("EWALD: Initialization of periodic boundaries finished.\n");
+}
+
+/*!
+ *  \brief This function looks up the correction force due to the infinite
+ *  number of periodic particle/node images.
+ *
+ *  The force is obtained by trilinear interpolation in the precomputed
+ *  tables Ewd_fcorrx, Ewd_fcorry and Ewd_fcorrz. The tables hold one octant
+ *  around the target particle at the origin, so the absolute value of each
+ *  separation component is looked up; the matching force component is then
+ *  multiplied by +1 if that separation component was negative and by -1
+ *  otherwise.
+ *
+ *  \param[in] dx x component of the distance between the two particles.
+ *  \param[in] dy y component of the distance between the two particles.
+ *  \param[in] dz z component of the distance between the two particles.
+ *  \param[out] fper pointer to array containing the correction force.
+ *
+ *  \return void
+ */
+void ewald_corr(double dx, double dy, double dz, double *fper)
+{
+  /* remember the sign factors before folding the separation into the tabulated octant */
+  const int sgn_x = (dx < 0) ? +1 : -1;
+  const int sgn_y = (dy < 0) ? +1 : -1;
+  const int sgn_z = (dz < 0) ? +1 : -1;
+
+  if(dx < 0)
+    dx = -dx;
+  if(dy < 0)
+    dy = -dy;
+  if(dz < 0)
+    dz = -dz;
+
+  /* position in units of the table spacing: integer part selects the cell (clamped to the last
+     one), the remainder is the interpolation coordinate inside the cell */
+  const double gx = dx * Ewd_fac_intp;
+  const double gy = dy * Ewd_fac_intp;
+  const double gz = dz * Ewd_fac_intp;
+
+  const int i = ((int)gx >= ENX) ? ENX - 1 : (int)gx;
+  const int j = ((int)gy >= ENY) ? ENY - 1 : (int)gy;
+  const int k = ((int)gz >= ENZ) ? ENZ - 1 : (int)gz;
+
+  const double u = gx - i;
+  const double v = gy - j;
+  const double w = gz - k;
+
+  /* weight of the cell corner with offset (a, b, c) is named w<a><b><c> */
+  const double w000 = (1 - u) * (1 - v) * (1 - w);
+  const double w001 = (1 - u) * (1 - v) * (w);
+  const double w010 = (1 - u) * (v) * (1 - w);
+  const double w011 = (1 - u) * (v) * (w);
+  const double w100 = (u) * (1 - v) * (1 - w);
+  const double w101 = (u) * (1 - v) * (w);
+  const double w110 = (u) * (v) * (1 - w);
+  const double w111 = (u) * (v) * (w);
+
+/* weighted sum over the eight corners of cell (i, j, k) of one table */
+#define EWALD_TRILINEAR(tab) \
+  ((tab)[i][j][k] * w000 + (tab)[i][j][k + 1] * w001 + (tab)[i][j + 1][k] * w010 + (tab)[i][j + 1][k + 1] * w011 + \
+   (tab)[i + 1][j][k] * w100 + (tab)[i + 1][j][k + 1] * w101 + (tab)[i + 1][j + 1][k] * w110 + (tab)[i + 1][j + 1][k + 1] * w111)
+
+  fper[0] = sgn_x * EWALD_TRILINEAR(Ewd_fcorrx);
+  fper[1] = sgn_y * EWALD_TRILINEAR(Ewd_fcorry);
+  fper[2] = sgn_z * EWALD_TRILINEAR(Ewd_fcorrz);
+
+#undef EWALD_TRILINEAR
+}
+
+/*! \brief This function looks up the correction potential due to the infinite
+ *  number of periodic particle/node images.
+ *
+ *  We here use tri-linear interpolation to get it from the precomputed
+ *  table, which contains one octant around the target particle at the
+ *  origin. The other octants are obtained from it by exploiting symmetry
+ *  properties.
+ *
+ *  \param[in] dx x component of the distance between the two particles.
+ *  \param[in] dy y component of the distance between the two particles.
+ *  \param[in] dz z component of the distance between the two particles.
+ *
+ *  \return The correction potential.
+ */
+double ewald_pot_corr(double dx, double dy, double dz)
+{
+  /* the potential table is even in every coordinate, so only absolute values are needed */
+  if(dx < 0)
+    dx = -dx;
+  if(dy < 0)
+    dy = -dy;
+  if(dz < 0)
+    dz = -dz;
+
+  /* cell index (clamped to the last cell) and fractional position inside the cell, per axis */
+  double u = dx * Ewd_fac_intp;
+  double v = dy * Ewd_fac_intp;
+  double w = dz * Ewd_fac_intp;
+
+  const int i = ((int)u >= ENX) ? ENX - 1 : (int)u;
+  const int j = ((int)v >= ENY) ? ENY - 1 : (int)v;
+  const int k = ((int)w >= ENZ) ? ENZ - 1 : (int)w;
+
+  u -= i;
+  v -= j;
+  w -= k;
+
+  /* per-axis linear weights: element 0 belongs to the lower corner, element 1 to the upper one */
+  const double wu[2] = {1 - u, u};
+  const double wv[2] = {1 - v, v};
+  const double ww[2] = {1 - w, w};
+
+  return Ewd_potcorr[i][j][k] * (wu[0] * wv[0] * ww[0]) + Ewd_potcorr[i][j][k + 1] * (wu[0] * wv[0] * ww[1]) +
+         Ewd_potcorr[i][j + 1][k] * (wu[0] * wv[1] * ww[0]) + Ewd_potcorr[i][j + 1][k + 1] * (wu[0] * wv[1] * ww[1]) +
+         Ewd_potcorr[i + 1][j][k] * (wu[1] * wv[0] * ww[0]) + Ewd_potcorr[i + 1][j][k + 1] * (wu[1] * wv[0] * ww[1]) +
+         Ewd_potcorr[i + 1][j + 1][k] * (wu[1] * wv[1] * ww[0]) + Ewd_potcorr[i + 1][j + 1][k + 1] * (wu[1] * wv[1] * ww[1]);
+}
+
+/*! \brief This function computes the potential correction term by means of
+ *  Ewald summation.
+ *
+ *  \param[in] x X distance for which the correction term should be computed.
+ *  \param[in] y Y distance for which the correction term should be computed.
+ *  \param[in] z Z distance for which the correction term should be computed.
+ *
+ *  \return The correction term.
+ */
+double ewald_psi(double x, double y, double z)
+{
+  static int printed = 0; /* the reciprocal-space limits are logged on the first call only */
+
+  const double r = sqrt(x * x + y * y + z * z);
+
+  /* zero separation: return 0 and avoid the 1/r term below */
+  if(r == 0)
+    return 0;
+
+  const double lmin = imin(imin(STRETCHX, STRETCHY), STRETCHZ);
+  const double alpha = 3.0 / lmin;
+  const double alpha2 = alpha * alpha;
+
+  /* real-space part: sum over the images within nmax box lengths in every direction */
+  const int nmax = 4;
+
+  double sum_real = 0;
+  for(int nx = -nmax; nx <= nmax; nx++)
+    for(int ny = -nmax; ny <= nmax; ny++)
+      for(int nz = -nmax; nz <= nmax; nz++)
+        {
+          const double dx = x - nx * STRETCHX;
+          const double dy = y - ny * STRETCHY;
+          const double dz = z - nz * STRETCHZ;
+          const double dist = sqrt(dx * dx + dy * dy + dz * dz);
+
+          sum_real += erfc(alpha * dist) / dist;
+        }
+
+  /* reciprocal-space part */
+  const int nxmax = (int)(2 * alpha * (STRETCHX / lmin) + 0.5);
+  const int nymax = (int)(2 * alpha * (STRETCHY / lmin) + 0.5);
+  const int nzmax = (int)(2 * alpha * (STRETCHZ / lmin) + 0.5);
+
+  if(printed == 0)
+    {
+      mpi_printf("EWALD: potential tab: nxmax=%d nymax=%d nzmax=%d\n", nxmax, nymax, nzmax);
+      printed = 1;
+    }
+
+  double sum_recip = 0.0;
+  for(int nx = -nxmax; nx <= nxmax; nx++)
+    for(int ny = -nymax; ny <= nymax; ny++)
+      for(int nz = -nzmax; nz <= nzmax; nz++)
+        {
+          const double kx = (2.0 * M_PI / (STRETCHX)) * nx;
+          const double ky = (2.0 * M_PI / (STRETCHY)) * ny;
+          const double kz = (2.0 * M_PI / (STRETCHZ)) * nz;
+          const double k2 = kx * kx + ky * ky + kz * kz;
+
+          /* the k = 0 term is excluded from the sum */
+          if(!(k2 > 0))
+            continue;
+
+          const double kdotx = (x * kx + y * ky + z * kz);
+          sum_recip += 4.0 * M_PI / (k2 * STRETCHX * STRETCHY * STRETCHZ) * exp(-k2 / (4.0 * alpha2)) * cos(kdotx);
+        }
+
+  /* the original source carries a commented-out constant (-2.83729) in front of this expression */
+  return M_PI / (alpha * alpha * STRETCHX * STRETCHY * STRETCHZ) - sum_real - sum_recip + 1.0 / r;
+}
+
+/*! \brief This function computes the force correction term (difference
+ *  between full force of infinite lattice and nearest image) by Ewald
+ *  summation.
+ *
+ *  \param[in] x X distance for which the correction term should be computed.
+ *  \param[in] y Y distance for which the correction term should be computed.
+ *  \param[in] z Z distance for which the correction term should be computed.
+ *  \param[out] force Array that will contain the correction force.
+ *
+ *  \return void
+ */
+void ewald_force(double x, double y, double z, double force[3])
+{
+  static int printed = 0; /* the reciprocal-space limits are logged on the first call only */
+
+  force[0] = force[1] = force[2] = 0;
+
+  const double r2 = x * x + y * y + z * z;
+
+  /* zero separation: the correction force stays zero */
+  if(r2 == 0)
+    return;
+
+  const double lmin = imin(imin(STRETCHX, STRETCHY), STRETCHZ);
+  const double alpha = 2.0 / lmin;
+  const double alpha2 = alpha * alpha;
+
+  /* start from the nearest-image term r / |r|^3 */
+  const double r3inv = 1.0 / (r2 * sqrt(r2));
+
+  force[0] += r3inv * x;
+  force[1] += r3inv * y;
+  force[2] += r3inv * z;
+
+  /* real-space part: subtract the contribution of the images within nmax box lengths */
+  const int nmax = 4;
+
+  for(int nx = -nmax; nx <= nmax; nx++)
+    for(int ny = -nmax; ny <= nmax; ny++)
+      for(int nz = -nmax; nz <= nmax; nz++)
+        {
+          const double dx = x - nx * STRETCHX;
+          const double dy = y - ny * STRETCHY;
+          const double dz = z - nz * STRETCHZ;
+          const double d2 = dx * dx + dy * dy + dz * dz;
+          const double d = sqrt(d2);
+          const double val = erfc(alpha * d) + 2.0 * alpha * d / sqrt(M_PI) * exp(-alpha2 * d2);
+          const double val2 = val / (d2 * d);
+
+          force[0] -= dx * val2;
+          force[1] -= dy * val2;
+          force[2] -= dz * val2;
+        }
+
+  /* reciprocal-space part */
+  const int nxmax = (int)(2 * alpha * (STRETCHX / lmin) + 0.5);
+  const int nymax = (int)(2 * alpha * (STRETCHY / lmin) + 0.5);
+  const int nzmax = (int)(2 * alpha * (STRETCHZ / lmin) + 0.5);
+
+  if(printed == 0)
+    {
+      mpi_printf("EWALD: force tab: nxmax=%d nymax=%d nzmax=%d\n", nxmax, nymax, nzmax);
+      printed = 1;
+    }
+
+  /* NOTE(review): unlike ewald_psi(), this sum uses the integer vector (hx, hy, hz) directly and
+     contains no STRETCHX/Y/Z factors - confirm that this is intended for stretched boxes */
+  for(int hx = -nxmax; hx <= nxmax; hx++)
+    for(int hy = -nymax; hy <= nymax; hy++)
+      for(int hz = -nzmax; hz <= nzmax; hz++)
+        {
+          const double h2 = hx * hx + hy * hy + hz * hz;
+
+          /* the h = 0 term is excluded from the sum */
+          if(!(h2 > 0))
+            continue;
+
+          const double hdotx = x * hx + y * hy + z * hz;
+          const double val = 2.0 / h2 * exp(-M_PI * M_PI * h2 / alpha2) * sin(2.0 * M_PI * hdotx);
+
+          force[0] -= hx * val;
+          force[1] -= hy * val;
+          force[2] -= hz * val;
+        }
+}
+
+#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */
diff --git a/src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c
b/src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c new file mode 100644 index 0000000000..3289af844c --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c @@ -0,0 +1,486 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/forcetree_optimizebalance.c + * \date 05/2018 + * \brief Does some preparation work for use of red-black ordered binary + * tree based on BSD macros. + * \details contains functions: + * int force_sort_load(const void *a, const void *b) + * double force_get_current_balance(double *impact) + * void force_get_global_cost_for_leavenodes(int nexport) + * static int mydata_cmp(struct mydata *lhs, struct mydata *rhs) + * void force_optimize_domain_mapping(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 20.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/bsd_tree.h" +#include "../domain/domain.h" + +/* \brief Structure of my tree nodes. 
+ */
+struct mydata
+{
+  double pri; /* sort key of the element; compared first by mydata_cmp() */
+  int target; /* task index; used by mydata_cmp() to break ties in pri */
+  RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */
+};
+
+/* prototype of comparison function of tree elements */
+static int mydata_cmp(struct mydata *lhs, struct mydata *rhs);
+
+/* the following macro declares 'struct mytree', which is the header element needed as handle for a tree */
+RB_HEAD(mytree, mydata);
+
+/* the following macros declare appropriate function prototypes and functions needed for this type of tree */
+RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp);
+RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp);
+
+/*! \brief Data structure that describes force-segment.
+ *
+ *  A segment is the contiguous range [start, end] of top-level leaves that
+ *  is assigned as a whole to one task.
+ */
+static struct force_segments_data
+{
+  int start, end, task;
+  double work, cost, count, normalized_load;
+} * force_domainAssign;
+
+/*! \brief Comparison function for force_segments_data.
+ *
+ *  Sorting kernel for qsort(); it orders segments by decreasing
+ *  normalized_load.
+ *
+ *  \param[in] a First object.
+ *  \param[in] b Second object.
+ *
+ *  \return (-1,0,1), -1 if a->normalized_load > b->normalized_load.
+ */
+int force_sort_load(const void *a, const void *b)
+{
+  const struct force_segments_data *seg_a = (const struct force_segments_data *)a;
+  const struct force_segments_data *seg_b = (const struct force_segments_data *)b;
+
+  if(seg_a->normalized_load > seg_b->normalized_load)
+    return -1;
+
+  return (seg_a->normalized_load < seg_b->normalized_load) ? +1 : 0;
+}
+
+/* maximum and sum of TaskCost, stored by force_get_current_balance() for later use */
+static double oldmax, oldsum;
+
+/*! \brief Calculates current balance.
+ *
+ *  \param[out] impact Impact factor of imbalance (1 if optimally balanced).
+ *
+ *  \return Domain balance = max(cost) / average(cost).
+ */
+double force_get_current_balance(double *impact)
+{
+#ifndef NO_MPI_IN_PLACE
+  MPI_Allreduce(MPI_IN_PLACE, TaskCost, NTask, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); /* sum the per-task cost array over all tasks */
+#else /* #ifndef NO_MPI_IN_PLACE */
+  double *inTaskCost = mymalloc("inTaskCost", NTask * sizeof(double));
+  ;
+  memcpy(inTaskCost, TaskCost, NTask * sizeof(double)); /* separate send buffer, same reduction as above */
+  MPI_Allreduce(inTaskCost, TaskCost, NTask, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  myfree(inTaskCost);
+#endif /* #ifndef NO_MPI_IN_PLACE #else */
+
+  int i;
+  for(i = 0, oldmax = oldsum = 0; i < NTask; i++) /* oldmax/oldsum are file-scope and reused by force_optimize_domain_mapping() */
+    {
+      oldsum += TaskCost[i];
+      if(oldmax < TaskCost[i])
+        oldmax = TaskCost[i];
+    }
+
+  *impact = 1.0 + domain_grav_weight[All.HighestActiveTimeBin] * (oldmax - oldsum / NTask) / All.TotGravCost;
+
+  return oldmax / (oldsum / NTask);
+}
+
+/*! \brief Gather cost data of all leaf-nodes and communicate result.
+ *
+ *  The function works in two stages:
+ *  - The nexport records in ListNoData (domainCost, domainCount and node
+ *    index no) are sorted by destination task and exchanged pairwise with
+ *    MPI_Sendrecv. Every received record is added to DomainCost[no] and
+ *    DomainCount[no].
+ *  - Each task then contributes DomainCost/DomainCount of the top-level
+ *    leaves n with DomainTask[n] == ThisTask to an MPI_Allgatherv, and
+ *    overwrites its values for all other leaves with the gathered data.
+ *
+ *  On entry Send_count must already hold the number of records destined for
+ *  each task, because it is used in the initial MPI_Alltoall; it is rebuilt
+ *  while the export buffer is filled.
+ *
+ *  \param[in] nexport Number of exported nodes.
+ *
+ *  \return void
+ */
+void force_get_global_cost_for_leavenodes(int nexport)
+{
+  int i, j, n, nimport, idx, task, ngrp;
+
+  struct node_data
+  {
+    double domainCost;
+    int domainCount;
+    int no;
+  } * export_node_data, *import_node_data;
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); /* learn how many records each task will send us */
+
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) /* total import count and prefix-sum offsets */
+    {
+      nimport += Recv_count[j];
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0; /* reset; recounted below while the export buffer is filled */
+
+  export_node_data = mymalloc("export_node_data", nexport * sizeof(struct node_data));
+  import_node_data = mymalloc("import_node_data", nimport * sizeof(struct node_data));
+
+  for(i = 0; i < nexport; i++)
+    {
+      int task = ListNoData[i].task;
+      int ind = Send_offset[task] + Send_count[task]++; /* next free slot in the section reserved for this destination */
+
+      export_node_data[ind].domainCost = ListNoData[i].domainCost;
+      export_node_data[ind].domainCount = ListNoData[i].domainCount;
+      export_node_data[ind].no = ListNoData[i].no;
+    }
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp; /* communication partner in this round */
+      if(recvTask < NTask)
+        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+          MPI_Sendrecv(&export_node_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct node_data), MPI_BYTE, recvTask,
+                       TAG_DENS_B, &import_node_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct node_data), MPI_BYTE,
+                       recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
+  for(i = 0; i < nimport; i++)
+    {
+      int no = import_node_data[i].no;
+      DomainCost[no] += import_node_data[i].domainCost; /* accumulate the contributions received from other tasks */
+      DomainCount[no] += import_node_data[i].domainCount;
+    }
+
+  myfree(import_node_data);
+  myfree(export_node_data);
+
+  /* now share the cost data across all processors */
+  struct DomainNODE
+  {
+    double domainCost;
+    int domainCount;
+  } * DomainMoment, *loc_DomainMoment;
+
+  DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE));
+
+  /* share the cost data across CPUs */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  for(task = 0; task < NTask; task++)
+    recvcounts[task] = 0;
+
+  for(n = 0; n < NTopleaves; n++)
+    recvcounts[DomainTask[n]]++; /* number of top-level leaves per task according to DomainTask */
+
+  for(task = 0; task < NTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  for(task = 1, recvoffset[0] = 0, byteoffset[0] = 0; task < NTask; task++)
+    {
+      recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1];
+      byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+    }
+
+  loc_DomainMoment = (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE));
+
+  for(n = 0, idx = 0; n < NTopleaves; n++) /* pack the leaves of this task in increasing leaf order */
+    {
+      if(DomainTask[n] == ThisTask)
+        {
+          loc_DomainMoment[idx].domainCost = DomainCost[n];
+          loc_DomainMoment[idx].domainCount = DomainCount[n];
+          idx++;
+        }
+    }
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  for(task = 0; task < NTask; task++)
+    recvcounts[task] = 0; /* reused as a running per-task read position for the unpacking below */
+
+  for(n = 0; n < NTopleaves; n++)
+    {
+      task = DomainTask[n];
+      if(task != ThisTask) /* local leaves already hold their final values */
+        {
+          idx = recvoffset[task] + recvcounts[task]++;
+
+          DomainCost[n] = DomainMoment[idx].domainCost;
+          DomainCount[n] = DomainMoment[idx].domainCount;
+        }
+    }
+
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvoffset);
+  myfree(recvcounts);
+  myfree(DomainMoment);
+}
+
+/*! \brief Comparison function of tree elements.
+ *
+ *  Compares
+ *   - pri and if this is equal
+ *   - target
+ *
+ *  Elements therefore only compare equal if both pri and target agree.
+ *
+ *  \param[in] lhs First mydata object.
+ *  \param[in] rhs Second mydata object.
+ *
+ *  \return (-1,0,1) -1 if lhs < rhs.
+ */
+static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+{
+  if(lhs->pri < rhs->pri)
+    return -1;
+  else if(lhs->pri > rhs->pri)
+    return 1;
+  else if(lhs->target < rhs->target)
+    return -1;
+  else if(lhs->target > rhs->target)
+    return 1;
+
+  return 0;
+}
+
+/*! \brief Optimization algorithm for the workload balance.
+ *
+ *  The function computes a new assignment DomainNewTask of the top-level
+ *  leaves to tasks:
+ *   - The NTopleaves leaves are cut into ncpu = NTask * All.MultipleDomains
+ *     contiguous segments of similar work, where the work of a leaf is
+ *     0.5 * DomainCost / oldsum + 0.5 * DomainCount / All.TotNumPart.
+ *   - The segments are sorted by decreasing normalized load (see
+ *     force_sort_load()).
+ *   - The segments are handed out one by one. For each segment, up to
+ *     n_lowest of the smallest entries of three red-black trees (keyed by
+ *     the accumulated cost, count and cost + count of each task) are
+ *     considered as candidates; the candidate with the smallest value of
+ *     max(cost, max_cost) + max(load, max_load) receives the segment.
+ *   - TaskCost and TaskCount are recomputed for the new mapping. If the new
+ *     work balance is worse than the old one or the largest particle count
+ *     exceeds All.MaxPart, DomainNewTask is reset to DomainTask.
+ *
+ *  oldmax and oldsum are the values stored by force_get_current_balance().
+ *
+ *  \return void
+ */
+void force_optimize_domain_mapping(void)
+{
+  int i, j;
+
+  double fac_cost = 0.5 / oldsum;
+  double fac_count = 0.5 / All.TotNumPart;
+
+  int ncpu = NTask * All.MultipleDomains;
+  int ndomain = NTopleaves;
+  double workavg = 1.0 / ncpu; /* target work per segment; the normalized work of all leaves sums to 1 */
+  double workhalfnode = 0.5 / NTopleaves;
+  double work_before = 0;
+  double workavg_before = 0;
+
+  int start = 0;
+
+  force_domainAssign = mymalloc("force_domainAssign", ncpu * sizeof(struct force_segments_data));
+
+  for(i = 0; i < ncpu; i++)
+    {
+      double work = 0, cost = 0, count = 0;
+      int end = start; /* every segment contains at least the leaf it starts with */
+
+      cost += fac_cost * DomainCost[end];
+      count += fac_count * DomainCount[end];
+      work += fac_cost * DomainCost[end] + fac_count * DomainCount[end];
+
+      while((work + work_before + (end + 1 < NTopleaves ? fac_cost * DomainCost[end + 1] + fac_count * DomainCount[end + 1] : 0) <
+             workavg + workavg_before + workhalfnode) ||
+            (i == ncpu - 1 && end < ndomain - 1))
+        {
+          if((ndomain - end) > (ncpu - i)) /* only grow while enough leaves remain for the segments still to come */
+            end++;
+          else
+            break;
+
+          cost += fac_cost * DomainCost[end];
+          count += fac_count * DomainCount[end];
+          work += fac_cost * DomainCost[end] + fac_count * DomainCount[end];
+        }
+
+      force_domainAssign[i].start = start;
+      force_domainAssign[i].end = end;
+      force_domainAssign[i].work = work;
+      force_domainAssign[i].cost = cost;
+      force_domainAssign[i].count = count;
+
+      force_domainAssign[i].normalized_load = cost + count; /* note: they are already multiplied by fac_cost/fac_count */
+
+      work_before += work;
+      workavg_before += workavg;
+      start = end + 1;
+    }
+
+  qsort(force_domainAssign, ncpu, sizeof(struct force_segments_data), force_sort_load); /* heaviest segments first */
+
+  /* create three priority trees, one for the cost load, one for the particle count, and one for the combined cost */
+  struct mytree queues[3]; /* 0=cost, 1=count, 2=combi */
+
+  struct mydata *ncost = mymalloc("ncost", NTask * sizeof(struct mydata));
+  struct mydata *ncount = mymalloc("ncount", NTask * sizeof(struct mydata));
+  struct mydata *ncombi = mymalloc("ncombi", NTask * sizeof(struct mydata));
+
+  RB_INIT(&queues[0]);
+  RB_INIT(&queues[1]);
+  RB_INIT(&queues[2]);
+
+  /* fill in all the tasks into the trees. The priority 'pri' will be the current cost/count, the field 'target' is used to
+     label the task */
+  for(i = 0; i < NTask; i++)
+    {
+      ncost[i].pri = 0;
+      ncost[i].target = i;
+      RB_INSERT(mytree, &queues[0], &ncost[i]);
+
+      ncount[i].pri = 0;
+      ncount[i].target = i;
+      RB_INSERT(mytree, &queues[1], &ncount[i]);
+
+      ncombi[i].pri = 0;
+      ncombi[i].target = i;
+      RB_INSERT(mytree, &queues[2], &ncombi[i]);
+    }
+
+  double max_load = 0;
+  double max_cost = 0;
+
+  int n_lowest = MAX_FIRST_ELEMENTS_CONSIDERED;
+  if(n_lowest > NTask)
+    n_lowest = NTask;
+
+  int rep, *candidates = mymalloc("candidates", n_lowest * sizeof(int));
+  struct mydata *np;
+
+  for(i = 0; i < ncpu; i++)
+    {
+      /* pick the least work-loaded target from the queue, and the least particle-loaded, and then decide which choice
+         gives the smallest load overall */
+      double cost, load;
+      double bestwork = 1.0e30;
+      int q, target = -1;
+
+      for(q = 0; q < 3; q++)
+        {
+          /* look up the n_lowest smallest elements from the tree */
+          for(np = RB_MIN(mytree, &queues[q]), rep = 0; np != NULL && rep < n_lowest; np = RB_NEXT(mytree, &queues[q], np), rep++)
+            candidates[rep] = np->target;
+
+          for(rep = 0; rep < n_lowest; rep++)
+            {
+              int t = candidates[rep];
+
+              cost = ncost[t].pri + force_domainAssign[i].cost;
+              load = ncount[t].pri + force_domainAssign[i].count;
+              if(cost < max_cost) /* staying below the current maxima does not change the score */
+                cost = max_cost;
+              if(load < max_load)
+                load = max_load;
+              double w = cost + load;
+              if(w < bestwork)
+                {
+                  bestwork = w;
+                  target = t;
+                }
+            }
+        }
+
+      force_domainAssign[i].task = target;
+
+      cost = ncost[target].pri + force_domainAssign[i].cost;
+      load = ncount[target].pri + force_domainAssign[i].count;
+
+      RB_REMOVE(mytree, &queues[0], &ncost[target]); /* pri is the tree key, so remove, update and re-insert */
+      ncost[target].pri = cost;
+      RB_INSERT(mytree, &queues[0], &ncost[target]);
+
+      RB_REMOVE(mytree, &queues[1], &ncount[target]);
+      ncount[target].pri = load;
+      RB_INSERT(mytree, &queues[1], &ncount[target]);
+
+      RB_REMOVE(mytree, &queues[2], &ncombi[target]);
+      ncombi[target].pri = cost + load;
+      RB_INSERT(mytree, &queues[2], &ncombi[target]);
+
+      if(max_cost < cost)
+        max_cost = cost;
+
+      if(max_load < load)
+        max_load = load;
+    }
+
+  myfree(candidates);
+
+  /* free tree nodes again */
+  myfree(ncombi);
+  myfree(ncount);
+  myfree(ncost);
+
+  for(i = 0; i < ncpu; i++)
+    for(j = force_domainAssign[i].start; j <= force_domainAssign[i].end; j++)
+      DomainNewTask[j] = force_domainAssign[i].task; /* every leaf of a segment goes to the task chosen for the segment */
+
+  myfree(force_domainAssign);
+
+  for(i = 0; i < NTask; i++)
+    {
+      TaskCost[i] = 0;
+      TaskCount[i] = 0;
+    }
+
+  for(i = 0; i < NTopleaves; i++)
+    {
+      TaskCost[DomainNewTask[i]] += DomainCost[i];
+      TaskCount[DomainNewTask[i]] += DomainCount[i];
+    }
+
+  double max, sum, maxload, sumload;
+  for(i = 0, max = sum = 0, maxload = sumload = 0; i < NTask; i++)
+    {
+      sum += TaskCost[i];
+      if(max < TaskCost[i])
+        max = TaskCost[i];
+      sumload += TaskCount[i];
+      if(maxload < TaskCount[i])
+        maxload = TaskCount[i];
+    }
+
+  mpi_printf("FORCETREE: Active-TimeBin=%d [unoptimized work-balance=%g] new work-balance=%g, new load-balance=%g\n",
+             All.HighestActiveTimeBin, oldmax / (oldsum / NTask), max / (sum / NTask), maxload / (sumload / NTask));
+
+  if((max / (sum / NTask) > oldmax / (oldsum / NTask)) || (maxload > All.MaxPart))
+    {
+      mpi_printf(
+          "FORCETREE: The work-load is either worse than before or the memory-balance is not viable. We keep the old distribution.\n");
+      memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); /* fall back to the current mapping */
+    }
+}
diff --git a/src/amuse/community/arepo/src/gravity/forcetree_walk.c b/src/amuse/community/arepo/src/gravity/forcetree_walk.c
new file mode 100644
index 0000000000..b773024cea
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/forcetree_walk.c
@@ -0,0 +1,709 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/forcetree_walk.c + * \date 05/2018 + * \brief Gravitational tree walk code. + * \details This file contains the various gravitational tree walks. + * contains functions: + * void force_short_range_init(void) + * int force_treeevaluate(gravdata_in * in, gravdata_out * out, + * int target, int mode, int thread_id, int numnodes, int + * *firstnode, int measure_cost_flag) + * int tree_treefind_export_node_threads(int no, int i, int + * thread_id) + * void force_evaluate_direct(int target, int result_idx, + * int nimport) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 16.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Variable for short-range lookup table. + * + * Contains the factor needed for the short range + * contribution of the tree to the gravity force. + */ +static float shortrange_table[NTAB + 1]; + +/*! \brief Variable for short-range lookup table. 
+ *
+ *  Contains the factor needed for the short range
+ *  contribution of the tree to the potential energy.
+ */
+static float shortrange_table_potential[NTAB + 1];
+
+/*! \brief Initializes the short range table.
+ *
+ *  Fills both lookup tables at the NTAB + 1 sample points
+ *  u = i * (RCUT / 2) / NTAB: shortrange_table_potential with -erfc(u) and
+ *  shortrange_table with the erfc-based force factor (set to 0 at u = 0); used by the TreePM short-range walk.
+ *
+ *  \return void
+ */
+void force_short_range_init(void)
+{
+  for(int i = 0; i <= NTAB; i++)
+    {
+      double u = ((RCUT / 2.0) / NTAB) * i;
+
+      shortrange_table_potential[i] = -erfc(u); /* -r * g(r) */
+
+      if(u > 0)
+        shortrange_table[i] = (erfc(u) + 2.0 * u / sqrt(M_PI) * exp(-u * u) - 1.0) / (u * u); /* -g'(r) - 1/r^2 */
+      else
+        shortrange_table[i] = 0; /* the expression above would be 0/0 at u = 0 */
+    }
+}
+
+/*! \brief This routine calculates the (short range) force contribution
+ *   for a given particle in case the Tree(PM) algorithm is used.
+ *
+ *  In the TreePM algorithm, the tree is walked only locally around the
+ *  target coordinate. Tree nodes that fall outside a box of half
+ *  side-length Rcut= RCUT*ASMTH*MeshSize can be discarded. The short-range
+ *  potential is modified by a complementary error function, multiplied
+ *  with the Newtonian form. The resulting short-range suppression compared
+ *  to the Newtonian force is tabulated, because looking up from this table
+ *  is faster than recomputing the corresponding factor, despite the
+ *  memory-access penalty (which reduces cache performance) incurred by the
+ *  table.
+ *
+ *  Depending on the value of TypeOfOpeningCriterion, either the geometrical BH
+ *  cell-opening criterion, or the `relative' opening criterion is used.
+ *
+ *  \param[in] in Gravdata communicated into function.
+ *  \param[in, out] out Gravdata communicated from function.
+ *  \param[in] target Index of the particle to be processed.
+ *  \param[in] mode 0: process local particle (phase 1), 1: process imported
+ *             particle (phase 2).
+ *  \param[in] thread_id Id of this thread.
+ *  \param[in] numnodes,firstnode Number of walks and, for mode 1, the nodes at which they start (firstnode[k] is opened first; for mode 0 each walk starts at the root node).
+ *  \param[in] measure_cost_flag Whether the cost of the tree walk should be
+ *             measured (increments the per-thread P_CostCount, Node_CostCount and TreePoints_CostCount counters).
+ *
+ *  \return Number of interactions processed for the target (value of ninteractions).
+ */
+int force_treeevaluate(gravdata_in *in, gravdata_out *out, int target, int mode, int thread_id, int numnodes, int *firstnode,
+                       int measure_cost_flag)
+{
+  struct NODE *nop = NULL;
+#ifdef MULTIPLE_NODE_SOFTENING
+  struct ExtNODE *extnop = 0;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+#if !defined(GRAVITY_NOT_PERIODIC)
+  double xtmp, ytmp, ztmp; /* not referenced directly below; presumably scratch space for the GRAVITY_NEAREST_* macros -- TODO confirm */
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */
+
+  double acc_x = 0;
+  double acc_y = 0;
+  double acc_z = 0;
+#ifdef EVALPOTENTIAL
+  double pot = 0.0;
+#endif /* #ifdef EVALPOTENTIAL */
+
+  int ninteractions = 0;
+
+  double pos_x = in->Pos[0];
+  double pos_y = in->Pos[1];
+  double pos_z = in->Pos[2];
+  double aold = All.ErrTolForceAcc * in->OldAcc;
+  double h_i = All.ForceSoftening[in->SofteningType];
+
+#ifdef PMGRID
+  double rcut = All.Rcut[0];
+  double asmth = All.Asmth[0];
+#ifdef PLACEHIGHRESREGION
+  if(pmforce_is_particle_high_res(in->Type, in->Pos))
+    {
+      rcut = All.Rcut[1];
+      asmth = All.Asmth[1];
+    }
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+  double rcut2 = rcut * rcut;
+  double asmthinv = 0.5 / asmth;
+  double asmthinv2 = asmthinv * asmthinv;
+  double asmthfac = asmthinv * (NTAB / (RCUT / 2.0)); /* maps a distance r to a fractional index into the short-range tables */
+#endif /* #ifdef PMGRID */
+
+  for(int k = 0; k < numnodes; k++)
+    {
+      int no;
+
+      if(mode == 0)
+        no = Tree_MaxPart; /* root node */
+      else
+        {
+          no = firstnode[k];
+          no = Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0) /* a negative node index ends the walk */
+        {
+          double dx, dy, dz, r2, mass, hmax;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+          int indi_flag1 = -1, indi_flag2 = 0; /* default: a single pass with type == -1 in the evaluation loop below */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+          if(no < Tree_MaxPart) /* single particle */
+            {
+              dx = GRAVITY_NEAREST_X(Tree_Pos_list[3 * no + 0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(Tree_Pos_list[3 * no + 1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(Tree_Pos_list[3 * no + 2] - pos_z);
+              r2 = dx * dx + dy * dy + dz * dz;
+
+              mass = P[no].Mass;
+
+              if(measure_cost_flag)
+                Thread[thread_id].P_CostCount[no]++;
+
+              double h_j = All.ForceSoftening[P[no].SofteningType];
+
+              hmax = (h_j > h_i) ? h_j : h_i;
+
+              no = Nextnode[no];
+            }
+          else if(no < Tree_MaxPart + Tree_MaxNodes) /* we have an internal node */
+            {
+              if(mode == 1)
+                {
+                  if(no <
+                     Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    {
+                      no = -1;
+                      continue;
+                    }
+                }
+
+              nop = &Nodes[no];
+
+              mass = nop->u.d.mass;
+              dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z);
+
+              r2 = dx * dx + dy * dy + dz * dz;
+
+#if defined(PMGRID)
+              if(r2 > rcut2)
+                {
+                  /* check whether we can stop walking along this branch */
+                  double eff_dist = rcut + 0.5 * nop->len;
+
+                  double dist = GRAVITY_NEAREST_X(nop->center[0] - pos_x);
+                  if(dist < -eff_dist || dist > eff_dist)
+                    {
+                      no = nop->u.d.sibling;
+                      continue;
+                    }
+
+                  dist = GRAVITY_NEAREST_Y(nop->center[1] - pos_y);
+                  if(dist < -eff_dist || dist > eff_dist)
+                    {
+                      no = nop->u.d.sibling;
+                      continue;
+                    }
+
+                  dist = GRAVITY_NEAREST_Z(nop->center[2] - pos_z);
+                  if(dist < -eff_dist || dist > eff_dist)
+                    {
+                      no = nop->u.d.sibling;
+                      continue;
+                    }
+                }
+#endif /* #if defined(PMGRID) */
+
+              if(All.ErrTolTheta) /* check Barnes-Hut opening criterion */
+                {
+                  if(nop->len * nop->len > r2 * All.ErrTolTheta * All.ErrTolTheta)
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+                }
+              else /* check relative opening criterion */
+                {
+                  double len2 = nop->len * nop->len;
+
+                  if(len2 > r2 * (1.2 * 1.2)) /* add a worst case protection */
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+
+                    // note that aold is strictly speaking |acceleration| / G
+#ifdef ACTIVATE_MINIMUM_OPENING_ANGLE
+                  if(mass * len2 > r2 * r2 * aold && len2 > r2 * (0.4 * 0.4))
+#else  /* #ifdef ACTIVATE_MINIMUM_OPENING_ANGLE */
+                  if(mass * len2 > r2 * r2 * aold)
+#endif /* #ifdef ACTIVATE_MINIMUM_OPENING_ANGLE #else */
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+
+                  /* check in addition whether we lie inside or very close to the cell */
+                  if(fabs(GRAVITY_NEAREST_X(nop->center[0] - pos_x)) < 0.60 * nop->len)
+                    {
+                      if(fabs(GRAVITY_NEAREST_Y(nop->center[1] - pos_y)) < 0.60 * nop->len)
+                        {
+                          if(fabs(GRAVITY_NEAREST_Z(nop->center[2] - pos_z)) < 0.60 * nop->len)
+                            {
+                              no = nop->u.d.nextnode;
+                              continue;
+                            }
+                        }
+                    }
+                }
+
+              double h_j = All.ForceSoftening[nop->u.d.maxsofttype];
+
+              if(h_j > h_i)
+                {
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype)
+                    if(ExtNodes[no].mass_per_type[0] > 0)
+                      if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype])
+                        {
+                          /* open cell */
+                          no = nop->u.d.nextnode;
+                          continue;
+                        }
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  indi_flag1 = 0; /* evaluate the node once per softening type, using mass_per_type */
+                  indi_flag2 = NSOFTTYPES;
+#else  /* #ifdef MULTIPLE_NODE_SOFTENING */
+                  if(r2 < h_j * h_j)
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */
+                  hmax = h_j;
+                }
+              else
+                hmax = h_i;
+
+                /* ok, node can be used */
+#ifdef MULTIPLE_NODE_SOFTENING
+              extnop = &ExtNodes[no];
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+              if(measure_cost_flag && mass)
+                Thread[thread_id].Node_CostCount[no]++;
+
+              no = nop->u.d.sibling;
+            }
+          else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              int n = no - Tree_ImportedNodeOffset;
+
+              dx = GRAVITY_NEAREST_X(Tree_Points[n].Pos[0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(Tree_Points[n].Pos[1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(Tree_Points[n].Pos[2] - pos_z);
+
+              r2 = dx * dx + dy * dy + dz * dz;
+
+              mass = Tree_Points[n].Mass;
+
+              if(measure_cost_flag)
+                Thread[thread_id].TreePoints_CostCount[n]++;
+
+              double h_j = All.ForceSoftening[Tree_Points[n].SofteningType];
+
+              hmax = (h_j > h_i) ? h_j : h_i;
+
+              no = Nextnode[no - Tree_MaxNodes];
+            }
+          else /* pseudo particle */
+            {
+              if(mode == 0) /* only local targets (phase 1) are registered for export */
+                {
+                  tree_treefind_export_node_threads(no, target, thread_id);
+                }
+
+              no = Nextnode[no - Tree_MaxNodes];
+              continue;
+            }
+
+          /* now evaluate the multipole moment */
+          if(mass)
+            {
+              double r = sqrt(r2);
+
+#ifdef PMGRID
+              double tabentry = asmthfac * r;
+              int tabindex = (int)tabentry;
+
+              if(tabindex < NTAB) /* beyond the tabulated range the interaction is skipped entirely */
+                {
+                  double tabweight = tabentry - tabindex; /* linear interpolation weight between neighbouring table entries */
+                  double factor_force = (1.0 - tabweight) * shortrange_table[tabindex] + tabweight * shortrange_table[tabindex + 1];
+#ifdef EVALPOTENTIAL
+                  double factor_pot =
+                      (1.0 - tabweight) * shortrange_table_potential[tabindex] + tabweight * shortrange_table_potential[tabindex + 1];
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef PMGRID */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                  for(int type = indi_flag1; type < indi_flag2; type++)
+                    {
+                      if(type >= 0)
+                        {
+                          mass = extnop->mass_per_type[type];
+                          double h_j;
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                          if(type == 0)
+                            h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype];
+                          else
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                            h_j = All.ForceSoftening[type];
+
+                          hmax = (h_j > h_i) ? h_j : h_i;
+                        }
+
+                      if(mass)
+                        {
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                          double fac;
+#ifdef EVALPOTENTIAL
+                          double wp;
+#endif /* #ifdef EVALPOTENTIAL */
+
+                          if(r >= hmax) /* outside the softening length: unsoftened form */
+                            {
+                              double rinv = 1.0 / r;
+                              double rinv3 = rinv * rinv * rinv;
+#ifdef PMGRID
+                              fac = rinv3 + rinv * factor_force * asmthinv2; /* fac  = -g'(r)/r */
+#ifdef EVALPOTENTIAL
+                              wp = rinv * factor_pot; /* wp   = -g(r)    */
+#endif /* #ifdef EVALPOTENTIAL */
+#else  /* #ifdef PMGRID */
+                              fac = rinv3;
+#ifdef EVALPOTENTIAL
+                              wp = -rinv;
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef PMGRID #else */
+                            }
+                          else /* inside the softening length: piecewise kernel in u = r / hmax */
+                            {
+                              double h_inv = 1.0 / hmax;
+                              double h3_inv = h_inv * h_inv * h_inv;
+                              double u = r * h_inv;
+
+                              if(u < 0.5)
+                                {
+                                  double u2 = u * u;
+                                  fac = h3_inv * (SOFTFAC1 + u2 * (SOFTFAC2 * u + SOFTFAC3));
+#ifdef EVALPOTENTIAL
+                                  wp = h_inv * (SOFTFAC4 + u2 * (SOFTFAC5 + u2 * (SOFTFAC6 * u + SOFTFAC7)));
+#endif /* #ifdef EVALPOTENTIAL */
+                                }
+                              else
+                                {
+                                  double u2 = u * u;
+                                  double u3 = u2 * u;
+                                  fac = h3_inv * (SOFTFAC8 + SOFTFAC9 * u + SOFTFAC10 * u2 + SOFTFAC11 * u3 + SOFTFAC12 / u3);
+#ifdef EVALPOTENTIAL
+                                  wp = h_inv * (SOFTFAC13 + SOFTFAC14 / u +
+                                                u2 * (SOFTFAC1 + u * (SOFTFAC15 + u * (SOFTFAC16 + SOFTFAC17 * u))));
+#endif /* #ifdef EVALPOTENTIAL */
+                                }
+
+#ifdef PMGRID
+                              if(r > 0) /* guards the 1/r below */
+                                {
+                                  double rinv = 1.0 / r;
+                                  fac += rinv * factor_force * asmthinv2; /* fac  = -g'(r)/r */
+#ifdef EVALPOTENTIAL
+                                  wp += rinv * (factor_pot + 1.0); /* wp   = -g(r)    */
+#endif /* #ifdef EVALPOTENTIAL */
+                                }
+#endif /* #ifdef PMGRID */
+                            }
+
+#ifdef EVALPOTENTIAL
+                          pot += mass * wp;
+#endif /* #ifdef EVALPOTENTIAL */
+                          fac *= mass;
+
+                          acc_x += dx * fac;
+                          acc_y += dy * fac;
+                          acc_z += dz * fac;
+
+#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL)
+                          double fcorr[3];
+                          ewald_corr(dx, dy, dz, fcorr);
+                          acc_x += mass * fcorr[0];
+                          acc_y += mass * fcorr[1];
+                          acc_z += mass * fcorr[2];
+#ifdef EVALPOTENTIAL
+                          pot += mass * ewald_pot_corr(dx, dy, dz);
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                        }
+                    }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                  ninteractions++;
+#ifdef PMGRID
+                }
+#endif /* #ifdef PMGRID */
+            }
+        }
+    }
+
+  out->Acc[0] = acc_x;
+  out->Acc[1] = acc_y;
+  out->Acc[2] = acc_z;
+#ifdef EVALPOTENTIAL
+  out->Potential = pot;
+#endif /* #ifdef EVALPOTENTIAL */
+#ifdef NO_GRAVITY_TYPE
+  if(in->Type == NO_GRAVITY_TYPE) /* results for this particle type are discarded */
+    {
+      out->Acc[0] = 0.0;
+      out->Acc[1] = 0.0;
+      out->Acc[2] = 0.0;
+#ifdef EVALPOTENTIAL
+      out->Potential = 0.0;
+#endif /* #ifdef EVALPOTENTIAL */
+    }
+#endif /* #ifdef NO_GRAVITY_TYPE */
+#ifdef OUTPUTGRAVINTERACTIONS
+  out->GravInteractions = ninteractions;
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+
+  return ninteractions;
+}
+
+/*! \brief Prepares node to be exported.
+ *
+ *  \param[in] no Index of node.
+ *  \param[in] i Index of particle.
+ *  \param[in] thread_id ID of thread.
+ *  Registers particle i once per destination task in the thread's PartList (tracked via Exportflag) and always appends an entry to the node list, which is addressed with negative indices relative to PartList + InitialSpace; ExportSpace is reduced for every entry written.
+ *  \return 0 (always)
+ */
+int tree_treefind_export_node_threads(int no, int i, int thread_id)
+{
+  /* The task indicated by the pseudoparticle node */
+  int task = DomainNewTask[no - (Tree_MaxPart + Tree_MaxNodes)];
+
+  if(Thread[thread_id].Exportflag[task] != i) /* particle i not yet registered for this task */
+    {
+      Thread[thread_id].Exportflag[task] = i;
+      int nexp = Thread[thread_id].Nexport++;
+      Thread[thread_id].PartList[nexp].Task = task;
+      Thread[thread_id].PartList[nexp].Index = i;
+      Thread[thread_id].ExportSpace -= Thread[thread_id].ItemSize;
+    }
+
+  int nexp = Thread[thread_id].NexportNodes++;
+  nexp = -1 - nexp; /* node entries are stored at indices -1, -2, ... below the base pointer computed next */
+  struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[thread_id].PartList) + Thread[thread_id].InitialSpace);
+  nodelist[nexp].Task = task;
+  nodelist[nexp].Index = i;
+  nodelist[nexp].Node = DomainNodeIndex[no - (Tree_MaxPart + Tree_MaxNodes)];
+  Thread[thread_id].ExportSpace -= sizeof(struct datanodelist) + sizeof(int);
+  return 0;
+}
+
+#ifdef ALLOW_DIRECT_SUMMATION
+/*! \brief Kernel of direct summation force calculation.
+ *
+ *  \param[in] target Index of particle in import array.
+ *  \param[in] result_idx Index in result array.
+ *  \param[in] nimport number of imported particles.
+ *  Sums the softened (and, with PMGRID, short-range tabulated) contribution of all nimport entries of DirectDataAll on entry target and stores acceleration (and potential) in DirectAccOut[result_idx].
+ *  \return void
+ */
+void force_evaluate_direct(int target, int result_idx, int nimport)
+{
+#if !defined(GRAVITY_NOT_PERIODIC)
+  double xtmp, ytmp, ztmp; /* not referenced directly below; presumably scratch space for the GRAVITY_NEAREST_* macros -- TODO confirm */
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */
+
+  double acc_x = 0;
+  double acc_y = 0;
+  double acc_z = 0;
+#ifdef EVALPOTENTIAL
+  double pot = 0.0;
+#endif /* #ifdef EVALPOTENTIAL */
+
+  double pos_x = DirectDataAll[target].Pos[0];
+  double pos_y = DirectDataAll[target].Pos[1];
+  double pos_z = DirectDataAll[target].Pos[2];
+  double h_i = All.ForceSoftening[DirectDataAll[target].SofteningType];
+
+#ifdef PMGRID
+  double asmth = All.Asmth[0];
+#if defined(PLACEHIGHRESREGION)
+  int ptype_i = DirectDataAll[target].Type;
+  if(pmforce_is_particle_high_res(ptype_i, DirectDataAll[target].Pos))
+    asmth = All.Asmth[1];
+#endif /* #if defined(PLACEHIGHRESREGION) */
+  double asmthinv = 0.5 / asmth;
+  double asmthinv2 = asmthinv * asmthinv;
+  double asmthfac = asmthinv * (NTAB / (RCUT / 2.0)); /* maps a distance r to a fractional index into the short-range tables */
+#endif /* #ifdef PMGRID */
+
+  for(int j = 0; j < nimport; j++) /* note: the loop does not skip j == target */
+    {
+      double h_j = All.ForceSoftening[DirectDataAll[j].SofteningType];
+
+      double hmax = (h_j > h_i) ? h_j : h_i;
+
+      double dx = GRAVITY_NEAREST_X(DirectDataAll[j].Pos[0] - pos_x);
+      double dy = GRAVITY_NEAREST_Y(DirectDataAll[j].Pos[1] - pos_y);
+      double dz = GRAVITY_NEAREST_Z(DirectDataAll[j].Pos[2] - pos_z);
+
+      double r2 = dx * dx + dy * dy + dz * dz;
+
+      double mass = DirectDataAll[j].Mass;
+
+      /* now evaluate the force component */
+
+      double r = sqrt(r2);
+
+#ifdef PMGRID
+      double tabentry = asmthfac * r;
+      int tabindex = (int)tabentry;
+
+      if(tabindex < NTAB) /* beyond the tabulated range the pair is skipped entirely */
+        {
+          double tabweight = tabentry - tabindex;
+          double factor_force = (1.0 - tabweight) * shortrange_table[tabindex] + tabweight * shortrange_table[tabindex + 1];
+#ifdef EVALPOTENTIAL
+          double factor_pot =
+              (1.0 - tabweight) * shortrange_table_potential[tabindex] + tabweight * shortrange_table_potential[tabindex + 1];
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef PMGRID */
+
+          double fac;
+#ifdef EVALPOTENTIAL
+          double wp;
+#endif /* #ifdef EVALPOTENTIAL */
+
+          if(r >= hmax) /* outside the softening length: unsoftened form */
+            {
+              double rinv = 1.0 / r;
+              double rinv3 = rinv * rinv * rinv;
+#ifdef PMGRID
+              fac = rinv3 + rinv * factor_force * asmthinv2; /* fac  = -g'(r)/r */
+#ifdef EVALPOTENTIAL
+              wp = rinv * factor_pot; /* wp   = -g(r)    */
+#endif /* #ifdef EVALPOTENTIAL */
+#else  /* #ifdef PMGRID */
+              fac = rinv3;
+#ifdef EVALPOTENTIAL
+              wp = -rinv;
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef PMGRID #else */
+            }
+          else /* inside the softening length: piecewise kernel in u = r / hmax */
+            {
+              double h_inv = 1.0 / hmax;
+              double h3_inv = h_inv * h_inv * h_inv;
+              double u = r * h_inv;
+
+              if(u < 0.5)
+                {
+                  double u2 = u * u;
+                  fac = h3_inv * (SOFTFAC1 + u2 * (SOFTFAC2 * u + SOFTFAC3));
+#ifdef EVALPOTENTIAL
+                  wp = h_inv * (SOFTFAC4 + u2 * (SOFTFAC5 + u2 * (SOFTFAC6 * u + SOFTFAC7)));
+#endif /* #ifdef EVALPOTENTIAL */
+                }
+              else
+                {
+                  double u2 = u * u;
+                  double u3 = u2 * u;
+                  fac = h3_inv * (SOFTFAC8 + SOFTFAC9 * u + SOFTFAC10 * u2 + SOFTFAC11 * u3 + SOFTFAC12 / u3);
+#ifdef EVALPOTENTIAL
+                  wp = h_inv * (SOFTFAC13 + SOFTFAC14 / u + u2 * (SOFTFAC1 + u * (SOFTFAC15 + u * (SOFTFAC16 + SOFTFAC17 * u))));
+#endif /* #ifdef EVALPOTENTIAL */
+                }
+#ifdef PMGRID
+              if(r > 0) /* guards the 1/r below */
+                {
+                  double rinv = 1.0 / r;
+                  fac += rinv * factor_force * asmthinv2; /* fac  = -g'(r)/r */
+#ifdef EVALPOTENTIAL
+                  wp += rinv * (factor_pot + 1.0); /* wp   = -g(r)    */
+#endif /* #ifdef EVALPOTENTIAL */
+                }
+#endif /* #ifdef PMGRID */
+            }
+
+#ifdef EVALPOTENTIAL
+          pot += mass * wp;
+#endif /* #ifdef EVALPOTENTIAL */
+          fac *= mass;
+
+          acc_x += dx * fac;
+          acc_y += dy * fac;
+          acc_z += dz * fac;
+
+#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL)
+          {
+            double fcorr[3];
+            ewald_corr(dx, dy, dz, fcorr);
+            acc_x += mass * fcorr[0];
+            acc_y += mass * fcorr[1];
+            acc_z += mass * fcorr[2];
+#if defined(EVALPOTENTIAL)
+            pot += mass * ewald_pot_corr(dx, dy, dz);
+#endif /* #if defined(EVALPOTENTIAL) */
+          }
+#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */
+
+#ifdef PMGRID
+        }
+#endif /* #ifdef PMGRID */
+    }
+
+  DirectAccOut[result_idx].Acc[0] = acc_x;
+  DirectAccOut[result_idx].Acc[1] = acc_y;
+  DirectAccOut[result_idx].Acc[2] = acc_z;
+#ifdef EVALPOTENTIAL
+  DirectAccOut[result_idx].Potential = pot;
+#endif /* #ifdef EVALPOTENTIAL */
+}
+#endif /* #ifdef ALLOW_DIRECT_SUMMATION */
diff --git a/src/amuse/community/arepo/src/gravity/grav_external.c b/src/amuse/community/arepo/src/gravity/grav_external.c
new file mode 100644
index 0000000000..784341a47b
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/grav_external.c
@@ -0,0 +1,579 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              .
+ *
+ * \file        src/gravity/grav_external.c
+ * \date        05/2018
+ * \brief       Special gravity routines for external forces.
+ * \details     contains functions:
+ *                void gravity_external(void)
+ *                static void gravity_external_get_force( double pos[3],
+ *                  int type, MyIDType ID, double acc[3], double *pot, int
+ *                  *flag_set )
+ *                void gravity_monopole_1d_spherical()
+ *                double enclosed_mass(double R)
+ *                void calc_exact_gravity_for_particle_type(void)
+ *                void special_particle_create_list()
+ *                void special_particle_update_list()
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+#ifdef EXTERNALGRAVITY
+static void gravity_external_get_force(double pos[3], int type, MyIDType ID, double acc[3], double *pot, int *flag_set);
+
+/*! \brief Main routine to add contribution of external gravitational potential
+ *  to accelerations.
+ *
+ *  Function is called in gravity() (in accel.c). Function also evaluates
+ *  the gradient of the accelerations which is needed for the timestep
+ *  criterion due to the external potential.
+ *  The gradient is estimated by central differences with a step of 0.1 times the cell radius (type 0) or 0.1 times the softening length (other types) and stored in P[i].dGravAccel.
+ *  \return void
+ */
+void gravity_external(void)
+{
+  mpi_printf("EXTERNALGRAVITY: execute\n");
+
+  TIMER_START(CPU_TREE);
+
+  for(int idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      int i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0) /* negative entries of the active list are skipped */
+        continue;
+
+      double *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        pos = SphP[i].Center;
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+      double acc[3], pot;
+      int flag_set = 0;
+      gravity_external_get_force(pos, P[i].Type, P[i].ID, acc, &pot, &flag_set);
+
+      if(flag_set) /* external force replaces the existing acceleration */
+        {
+          for(int k = 0; k < NUMDIMS; k++)
+            P[i].GravAccel[k] = acc[k];
+          for(int k = NUMDIMS; k < 3; k++)
+            P[i].GravAccel[k] = 0;
+          P[i].ExtPotential = pot;
+        }
+      else /* external force is added to the existing acceleration */
+        {
+          for(int k = 0; k < NUMDIMS; k++)
+            P[i].GravAccel[k] += acc[k];
+#ifdef EVALPOTENTIAL
+          P[i].Potential += pot;
+#endif
+          P[i].ExtPotential += pot;
+        }
+
+      double dx; /* finite-difference step for the gradient estimate below */
+      if(P[i].Type == 0)
+        dx = 0.1 * get_cell_radius(i);
+      else
+        dx = 0.1 * All.ForceSoftening[P[i].SofteningType];
+
+      P[i].dGravAccel = 0;
+      for(int dim = 0; dim < NUMDIMS; dim++)
+        {
+          double accL[3], posL[3];
+          for(int k = 0; k < 3; k++)
+            posL[k] = pos[k];
+          posL[dim] -= dx;
+          gravity_external_get_force(posL, P[i].Type, P[i].ID, accL, &pot, &flag_set); /* pot and flag_set are only scratch here */
+
+          double accR[3], posR[3];
+          for(int k = 0; k < 3; k++)
+            posR[k] = pos[k];
+          posR[dim] += dx;
+          gravity_external_get_force(posR, P[i].Type, P[i].ID, accR, &pot, &flag_set);
+
+          for(int k = 0; k < NUMDIMS; k++)
+            {
+              double dGrav = accR[k] - accL[k];
+              P[i].dGravAccel += dGrav * dGrav;
+            }
+        }
+      P[i].dGravAccel = sqrt(P[i].dGravAccel) / (2. * dx); /* norm over all (dim, k) differences, divided by the total step 2 * dx */
+    }
+
+  TIMER_STOP(CPU_TREE);
+}
+
+/*! \brief Calculates the force from the external potential given a position.
+ *
+ *  \param[in] pos Position at which force is to be evaluated.
+ *  \param[in] type (unused)
+ *  \param[in] ID (unused)
+ *  \param[in, out] acc Acceleration array.
+ *  \param[in, out] pot Pointer to potential.
+ *  \param[in] flag_set (unused)
+ *  Note: acc and *pot are reset to zero first; only EXTERNALGY contributes to *pot, while the STATICISO, STATICNFW and STATICHQ branches add to acc only.
+ *  \return void
+ */
+static void gravity_external_get_force(double pos[3], int type, MyIDType ID, double acc[3], double *pot, int *flag_set)
+{
+  for(int k = 0; k < 3; k++)
+    acc[k] = 0;
+
+  *pot = 0;
+
+#ifdef EXTERNALGY
+  acc[1] += EXTERNALGY; /* constant acceleration along the y-axis */
+  *pot = -(EXTERNALGY)*pos[1];
+#endif /* #ifdef EXTERNALGY */
+
+#ifdef STATICISO
+  {
+    double r, m;
+    double dx, dy, dz;
+
+    dx = pos[0] - boxHalf_X; /* potential is centred on (boxHalf_X, boxHalf_Y, boxHalf_Z) */
+    dy = pos[1] - boxHalf_Y;
+    dz = pos[2] - boxHalf_Z;
+
+    r = sqrt(dx * dx + dy * dy + dz * dz);
+
+    if(r > ISO_R200)
+      m = ISO_M200;
+    else
+      m = ISO_M200 * r / ISO_R200; /* enclosed mass grows linearly with r inside ISO_R200 */
+
+#ifdef ISO_FRACTION
+    m *= ISO_FRACTION;
+#endif /* #ifdef ISO_FRACTION */
+
+    if(r > 0) /* guards the division by r */
+      {
+        acc[0] += -All.G * m * dx / r / (r * r + ISO_Eps * ISO_Eps);
+        acc[1] += -All.G * m * dy / r / (r * r + ISO_Eps * ISO_Eps);
+        acc[2] += -All.G * m * dz / r / (r * r + ISO_Eps * ISO_Eps);
+      }
+  }
+#endif /* #ifdef STATICISO */
+
+#ifdef STATICNFW
+  {
+    double r, m;
+    double dx, dy, dz;
+
+    dx = pos[0] - boxHalf_X;
+    dy = pos[1] - boxHalf_Y;
+    dz = pos[2] - boxHalf_Z;
+
+    r = sqrt(dx * dx + dy * dy + dz * dz);
+    m = enclosed_mass(r);
+#ifdef NFW_DARKFRACTION
+    m *= NFW_DARKFRACTION;
+#endif /* #ifdef NFW_DARKFRACTION */
+    if(r > 0) /* guards the division by r */
+      {
+        acc[0] += -All.G * m * dx / (r * r * r);
+        acc[1] += -All.G * m * dy / (r * r * r);
+        acc[2] += -All.G * m * dz / (r * r * r);
+      }
+  }
+#endif /* #ifdef STATICNFW */
+
+#ifdef STATICHQ
+  {
+    double r, m, a;
+    double dx, dy, dz;
+
+    dx = pos[0] - boxHalf_X;
+    dy = pos[1] - boxHalf_Y;
+    dz = pos[2] - boxHalf_Z;
+
+    r = sqrt(dx * dx + dy * dy + dz * dz);
+
+    a = pow(All.G * HQ_M200 / (100 * All.Hubble * All.Hubble), 1.0 / 3) / HQ_C * sqrt(2 * (log(1 + HQ_C) - HQ_C / (1 + HQ_C))); /* scale length; depends only on constants, not on pos */
+
+    m = HQ_M200 * pow(r / (r + a), 2); /* mass enclosed within r */
+#ifdef HQ_DARKFRACTION
+    m *= HQ_DARKFRACTION;
+#endif /* #ifdef HQ_DARKFRACTION */
+    if(r > 0) /* guards the division by r */
+      {
+        acc[0] += -All.G * m * dx / (r * r * r);
+        acc[1] += -All.G * m * dy / (r * r * r);
+        acc[2] += -All.G * m * dz / (r * r * r);
+      }
+  }
+#endif /* #ifdef STATICHQ */
+}
+#endif /* #ifdef EXTERNALGRAVITY */
+
+#ifdef ONEDIMS_SPHERICAL
+/*! \brief One-dimensional gravity in the spherically symmetric case.
+ *  For each of the NumGas cells, in index order, sets the radial acceleration (and potential) from All.CoreMass plus the masses of all lower-index cells plus the part of the cell's own mass inside its centre radius.
+ *  \return void
+ */
+void gravity_monopole_1d_spherical()
+{
+  printf("Doing 1D gravity...\n");
+
+  int i;
+  double msum = All.CoreMass; /* mass enclosed below the current cell */
+
+  for(i = 0; i < NumGas; i++)
+    {
+      double r0; /* inner radius used for the current cell */
+      if(i > 0)
+        r0 = 0.5 * (P[i].Pos[0] + P[i - 1].Pos[0]);
+      else
+        r0 = All.CoreRadius;
+      double dm = 4. / 3. * M_PI * (SphP[i].Center[0] * SphP[i].Center[0] * SphP[i].Center[0] - r0 * r0 * r0) * SphP[i].Density; /* mass of the shell between r0 and the cell centre */
+      double rad = SphP[i].Center[0];
+
+      P[i].GravAccel[0] = -(msum + dm) * All.G / (rad * rad);
+
+#ifdef EVALPOTENTIAL
+      P[i].Potential = -(msum + dm) * All.G / rad;
+#endif /* #ifdef EVALPOTENTIAL */
+
+      msum += P[i].Mass;
+
+      P[i].GravAccel[1] = 0;
+      P[i].GravAccel[2] = 0;
+    }
+
+  printf("... 1D gravity done.\n");
+}
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+#ifdef STATICNFW
+/*! \brief Auxiliary function for static NFW potential.
+ *
+ *  \param[in] R Radius from center of potential; values larger than Rs * NFW_C are clamped to Rs * NFW_C.
+ *
+ *  \return Enclosed mass (which causes the external potential).
+ */
+double enclosed_mass(double R)
+{
+  /* NFW_Eps is in units of Rs (it enters lengths below only as NFW_Eps * Rs) */
+
+  if(R > Rs * NFW_C) /* the enclosed mass stays constant beyond Rs * NFW_C */
+    R = Rs * NFW_C;
+
+  return fac * 4 * M_PI * RhoCrit * Dc *
+         (-(Rs * Rs * Rs * (1 - NFW_Eps + log(Rs) - 2 * NFW_Eps * log(Rs) + NFW_Eps * NFW_Eps * log(NFW_Eps * Rs))) /
+              ((NFW_Eps - 1) * (NFW_Eps - 1)) +
+          (Rs * Rs * Rs *
+           (Rs - NFW_Eps * Rs - (2 * NFW_Eps - 1) * (R + Rs) * log(R + Rs) + NFW_Eps * NFW_Eps * (R + Rs) * log(R + NFW_Eps * Rs))) /
+              ((NFW_Eps - 1) * (NFW_Eps - 1) * (R + Rs)));
+}
+#endif /* #ifdef STATICNFW */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+/*! \brief Routine that computes gravitational force by direct summation.
+ *
+ *  Called by gravity() (in accel.c).
+ *  For every active particle, adds the softened direct-sum acceleration (and potential) of all All.MaxPartSpecial entries of PartSpecialListGlobal except the entry with the particle's own ID; with EXACT_GRAVITY_REACTION the reaction on the special particles is accumulated, summed over all tasks and added to the matching active particles.
+ *  \return void
+ */
+void calc_exact_gravity_for_particle_type(void)
+{
+  int i, idx;
+#ifdef EXACT_GRAVITY_REACTION
+  double *accx, *accy, *accz; /* per special particle: reaction accumulated on this task */
+  accx = (double *)mymalloc("accx", All.MaxPartSpecial * sizeof(double));
+  accy = (double *)mymalloc("accy", All.MaxPartSpecial * sizeof(double));
+  accz = (double *)mymalloc("accz", All.MaxPartSpecial * sizeof(double));
+#ifdef EVALPOTENTIAL
+  double *pot;
+  pot = (double *)mymalloc("pot", All.MaxPartSpecial * sizeof(double));
+#endif /* #ifdef EVALPOTENTIAL */
+  int n;
+  for(n = 0; n < All.MaxPartSpecial; n++)
+    {
+      accx[n] = accy[n] = accz[n] = 0.0;
+#ifdef EVALPOTENTIAL
+      pot[n] = 0.0;
+#endif /* #ifdef EVALPOTENTIAL */
+    }
+#endif /* #ifdef EXACT_GRAVITY_REACTION */
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0) /* negative entries of the active list are skipped */
+        continue;
+
+      double fac, wp;
+      double dx, dy, dz, r, r2;
+      double h, h_inv, h3_inv, u;
+      int k;
+
+      /* set softening to corresponding particle's softening length */
+      h = All.ForceSoftening[All.SofteningTypeOfPartType[EXACT_GRAVITY_FOR_PARTICLE_TYPE]];
+
+      for(k = 0; k < All.MaxPartSpecial; k++)
+        {
+          if(PartSpecialListGlobal[k].ID == P[i].ID) /* no self-interaction */
+            continue;
+
+          dx = P[i].Pos[0] - PartSpecialListGlobal[k].pos[0];
+          dy = P[i].Pos[1] - PartSpecialListGlobal[k].pos[1];
+          dz = P[i].Pos[2] - PartSpecialListGlobal[k].pos[2];
+
+          r2 = dx * dx + dy * dy + dz * dz;
+          r = sqrt(r2);
+
+          // using spline softening
+          if(r >= h)
+            {
+              fac = 1 / (r2 * r);
+              wp = -1 / r;
+            }
+          else
+            {
+              h_inv = 1.0 / h;
+              h3_inv = h_inv * h_inv * h_inv;
+              u = r * h_inv;
+
+              if(u < 0.5)
+                {
+                  fac = h3_inv * (10.666666666667 + u * u * (32.0 * u - 38.4));
+                  wp = h_inv * (-2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6)));
+                }
+              else
+                {
+                  fac = h3_inv *
+                        (21.333333333333 - 48.0 * u + 38.4 * u * u - 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u));
+                  wp = h_inv * (-3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u))));
+                }
+            }
+
+          P[i].GravAccel[0] -= All.G * PartSpecialListGlobal[k].mass * fac * dx; /* dx points from the special particle to particle i, hence the minus sign */
+          P[i].GravAccel[1] -= All.G * PartSpecialListGlobal[k].mass * fac * dy;
+          P[i].GravAccel[2] -= All.G * PartSpecialListGlobal[k].mass * fac * dz;
+
+#ifdef EVALPOTENTIAL
+          P[i].Potential += All.G * PartSpecialListGlobal[k].mass * wp;
+#endif /* #ifdef EVALPOTENTIAL */
+#ifdef EXACT_GRAVITY_REACTION
+          /* avoid double counting */
+          if(P[i].Type != EXACT_GRAVITY_FOR_PARTICLE_TYPE)
+            {
+              accx[k] += All.G * P[i].Mass * fac * dx;
+              accy[k] += All.G * P[i].Mass * fac * dy;
+              accz[k] += All.G * P[i].Mass * fac * dz;
+#ifdef EVALPOTENTIAL
+              pot[k] += All.G * P[i].Mass * wp;
+#endif /* #ifdef EVALPOTENTIAL */
+            }
+#endif /* #ifdef EXACT_GRAVITY_REACTION */
+        }
+    }
+#ifdef EXACT_GRAVITY_REACTION
+  double *buf = (double *)mymalloc("buf", All.MaxPartSpecial * sizeof(double)); /* receive buffer for the reductions below */
+
+  MPI_Allreduce(accx, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  for(n = 0; n < All.MaxPartSpecial; n++)
+    accx[n] = buf[n];
+  MPI_Allreduce(accy, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  for(n = 0; n < All.MaxPartSpecial; n++)
+    accy[n] = buf[n];
+  MPI_Allreduce(accz, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  for(n = 0; n < All.MaxPartSpecial; n++)
+    accz[n] = buf[n];
+#ifdef EVALPOTENTIAL
+  MPI_Allreduce(pot, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  for(n = 0; n < All.MaxPartSpecial; n++)
+    pot[n] = buf[n];
+#endif /* #ifdef EVALPOTENTIAL */
+  myfree(buf);
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+      for(n = 0; n < All.MaxPartSpecial; n++)
+        {
+          if(PartSpecialListGlobal[n].ID == P[i].ID) /* apply the summed reaction to the active particle with the matching ID */
+            {
+              P[i].GravAccel[0] += accx[n];
+              P[i].GravAccel[1] += accy[n];
+              P[i].GravAccel[2] += accz[n];
+#ifdef EVALPOTENTIAL
+              P[i].Potential += pot[n];
+#endif /* #ifdef EVALPOTENTIAL */
+            }
+        }
+    }
+
+#ifdef EVALPOTENTIAL
+  myfree(pot);
+#endif /* #ifdef EVALPOTENTIAL */
+  myfree(accz);
+  myfree(accy);
+  myfree(accx);
+#endif /* #ifdef EXACT_GRAVITY_REACTION */
+}
+
+/*! \brief Creates list of special particles, i.e. particles for which gravity
+ *  is calculated by direct summation.
+ *
+ *  Called in begrund2() (begrun.c), i.e. only at startup of the simulation.
+ *  Each task collects its local particles of type EXACT_GRAVITY_FOR_PARTICLE_TYPE and sends the complete local list to every task; the received lists are stored in task order in PartSpecialListGlobal.
+ *  \return void
+ */
+void special_particle_create_list()
+{
+  struct special_particle_data *SpecialPartList;
+  SpecialPartList =
+      (struct special_particle_data *)mymalloc("SpecialPartList", All.MaxPartSpecial * sizeof(struct special_particle_data));
+
+  int i, j, nsrc, nimport, ngrp;
+  for(i = 0, nsrc = 0; i < NumPart; i++)
+    {
+      if(P[i].Type == EXACT_GRAVITY_FOR_PARTICLE_TYPE)
+        {
+          SpecialPartList[nsrc].ID = P[i].ID; /* NOTE(review): no check here that nsrc stays below All.MaxPartSpecial -- confirm it is guaranteed elsewhere */
+
+          SpecialPartList[nsrc].pos[0] = P[i].Pos[0];
+          SpecialPartList[nsrc].pos[1] = P[i].Pos[1];
+          SpecialPartList[nsrc].pos[2] = P[i].Pos[2];
+
+          SpecialPartList[nsrc++].mass = P[i].Mass;
+        }
+    }
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = nsrc; /* every task gets this task's complete local list */
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = 0; /* the same data is sent to every task, so all send offsets are 0 */
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  /* exchange particle data */
+  for(ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp; /* pairwise exchange partner; ngrp == 0 pairs the task with itself */
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the particles */
+              MPI_Sendrecv(&SpecialPartList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct special_particle_data),
+                           MPI_BYTE, recvTask, TAG_DENS_A, &PartSpecialListGlobal[Recv_offset[recvTask]],
+                           Recv_count[recvTask] * sizeof(struct special_particle_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  myfree(SpecialPartList);
+}
+
+/*! \brief Updates list of special particles, i.e.
particles for which gravity + * is calculated by direct summation. + * + * Called in run() (run.c). + * + * \return void + */ +void special_particle_update_list() +{ + struct special_particle_data *SpecialPartList; + SpecialPartList = + (struct special_particle_data *)mymalloc("SpecialPartList", All.MaxPartSpecial * sizeof(struct special_particle_data)); + + int i, j, nsrc, nimport, ngrp; + for(i = 0, nsrc = 0; i < NumPart; i++) + { + if(P[i].Type == EXACT_GRAVITY_FOR_PARTICLE_TYPE) + { + SpecialPartList[nsrc].ID = P[i].ID; + + SpecialPartList[nsrc].pos[0] = P[i].Pos[0]; + SpecialPartList[nsrc].pos[1] = P[i].Pos[1]; + SpecialPartList[nsrc].pos[2] = P[i].Pos[2]; + + SpecialPartList[nsrc++].mass = P[i].Mass; + } + } + + for(j = 0; j < NTask; j++) + Send_count[j] = nsrc; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = 0; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + /* exchange particle data */ + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&SpecialPartList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct special_particle_data), + MPI_BYTE, recvTask, TAG_DENS_A, &PartSpecialListGlobal[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct special_particle_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + myfree(SpecialPartList); +} +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ diff --git a/src/amuse/community/arepo/src/gravity/grav_softening.c b/src/amuse/community/arepo/src/gravity/grav_softening.c new file mode 100644 index 0000000000..4494f4df08 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/grav_softening.c @@ 
-0,0 +1,215 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/gravity/grav_softening.c + * \date 05/2018 + * \brief Routines for setting the gravitational softening lengths. + * \details contains functions: + * void set_softenings(void) + * int get_softeningtype_for_hydro_cell(int i) + * double get_default_softening_of_particletype(int type) + * int get_softening_type_from_mass(double mass) + * double get_desired_softening_from_mass(double mass) + * void init_individual_softenings(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" + +/*! \brief Sets the (comoving) softening length of all particle + * types in the table All.SofteningTable[...]. + * + * A check is performed that the physical softening length is bounded by the + * Softening-MaxPhys values. 
+ * + * \return void + */ +void set_softenings(void) +{ + int i; + + if(All.ComovingIntegrationOn) + { + for(i = 0; i < NSOFTTYPES; i++) + if(All.SofteningComoving[i] * All.Time > All.SofteningMaxPhys[i]) + All.SofteningTable[i] = All.SofteningMaxPhys[i] / All.Time; + else + All.SofteningTable[i] = All.SofteningComoving[i]; + } + else + { + for(i = 0; i < NSOFTTYPES; i++) + All.SofteningTable[i] = All.SofteningComoving[i]; + } + +#ifdef ADAPTIVE_HYDRO_SOFTENING + for(i = 0; i < NSOFTTYPES_HYDRO; i++) + All.SofteningTable[i + NSOFTTYPES] = All.MinimumComovingHydroSoftening * pow(All.AdaptiveHydroSofteningSpacing, i); + + if(All.AdaptiveHydroSofteningSpacing < 1) + terminate("All.AdaptiveHydroSofteningSpacing < 1"); + +#ifdef MULTIPLE_NODE_SOFTENING + /* we check that type=0 has its own slot 0 in the softening types, so that only gas masses are stored there */ + if(All.SofteningTypeOfPartType[0] != 0) + terminate("All.SofteningTypeOfPartType[0] != 0"); + + for(i = 1; i < NTYPES; i++) + if(All.SofteningTypeOfPartType[i] == All.SofteningTypeOfPartType[0]) + terminate("i=%d: All.SofteningTypeOfPartType[i] == All.SofteningTypeOfPartType[0]", i); +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + + for(i = 0; i < NSOFTTYPES + NSOFTTYPES_HYDRO; i++) + All.ForceSoftening[i] = 2.8 * All.SofteningTable[i]; + + All.ForceSoftening[NSOFTTYPES + NSOFTTYPES_HYDRO] = 0; /* important - this entry is actually used */ +} + +#ifdef ADAPTIVE_HYDRO_SOFTENING +/*! \brief Finds the index of the softening table for a given cell depending + * on its radius. + * + * \param[in] i Index of cell in SphP array. + * + * \return Index of corresponding softening in softening lookup-table. 
+ */ +int get_softeningtype_for_hydro_cell(int i) +{ + double soft = All.GasSoftFactor * get_cell_radius(i); + + if(soft <= All.ForceSoftening[NSOFTTYPES]) + return NSOFTTYPES; + + int k = 0.5 + log(soft / All.ForceSoftening[NSOFTTYPES]) / log(All.AdaptiveHydroSofteningSpacing); + if(k >= NSOFTTYPES_HYDRO) + k = NSOFTTYPES_HYDRO - 1; + + return NSOFTTYPES + k; +} +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + +/*! \brief Returns the default softening length for particle type 'type'. + * + * \param[in] type Type of the local particle. + * + * \return The softening length of particle with type 'type'. + */ +double get_default_softening_of_particletype(int type) { return All.SofteningTable[All.SofteningTypeOfPartType[type]]; } + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING +/*! \brief Determines the softening type from the mass of a particle. + * + * \param[in] mass Mass of the particle. + * + * \return Index in gravitational softening table. + */ +int get_softening_type_from_mass(double mass) +{ + int i, min_type = -1; + double eps = get_desired_softening_from_mass(mass); + double min_dln = MAX_FLOAT_NUMBER; + +#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) + i = 1; +#else /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */ + i = 0; +#endif /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) #else */ + + for(; i < NSOFTTYPES; i++) + { + if(All.ForceSoftening[i] > 0) + { + double dln = fabs(log(eps) - log(All.ForceSoftening[i])); + + if(dln < min_dln) + { + min_dln = dln; + min_type = i; + } + } + } + if(min_type < 0) + terminate("min_type < 0 mass=%g eps=%g All.AvgType1Mass=%g All.ForceSoftening[1]=%g", mass, eps, All.AvgType1Mass, + All.ForceSoftening[1]); + + return min_type; +} + +/*! \brief Returns the softening length of softening type 1 + * particles depending on the particle mass. + * + * \param[in] mass Particle mass. 
+ * + * \return Softening length for a softening type 1 particle of mass 'mass'. + */ +double get_desired_softening_from_mass(double mass) +{ + if(mass <= All.AvgType1Mass) + return 2.8 * All.SofteningComoving[1]; + else + return 2.8 * All.SofteningComoving[1] * pow(mass / All.AvgType1Mass, 1.0 / 3); +} + +/*! \brief Initializes the mass dependent softening calculation for Type 1 + * particles. + * + * The average mass of Type 1 particles is calculated. + * + * \return void + */ +void init_individual_softenings(void) +{ + int i, ndm; + double mass, masstot; + long long ndmtot; + + for(i = 0, ndm = 0, mass = 0; i < NumPart; i++) + if(P[i].Type == 1) + { + ndm++; + mass += P[i].Mass; + } + sumup_large_ints(1, &ndm, &ndmtot); + MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + All.AvgType1Mass = masstot / ndmtot; + + mpi_printf("INIT: AvgType1Mass = %g\n", All.AvgType1Mass); + + for(i = 0; i < NumPart; i++) + { + if(((1 << P[i].Type) & (INDIVIDUAL_GRAVITY_SOFTENING))) + P[i].SofteningType = get_softening_type_from_mass(P[i].Mass); + } +} +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ diff --git a/src/amuse/community/arepo/src/gravity/gravdirect.c b/src/amuse/community/arepo/src/gravity/gravdirect.c new file mode 100644 index 0000000000..cbe7be7426 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/gravdirect.c @@ -0,0 +1,259 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <https://www.gnu.org/licenses/>.
+ *
+ * \file src/gravity/gravdirect.c
+ * \date 05/2018
+ * \brief Main driver routines for gravitational (short-range) force
+ * computation through direct summation
+ * \details Note that this is not the same thing as
+ * EXACT_GRAVITY_FOR_PARTICLE_TYPE!
+ * ALLOW_DIRECT_SUMMATION does direct summation for performance
+ * reasons if there is only a small number of interactions to be
+ * calculated and the overhead of a tree-construction would be
+ * more expensive than the direct summation calculation, while
+ * EXACT_GRAVITY_FOR_PARTICLE_TYPE always enforces a direct
+ * summation for all particle pairs of a given type.
+ * contains functions:
+ * void gravity_direct(int timebin)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+#ifdef ALLOW_DIRECT_SUMMATION
+/* Number of force targets summed over all tasks; set in gravity_direct()
+ * as the sum of the gathered per-task counts. */
+static int Nimport;
+
+/*! \brief Computes the gravitational forces for all active particles through
+ * direct summation.
+ * + * \param[in] timebin (unused) + * + * \return void + */ +void gravity_direct(int timebin) +{ + int i, j, k, idx; + + TIMER_START(CPU_TREEDIRECT); + + if(TimeBinsGravity.GlobalNActiveParticles <= 1) + { + if(TimeBinsGravity.NActiveParticles > 0) + { + i = TimeBinsGravity.ActiveParticleList[0]; + if(i >= 0) + { + for(k = 0; k < 3; k++) + P[i].GravAccel[k] = 0; + +#ifdef EVALPOTENTIAL + P[i].Potential = 0; +#endif /* #ifdef EVALPOTENTIAL */ + } + } + + mpi_printf("Found only %d particles to do direct summation -> SKIPPING IT\n", TimeBinsGravity.GlobalNActiveParticles); + TIMER_STOP(CPU_TREEDIRECT); + return; + } + + mpi_printf("GRAVDIRECT: direct summation. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + double tstart = second(); + + DirectDataIn = (struct directdata *)mymalloc("DirectDataIn", TimeBinsGravity.NActiveParticles * sizeof(struct directdata)); + + Nforces = 0; + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + for(k = 0; k < 3; k++) + DirectDataIn[Nforces].Pos[k] = SphP[i].Center[k]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + for(k = 0; k < 3; k++) + DirectDataIn[Nforces].Pos[k] = P[i].Pos[k]; + } + + DirectDataIn[Nforces].Mass = P[i].Mass; + + DirectDataIn[Nforces].Type = P[i].Type; + DirectDataIn[Nforces].SofteningType = P[i].SofteningType; + + Nforces++; + } + + MPI_Allgather(&Nforces, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, Nimport = 0, Recv_offset[0] = 0; j < NTask; j++) + { + Nimport += Recv_count[j]; + + if(j > 0) + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + + DirectDataAll = (struct directdata *)mymalloc("DirectDataAll", Nimport * sizeof(struct directdata)); + + for(j = 0; j < NTask; j++) + { + Send_count[j] = Recv_count[j] * sizeof(struct directdata); + Send_offset[j] = Recv_offset[j] * sizeof(struct directdata); 
+ } + + MPI_Allgatherv(DirectDataIn, Nforces * sizeof(struct directdata), MPI_BYTE, DirectDataAll, Send_count, Send_offset, MPI_BYTE, + MPI_COMM_WORLD); + + /* subdivide the work evenly */ + int first, count; + subdivide_evenly(Nimport, NTask, ThisTask, &first, &count); + + DirectAccOut = (struct accdata *)mymalloc("DirectDataOut", count * sizeof(struct accdata)); + + /* now calculate the forces */ + for(i = 0; i < count; i++) + force_evaluate_direct(i + first, i, Nimport); + + /* now send the forces to the right places */ + + DirectAccIn = (struct accdata *)mymalloc("DirectDataIn", Nforces * sizeof(struct accdata)); + + MPI_Request *requests = (MPI_Request *)mymalloc_movable(&requests, "requests", 2 * NTask * sizeof(MPI_Request)); + int n_requests = 0; + + int recvTask = 0; + int sendTask = 0; + int send_first, send_count; + subdivide_evenly(Nimport, NTask, sendTask, &send_first, &send_count); + + while(recvTask < NTask && sendTask < NTask) /* go through both lists */ + { + while(send_first + send_count < Recv_offset[recvTask]) + { + if(sendTask >= NTask - 1) + terminate("sendTask >= NTask recvTask=%d sendTask=%d", recvTask, sendTask); + + sendTask++; + subdivide_evenly(Nimport, NTask, sendTask, &send_first, &send_count); + } + + while(Recv_offset[recvTask] + Recv_count[recvTask] < send_first) + { + if(recvTask >= NTask - 1) + terminate("recvTask >= NTask recvTask=%d sendTask=%d", recvTask, sendTask); + + recvTask++; + } + + int start = imax(Recv_offset[recvTask], send_first); + int next = imin(Recv_offset[recvTask] + Recv_count[recvTask], send_first + send_count); + + if(next - start >= 1) + { + if(ThisTask == sendTask) + MPI_Isend(DirectAccOut + start - send_first, (next - start) * sizeof(struct accdata), MPI_BYTE, recvTask, TAG_PDATA_SPH, + MPI_COMM_WORLD, &requests[n_requests++]); + + if(ThisTask == recvTask) + MPI_Irecv(DirectAccIn + start - Recv_offset[recvTask], (next - start) * sizeof(struct accdata), MPI_BYTE, sendTask, + TAG_PDATA_SPH, MPI_COMM_WORLD, 
&requests[n_requests++]); + } + + if(next == Recv_offset[recvTask] + Recv_count[recvTask]) + recvTask++; + else + { + sendTask++; + if(sendTask >= NTask) + break; + + subdivide_evenly(Nimport, NTask, sendTask, &send_first, &send_count); + } + } + + MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE); + myfree(requests); + + Nforces = 0; + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + for(k = 0; k < 3; k++) + P[i].GravAccel[k] = DirectAccIn[Nforces].Acc[k]; + +#ifdef EVALPOTENTIAL + P[i].Potential = DirectAccIn[Nforces].Potential; +#endif /* #ifdef EVALPOTENTIAL */ + Nforces++; + } + + myfree(DirectAccIn); + myfree(DirectAccOut); + myfree(DirectDataAll); + myfree(DirectDataIn); + + mpi_printf("GRAVDIRECT: force is done.\n"); + + All.TotNumOfForces += TimeBinsGravity.GlobalNActiveParticles; + + double tend = second(); + + double timedirect, sumt; + timedirect = tend - tstart; + + MPI_Reduce(&timedirect, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + fprintf(FdTimings, "Nf=%9lld active part/task: avg=%g total-Nf=%lld\n", TimeBinsGravity.GlobalNActiveParticles, + ((double)TimeBinsGravity.GlobalNActiveParticles) / NTask, All.TotNumOfForces); + fprintf(FdTimings, " (direct) part/sec: %g ia/sec: %g\n", TimeBinsGravity.GlobalNActiveParticles / (sumt + 1.0e-20), + TimeBinsGravity.GlobalNActiveParticles / (sumt + 1.0e-20) * TimeBinsGravity.GlobalNActiveParticles); + myflush(FdTimings); + } + + TIMER_STOP(CPU_TREEDIRECT); +} + +#endif /* #ifdef ALLOW_DIRECT_SUMMATION */ diff --git a/src/amuse/community/arepo/src/gravity/gravtree.c b/src/amuse/community/arepo/src/gravity/gravtree.c new file mode 100644 index 0000000000..810aa9c3da --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/gravtree.c @@ -0,0 +1,749 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/gravity/gravtree.c + * \date 05/2018 + * \brief Main driver routines for gravitational (short-range) force + * computation. + * \details This file contains the code for the gravitational force + * computation by means of the tree algorithm. To this end, a tree + * force is computed for all active local particles, and particles + * are exported to other processors if needed, where they can + * receive additional force contributions. If the TreePM algorithm + * is enabled, the force computed will only be the short-range + * part. 
+ * contains functions:
+ * static void particle2in(data_in * in, int i, int firstnode)
+ * static void out2particle(data_out * out, int i, int mode)
+ * static void gravity_primary_loop(void)
+ * void gravity_secondary_loop(void)
+ * void gravity_tree(int timebin)
+ * static int gravity_evaluate(int target, int mode, int
+ * threadid)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 20.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+static double ThreadsCosttotal[NUM_THREADS]; /*!< The total cost of the particles/nodes processed by each thread */
+static int ThreadFirstExec[NUM_THREADS]; /*!< Set while a thread has not yet run one of the gravity loops in the current cost-measuring step; the loops clear the cost counters on that first pass */
+static int MeasureCostFlag; /*!< Whether the tree costs are measured for the current time step */
+
+/* forward declaration: defined further down in this file, used by both gravity loops */
+static int gravity_evaluate(int target, int mode, int threadid);
+
+/* type names expected by generic_comm_helpers2.h (included below) */
+typedef gravdata_in data_in;
+
+typedef gravdata_out data_out;
+
+#ifdef DETAILEDTIMINGS
+/* reference time and time bin written to the FdDetailed log by the gravity loops */
+static double tstart;
+static int current_timebin;
+#endif /* #ifdef DETAILEDTIMINGS */
+
+/* local data structure for collecting particle/cell data that is sent to other processors if needed */
+static data_in *DataIn, *DataGet;
+static data_out *DataResult, *DataOut;
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ * structure defined above. Needed by generic_comm_helpers2.
+ *
+ * \param[out] in Data structure to fill.
+ * \param[in] i Index of particle in P and SphP arrays.
+ * \param[in] firstnode First node of communication.
+ *
+ * \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int k;
+
+  in->Firstnode = firstnode;
+
+  if(i >= NumPart)
+    {
+      /* indices at or beyond NumPart refer to imported tree points */
+      int ip = i - Tree_ImportedNodeOffset;
+
+      for(k = 0; k < 3; k++)
+        in->Pos[k] = Tree_Points[ip].Pos[k];
+
+      in->Type = Tree_Points[ip].Type;
+      in->SofteningType = Tree_Points[ip].SofteningType;
+      in->OldAcc = Tree_Points[ip].OldAcc;
+      return;
+    }
+
+  /* local particle; with CELL_CENTER_GRAVITY, type 0 uses the cell center as its position */
+#ifdef CELL_CENTER_GRAVITY
+  if(P[i].Type == 0)
+    {
+      for(k = 0; k < 3; k++)
+        in->Pos[k] = SphP[i].Center[k];
+    }
+  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+    {
+      for(k = 0; k < 3; k++)
+        in->Pos[k] = P[i].Pos[k];
+    }
+
+  in->Type = P[i].Type;
+  in->SofteningType = P[i].SofteningType;
+  in->OldAcc = P[i].OldAcc;
+}
+
+/*! \brief Routine to store or combine result data. Needed by
+ * generic_comm_helpers2.
+ *
+ * \param[in] out Data to be moved to appropriate variables in global
+ * particle and cell data arrays (P, SphP,...)
+ * \param[in] i Index of particle in P and SphP arrays
+ * \param[in] mode Mode of function: local particles or information that was
+ * communicated from other tasks and has to be added locally?
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + if(i < NumPart) + { + P[i].GravAccel[0] = out->Acc[0]; + P[i].GravAccel[1] = out->Acc[1]; + P[i].GravAccel[2] = out->Acc[2]; +#ifdef EVALPOTENTIAL + P[i].Potential = out->Potential; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef OUTPUTGRAVINTERACTIONS + P[i].GravInteractions = out->GravNinteractions; +#endif /* #ifdef OUTPUTGRAVINTERACTIONS */ + } + else + { + int idx = Tree_ResultIndexList[i - Tree_ImportedNodeOffset]; + Tree_ResultsActiveImported[idx].GravAccel[0] = out->Acc[0]; + Tree_ResultsActiveImported[idx].GravAccel[1] = out->Acc[1]; + Tree_ResultsActiveImported[idx].GravAccel[2] = out->Acc[2]; +#ifdef EVALPOTENTIAL + Tree_ResultsActiveImported[idx].Potential = out->Potential; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef OUTPUTGRAVINTERACTIONS + Tree_ResultsActiveImported[idx].GravInteractions = out->GravNinteractions; +#endif /* #ifdef OUTPUTGRAVINTERACTIONS */ + } + } + else /* combine */ + { + if(i < NumPart) + { + P[i].GravAccel[0] += out->Acc[0]; + P[i].GravAccel[1] += out->Acc[1]; + P[i].GravAccel[2] += out->Acc[2]; +#ifdef EVALPOTENTIAL + P[i].Potential += out->Potential; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef OUTPUTGRAVINTERACTIONS + P[i].GravInteractions += out->GravNinteractions; +#endif /* #ifdef OUTPUTGRAVINTERACTIONS */ + } + else + { + int idx = Tree_ResultIndexList[i - Tree_ImportedNodeOffset]; + Tree_ResultsActiveImported[idx].GravAccel[0] += out->Acc[0]; + Tree_ResultsActiveImported[idx].GravAccel[1] += out->Acc[1]; + Tree_ResultsActiveImported[idx].GravAccel[2] += out->Acc[2]; +#ifdef EVALPOTENTIAL + Tree_ResultsActiveImported[idx].Potential += out->Potential; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef OUTPUTGRAVINTERACTIONS + Tree_ResultsActiveImported[idx].GravInteractions += out->GravNinteractions; +#endif /* #ifdef OUTPUTGRAVINTERACTIONS */ + } + } +} + +#include 
"../utils/generic_comm_helpers2.h" + +/*! \brief Primary loop of gravity calculation. + * + * Gravitational interactions between local particles; see gravity_tree(..). + * Equivalent to 'kernel_local'. + * + * \return void + */ +static void gravity_primary_loop(void) +{ + TIMER_STOPSTART(CPU_TREEBALSNDRCV, CPU_TREEWALK1); + +#ifdef DETAILEDTIMINGS + double t0 = second(); +#endif /* #ifdef DETAILEDTIMINGS */ + + int idx; + /* do local particles */ + { + int j, threadid = get_thread_num(); + double cost = 0; + + if(ThreadFirstExec[threadid]) + { + ThreadFirstExec[threadid] = 0; + + if(MeasureCostFlag) + { + memset(Thread[threadid].P_CostCount, 0, NumPart * sizeof(int)); + memset(Thread[threadid].TreePoints_CostCount, 0, Tree_NumPartImported * sizeof(int)); + memset(Thread[threadid].Node_CostCount + Tree_MaxPart, 0, Tree_NumNodes * sizeof(int)); + } + } + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + idx = NextParticle++; + + if(idx >= Nforces) + break; + + int i = TargetList[idx]; + + cost += gravity_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + + ThreadsCosttotal[threadid] += cost; + } + +#ifdef DETAILEDTIMINGS + double t1 = second(); + + fprintf(FdDetailed, "%d %d %d %d %g %g\n", All.NumCurrentTiStep, current_timebin, DETAILED_TIMING_GRAVWALK, MODE_LOCAL_PARTICLES, + timediff(tstart, t0), timediff(tstart, t1)); +#endif /* #ifdef DETAILEDTIMINGS */ + + TIMER_STOPSTART(CPU_TREEWALK1, CPU_TREEBALSNDRCV); +} + +/*! \brief Secondary loop of gravity calculation. + * + * Gravitational interactions between imported particles; see gravity_tree(.). + * Equivalent to 'kernel_imported'. 
+ * + * \return void + */ +void gravity_secondary_loop(void) +{ + TIMER_STOPSTART(CPU_TREEBALSNDRCV, CPU_TREEWALK2); + +#ifdef DETAILEDTIMINGS + double t0 = second(); +#endif /* #ifdef DETAILEDTIMINGS */ + + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + double cost = 0; + + if(ThreadFirstExec[threadid]) + { + ThreadFirstExec[threadid] = 0; + + if(MeasureCostFlag) + { + memset(Thread[threadid].P_CostCount, 0, NumPart * sizeof(int)); + memset(Thread[threadid].TreePoints_CostCount, 0, Tree_NumPartImported * sizeof(int)); + memset(Thread[threadid].Node_CostCount + Tree_MaxPart, 0, Tree_NumNodes * sizeof(int)); + } + } + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + cost += gravity_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + + ThreadsCosttotal[threadid] += cost; + } + +#ifdef DETAILEDTIMINGS + double t1 = second(); + + fprintf(FdDetailed, "%d %d %d %d %g %g\n", All.NumCurrentTiStep, current_timebin, DETAILED_TIMING_GRAVWALK, MODE_IMPORTED_PARTICLES, + timediff(tstart, t0), timediff(tstart, t1)); +#endif /* #ifdef DETAILEDTIMINGS */ + + TIMER_STOPSTART(CPU_TREEWALK2, CPU_TREEBALSNDRCV); +} + +/*! \brief This function computes the gravitational forces for all active + * particles. + * + * The tree walk is done in two phases: First the local part of the force tree + * is processed (gravity_primary_loop() ). Whenever an external node is + * encountered during the walk, this node is saved on a list. This node list + * along with data about the particles is then exchanged among tasks. + * In the second phase (gravity_secondary_loop() ) each task now continues + * the tree walk for the imported particles. Finally the resulting partial + * forces are send back to the original task and are summed up there to + * complete the tree force calculation. + * + * If only the tree algorithm is used in a periodic box, the whole tree walk + * is done twice. 
First a normal tree walk is done as described above, and + * afterwards a second tree walk, which adds the needed Ewald corrections is + * performed. + * + * Particles are only exported to other processors when really needed, + * thereby allowing a good use of the communication buffer. Every particle is + * sent at most once to a given processor together with the complete list of + * relevant tree nodes to be checked on the other task. + * + * Particles which drifted into the domain of another task are sent to this + * task for the force computation. Afterwards the resulting force is sent + * back to the originating task. + * + * In order to improve the work load balancing during a domain decomposition, + * the work done by each node/particle is measured. The work is measured for + * the interaction partners (i.e. the nodes or particles) and not for the + * particles itself that require a force computation. This way, work done for + * imported particles is accounted for at the task where the work actually + * incurred. The cost measurement is only done for the "GRAVCOSTLEVELS" + * highest occupied time bins. The variable 'MeasureCostFlag' will state + * whether a measurement is done at the present time step. + * + * The particles requiring a force computation are split into chunks of size + * 'Nchunksize'. A set of every 'Nchunk' -th chunk is processed first. + * Then the process is repeated, processing the next set of chunks. This way + * the amount of exported particles is more balanced, as communication heavy + * regions are mixed with less communication intensive regions. + * + * \param[in] timebin Time bin for which gravity should be calculated. 
/*! \brief Computes the tree gravity for the active particles of a time bin.
 *
 *  Steps performed, in order: (1) pick the cost-measurement slot for
 *  'timebin' and allocate the per-thread cost counters if a measurement is
 *  due, (2) build the list of force targets from local active particles and
 *  imported tree points, (3) run the primary/secondary tree walk through
 *  generic_comm_pattern(), (4) send the accelerations computed for imported
 *  points back to the tasks owning them, (5) assign the measured costs and
 *  release the counters, (6) reduce timing statistics and log them on task 0.
 *
 *  \param[in] timebin Time bin for which gravity should be calculated.
 *
 *  \return void
 */
void gravity_tree(int timebin)
{
  int idx, i, j, k, ncount, iter = 0, maxiter;
  /* timing record made of doubles only: it is reduced below as a plain array of MPI_DOUBLE */
  struct detailed_timings
  {
    double all, tree1, tree2, tree, commwait;
    double sumnexport, costtotal, numnodes;
    ;
  } timer, tisum, timax;
  memset(&timer, 0, sizeof(struct detailed_timings));
  double Costtotal;
  int ngrp;
  int recvTask;

  TIMER_STORE;
  TIMER_START(CPU_TREE);

  /* allocate buffers to arrange communication */
  mpi_printf("GRAVTREE: Begin tree force. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));

  TIMER_STOPSTART(CPU_TREE, CPU_TREECOSTMEASURE);

  /* reset per-thread cost totals; ThreadFirstExec is only raised further down if a measurement is due */
  for(i = 0; i < NUM_THREADS; i++)
    {
      ThreadsCosttotal[i] = 0;
      ThreadFirstExec[i] = 0;
    }

  /* find the level (if any) for which we measure gravity cost */
  for(i = 0, TakeLevel = -1; i < GRAVCOSTLEVELS; i++)
    {
      if(All.LevelToTimeBin[i] == timebin)
        {
          TakeLevel = i;
          break;
        }
    }

  if(TakeLevel < 0) /* we have not found a matching slot */
    {
      /* first choice: a slot that is still unused (negative entry) */
      for(i = 0; i < GRAVCOSTLEVELS; i++)
        {
          if(All.LevelToTimeBin[i] < 0)
            {
              All.LevelToTimeBin[i] = timebin;
              TakeLevel = i;
              All.LevelHasBeenMeasured[i] = 0;
              break;
            }
        }

      if(TakeLevel < 0)
        {
          /* no free slot: only recycle one if this bin lies within GRAVCOSTLEVELS of the highest occupied
           * bin; otherwise TakeLevel stays negative and no cost is measured in this call */
          if(All.HighestOccupiedGravTimeBin - timebin < GRAVCOSTLEVELS) /* we should have space */
            {
              /* clear levels that are out of range */
              for(i = 0; i < GRAVCOSTLEVELS; i++)
                {
                  /* slot refers to a bin above the highest occupied one */
                  if(All.LevelToTimeBin[i] > All.HighestOccupiedGravTimeBin)
                    {
                      All.LevelToTimeBin[i] = timebin;
                      TakeLevel = i;
                      All.LevelHasBeenMeasured[i] = 0;
                      break;
                    }
                  /* slot refers to a bin below the measured window */
                  if(All.LevelToTimeBin[i] < All.HighestOccupiedGravTimeBin - (GRAVCOSTLEVELS - 1))
                    {
                      All.LevelToTimeBin[i] = timebin;
                      TakeLevel = i;
                      All.LevelHasBeenMeasured[i] = 0;
                      break;
                    }
                }

              if(TakeLevel < 0)
                {
                  /* last resort: take over the slot of the highest occupied bin if this bin is above it */
                  if(timebin > All.HighestOccupiedGravTimeBin)
                    {
                      for(i = 0; i < GRAVCOSTLEVELS; i++)
                        {
                          if(All.LevelToTimeBin[i] == All.HighestOccupiedGravTimeBin)
                            {
                              All.LevelToTimeBin[i] = timebin;
                              TakeLevel = i;
                              All.LevelHasBeenMeasured[i] = 0;
                              break;
                            }
                        }
                    }
                }

              if(TakeLevel < 0)
                {
                  /* a slot should have been available here: dump the table and abort */
                  mpi_printf("All.HighestOccupiedGravTimeBin=%d timebin=%d\n", All.HighestOccupiedGravTimeBin, timebin);
                  for(i = 0; i < GRAVCOSTLEVELS; i++)
                    {
                      mpi_printf("All.LevelToTimeBin[i=%d]=%d\n", i, All.LevelToTimeBin[i]);
                    }

                  fflush(stdout);
                  MPI_Barrier(MPI_COMM_WORLD);

                  terminate("TakeLevel=%d < 0", TakeLevel);
                }
            }
        }
    }

  MeasureCostFlag = 0;

  /* measure only if a slot was found and it has not been measured yet */
  if(TakeLevel >= 0)
    if(All.LevelHasBeenMeasured[TakeLevel] == 0)
      {
        MeasureCostFlag = 1;

        Thread[0].P_CostCount = mymalloc("Thread_P_CostCount", NumPart * sizeof(int));
        Thread[0].TreePoints_CostCount = mymalloc("Threads_TreePoints_CostCount", Tree_NumPartImported * sizeof(int));
        Thread[0].Node_CostCount = mymalloc("Threads_Node_CostCount", Tree_NumNodes * sizeof(int));

        for(i = 1; i < NUM_THREADS; i++)
          {
            Thread[i].P_CostCount = mymalloc("Threads_P_CostCount", NumPart * sizeof(int));
            Thread[i].TreePoints_CostCount = mymalloc("Threads_TreePoints_CostCount", Tree_NumPartImported * sizeof(int));
            Thread[i].Node_CostCount = mymalloc("Threads_Node_CostCount", Tree_NumNodes * sizeof(int));
          }

        /* store the node counters shifted by -Tree_MaxPart; the shift is undone before memset/myfree.
         * NOTE(review): presumably this lets the array be indexed by node number directly - confirm */
        for(i = 0; i < NUM_THREADS; i++)
          Thread[i].Node_CostCount -= Tree_MaxPart;

        /* the counters are zeroed lazily by whoever sees this flag first */
        for(i = 0; i < NUM_THREADS; i++)
          ThreadFirstExec[i] = 1;
      }

  TIMER_STOPSTART(CPU_TREECOSTMEASURE, CPU_TREE);

  /* Create list of targets. We do this here to simplify the treatment of the two possible sources of points */

  TargetList = mymalloc("TargetList", (NumPart + Tree_NumPartImported) * sizeof(int));
  Tree_ResultIndexList = mymalloc("Tree_ResultIndexList", Tree_NumPartImported * sizeof(int));

  Nforces = 0;

  /* source 1: active local particles for which Tree_Task_list names this task */
  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
    {
      i = TimeBinsGravity.ActiveParticleList[idx];
      if(i < 0)
        continue;

      if(Tree_Task_list[i] == ThisTask)
        TargetList[Nforces++] = i;
    }

  /* source 2: imported tree points, stored with index offset Tree_ImportedNodeOffset;
   * ncount numbers the ones that get a result slot */
  for(i = 0, ncount = 0; i < Tree_NumPartImported; i++)
#ifndef HIERARCHICAL_GRAVITY
    if(Tree_Points[i].ActiveFlag)
#endif /* #ifndef HIERARCHICAL_GRAVITY */
      {
        Tree_ResultIndexList[i] = ncount++;
        TargetList[Nforces++] = i + Tree_ImportedNodeOffset;
      }

  Tree_ResultsActiveImported = mymalloc("Tree_ResultsActiveImported", ncount * sizeof(struct resultsactiveimported_data));

  permutate_chunks_in_list(Nforces, TargetList);

  generic_set_MaxNexport();

  /******************************************/
  /* now execute the tree walk calculations */
  /******************************************/

  TIMER_STOPSTART(CPU_TREE, CPU_TREEBALSNDRCV);

#ifdef DETAILEDTIMINGS
  tstart = second();
  current_timebin = timebin;
#endif /* #ifdef DETAILEDTIMINGS */

  iter = generic_comm_pattern(Nforces, gravity_primary_loop, gravity_secondary_loop);

  /* now communicate the forces in Tree_ResultsActiveImported */

  TIMER_STOPSTART(CPU_TREEBALSNDRCV, CPU_TREESENDBACK);

#ifdef DETAILEDTIMINGS
  double tend = second();

  fprintf(FdDetailed, "%d %d %d %d %g %g\n", All.NumCurrentTiStep, current_timebin, DETAILED_TIMING_GRAVWALK, MODE_FINISHED,
          timediff(tstart, tend), timediff(tstart, tend));
  fflush(FdDetailed);
#endif /* #ifdef DETAILEDTIMINGS */

  for(j = 0; j < NTask; j++)
    Recv_count[j] = 0;

  /* walk the imported points task by task (n runs over all of them, k over those with a result slot),
   * record the particle index each result belongs to and count the results per task in Recv_count */
  int n;
  for(i = 0, n = 0, k = 0; i < NTask; i++)
    for(j = 0; j < Force_Recv_count[i]; j++, n++)
      {
#ifndef HIERARCHICAL_GRAVITY
        if(Tree_Points[n].ActiveFlag)
#endif /* #ifndef HIERARCHICAL_GRAVITY */
          {
            Tree_ResultsActiveImported[k].index = Tree_Points[n].index;
            Recv_count[i]++;
            k++;
          }
      }

  /* note the reversed roles: what was counted in Recv_count is what this task sends back below */
  MPI_Alltoall(Recv_count, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD);

  /* totals and exclusive prefix sums of both count arrays */
  for(j = 0, Nexport = 0, Nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
    {
      Nexport += Send_count[j];
      Nimport += Recv_count[j];

      if(j > 0)
        {
          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
        }
    }

  struct resultsactiveimported_data *tmp_results = mymalloc("tmp_results", Nexport * sizeof(struct resultsactiveimported_data));
  /* fill with 0xFF bytes before receiving */
  memset(tmp_results, -1, Nexport * sizeof(struct resultsactiveimported_data));

  /* exchange data */
  /* partner of this task in round ngrp is ThisTask ^ ngrp; partners beyond NTask are skipped */
  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
    {
      recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            {
              /* send the results computed here (laid out with Recv_offset/Recv_count) and receive the
               * results computed elsewhere for our particles (laid out with Send_offset/Send_count) */
              MPI_Sendrecv(&Tree_ResultsActiveImported[Recv_offset[recvTask]],
                           Recv_count[recvTask] * sizeof(struct resultsactiveimported_data), MPI_BYTE, recvTask, TAG_FOF_A,
                           &tmp_results[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct resultsactiveimported_data),
                           MPI_BYTE, recvTask, TAG_FOF_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            }
        }
    }

  /* store the returned accelerations; this is an assignment, not an accumulation */
  for(i = 0; i < Nexport; i++)
    {
      int target = tmp_results[i].index;

      for(k = 0; k < 3; k++)
        P[target].GravAccel[k] = tmp_results[i].GravAccel[k];
    }

  /* buffers are released in reverse order of their allocation */
  myfree(tmp_results);

  myfree(Tree_ResultsActiveImported);
  myfree(Tree_ResultIndexList);
  myfree(TargetList);

  TIMER_STOPSTART(CPU_TREESENDBACK, CPU_TREECOSTMEASURE);

  /* assign node cost to particles */
  if(MeasureCostFlag)
    {
      for(int threadid = 0; threadid < NUM_THREADS; threadid++)
        if(ThreadFirstExec[threadid])
          {
            /* this could happen if neither the primary nor the secondary loop had anything to do */
            ThreadFirstExec[threadid] = 0;
            memset(Thread[threadid].P_CostCount, 0, NumPart * sizeof(int));
            memset(Thread[threadid].TreePoints_CostCount, 0, Tree_NumPartImported * sizeof(int));
            memset(Thread[threadid].Node_CostCount + Tree_MaxPart, 0, Tree_NumNodes * sizeof(int));
          }

      force_assign_cost_values();
      domain_init_sum_cost();

      All.LevelHasBeenMeasured[TakeLevel] = 1;

      /* with opening criterion 1 at Ti_Current == 0 the level is left marked as not measured */
      if(All.TypeOfOpeningCriterion == 1 && All.Ti_Current == 0)
        All.LevelHasBeenMeasured[TakeLevel] = 0;

      /* undo the pointer shift so that myfree() receives the addresses returned by mymalloc() */
      for(i = 0; i < NUM_THREADS; i++)
        Thread[i].Node_CostCount += Tree_MaxPart;

      /* release in reverse order of allocation: threads NUM_THREADS-1 .. 1 first, thread 0 last */
      for(i = NUM_THREADS - 1; i >= 1; i--)
        {
          myfree(Thread[i].Node_CostCount);
          myfree(Thread[i].TreePoints_CostCount);
          myfree(Thread[i].P_CostCount);
        }

      myfree(Thread[0].Node_CostCount);
      myfree(Thread[0].TreePoints_CostCount);
      myfree(Thread[0].P_CostCount);
    }

  TIMER_STOPSTART(CPU_TREECOSTMEASURE, CPU_TREE);

  if(All.TypeOfOpeningCriterion == 1)
    All.ErrTolTheta = 0; /* This will switch to the relative opening criterion for the following force computations */

  mpi_printf("GRAVTREE: tree-force is done.\n");

  /* gather some diagnostic information */

  TIMER_STOPSTART(CPU_TREE, CPU_LOGS);

  Costtotal = 0;
  for(i = 0; i < NUM_THREADS; i++)
    Costtotal += ThreadsCosttotal[i];

  timer.tree1 = TIMER_DIFF(CPU_TREEWALK1);
  timer.tree2 = TIMER_DIFF(CPU_TREEWALK2);
  timer.tree = timer.tree1 + timer.tree2;
  timer.commwait = TIMER_DIFF(CPU_TREEBALSNDRCV) + TIMER_DIFF(CPU_TREESENDBACK);
  timer.all = timer.tree + timer.commwait + TIMER_DIFF(CPU_TREE) + TIMER_DIFF(CPU_TREECOSTMEASURE);
  timer.sumnexport = SumNexport;
  timer.costtotal = Costtotal;
  timer.numnodes = Tree_NumNodes;

  /* task 0 receives the maximum iteration count plus the field-wise sum and maximum of the timing record */
  MPI_Reduce(&iter, &maxiter, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
  MPI_Reduce((double *)&timer, (double *)&tisum, (int)(sizeof(struct detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_SUM, 0,
             MPI_COMM_WORLD);
  MPI_Reduce((double *)&timer, (double *)&timax, (int)(sizeof(struct detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_MAX, 0,
             MPI_COMM_WORLD);

  All.TotNumOfForces += TimeBinsGravity.GlobalNActiveParticles;

  /* the reduced values are only valid on the root task; the 1e-20 terms guard against division by zero */
  if(ThisTask == 0)
    {
      fprintf(FdTimings, "Nf=%9lld timebin=%d total-Nf=%lld\n", TimeBinsGravity.GlobalNActiveParticles, timebin, All.TotNumOfForces);

      fprintf(FdTimings, " work-load balance: %g (%g %g), rel1to2: %g\n", timax.tree / ((tisum.tree + 1e-20) / NTask),
              timax.tree1 / ((tisum.tree1 + 1e-20) / NTask), timax.tree2 / ((tisum.tree2 + 1e-20) / NTask),
              tisum.tree1 / (tisum.tree1 + tisum.tree2 + 1e-20));
      fprintf(FdTimings, " number of iterations: max=%d, exported fraction: %g\n", maxiter,
              tisum.sumnexport / (TimeBinsGravity.GlobalNActiveParticles + 1e-20));
      fprintf(FdTimings, " part/sec: raw=%g, effective=%g ia/part: avg=%g\n",
              TimeBinsGravity.GlobalNActiveParticles / (tisum.tree + 1.0e-20),
              TimeBinsGravity.GlobalNActiveParticles / ((timax.tree + 1.0e-20) * NTask),
              tisum.costtotal / (TimeBinsGravity.GlobalNActiveParticles + 1.0e-20));

      fprintf(FdTimings, " maximum number of nodes: %g, filled: %g\n", timax.numnodes, timax.numnodes / Tree_MaxNodes);

      fprintf(FdTimings, " avg times: all=%g tree1=%g tree2=%g commwait=%g sec\n", tisum.all / NTask, tisum.tree1 / NTask,
              tisum.tree2 / NTask, tisum.commwait / NTask);

      myflush(FdTimings);
    }

  TIMER_STOP(CPU_LOGS);
}
+ */ +static int gravity_evaluate(int target, int mode, int threadid) +{ + int cost, numnodes, *firstnode; + data_in local, *target_data; + data_out out, *target_result; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + target_result = &out; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + target_result = &DataResult[target]; + generic_get_numnodes(target, &numnodes, &firstnode); + } + + cost = force_treeevaluate(target_data, target_result, target, mode, threadid, numnodes, firstnode, MeasureCostFlag); + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + + /* note: for imported particles, we already have the result places into DataResult[target] */ + + return cost; +} diff --git a/src/amuse/community/arepo/src/gravity/gravtree_forcetest.c b/src/amuse/community/arepo/src/gravity/gravtree_forcetest.c new file mode 100644 index 0000000000..54e1c5c299 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/gravtree_forcetest.c @@ -0,0 +1,1089 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/gravtree_forcetest.c + * \date 05/2018 + * \brief Test short range gravity evaluation. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void gravity_forcetest(void) + * static void gravity_forcetest_evaluate(int target, int mode, + * int threadid) + * void gravity_forcetest_testforcelaw(void) + * static void ewald_other_images(double x, double y, double z, + * double alpha, double force[4]) + * static void ewald_correction_force(double x, double y, + * double z, double force[4]) + * void forcetest_ewald_init(void) + * static void ewald_correction_force_table_lookup(double dx, + * double dy, double dz, double force[4]) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 20.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" + +#ifdef FORCETEST + +#if !defined(EVALPOTENTIAL) && defined(FORCETEST) +#error "When you enable FORCETEST you should also switch on EVALPOTENTIAL" +#endif /* #if !defined(EVALPOTENTIAL) && defined(FORCETEST) */ + +static void gravity_forcetest_evaluate(int target, int mode, int threadid); +static void ewald_correction_force(double x, double y, double z, double force[4]); +static void ewald_other_images(double x, double y, double z, double alpha, double force[4]); +static void ewald_correction_force_table_lookup(double x, double y, double z, double force[4]); + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. 
/*! \brief Fills the communication input record for one particle. Needed by
 *         generic_comm_helpers2.
 *
 *  The position is taken from P[i].Pos, except that with
 *  CELL_CENTER_GRAVITY particles of type 0 use SphP[i].Center instead.
 *
 *  \param[out] in Data structure to fill.
 *  \param[in] i Index of particle in P and SphP arrays.
 *  \param[in] firstnode First node of communication.
 *
 *  \return void
 */
static void particle2in(data_in *in, int i, int firstnode)
{
  in->Firstnode = firstnode;
  in->SofteningType = P[i].SofteningType;
  in->Type = P[i].Type;

  for(int axis = 0; axis < 3; axis++)
    {
#ifdef CELL_CENTER_GRAVITY
      if(P[i].Type == 0)
        {
          in->Pos[axis] = SphP[i].Center[axis];
          continue;
        }
#endif /* #ifdef CELL_CENTER_GRAVITY */
      in->Pos[axis] = P[i].Pos[axis];
    }
}
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + P[i].GravAccelDirect[0] = out->Acc[0]; + P[i].GravAccelDirect[1] = out->Acc[1]; + P[i].GravAccelDirect[2] = out->Acc[2]; + P[i].PotentialDirect = out->Pot; + P[i].DistToID1 = out->DistToID1; +#ifdef PMGRID + P[i].GravAccelLongRange[0] = out->AccLongRange[0]; + P[i].GravAccelLongRange[1] = out->AccLongRange[1]; + P[i].GravAccelLongRange[2] = out->AccLongRange[2]; + P[i].GravAccelShortRange[0] = out->AccShortRange[0]; + P[i].GravAccelShortRange[1] = out->AccShortRange[1]; + P[i].GravAccelShortRange[2] = out->AccShortRange[2]; + P[i].PotentialLongRange = out->PotLongRange; + P[i].PotentialShortRange = out->PotShortRange; +#endif /* #ifdef PMGRID */ + } + else /* combine */ + { + P[i].GravAccelDirect[0] += out->Acc[0]; + P[i].GravAccelDirect[1] += out->Acc[1]; + P[i].GravAccelDirect[2] += out->Acc[2]; + P[i].PotentialDirect += out->Pot; + if(out->DistToID1 > 0) + P[i].DistToID1 = out->DistToID1; +#ifdef PMGRID + P[i].GravAccelLongRange[0] += out->AccLongRange[0]; + P[i].GravAccelLongRange[1] += out->AccLongRange[1]; + P[i].GravAccelLongRange[2] += out->AccLongRange[2]; + P[i].GravAccelShortRange[0] += out->AccShortRange[0]; + P[i].GravAccelShortRange[1] += out->AccShortRange[1]; + P[i].GravAccelShortRange[2] += out->AccShortRange[2]; + P[i].PotentialLongRange += out->PotLongRange; + P[i].PotentialShortRange += out->PotShortRange; +#endif /* #ifdef PMGRID */ + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. 
+ * + * \return void + */ +static void kernel_local(void) +{ + int i; + + /* do local particles */ + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= TimeBinsGravity.NActiveParticles) + break; + + i = TimeBinsGravity.ActiveParticleList[i]; + if(i < 0) + continue; + + if(P[i].TimeBinGrav < 0) + gravity_forcetest_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + gravity_forcetest_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief This function computes the gravitational forces for all active + * particles. + * + * A new tree is constructed, if the number of force computations since + * it's last construction exceeds some fraction of the total + * particle number, otherwise tree nodes are dynamically updated if needed. 
+ * + * \return void + */ +void gravity_forcetest(void) +{ + int nthis, nloc, ntot; + int idx, i, j; + double fac1; + char buf[200]; + + nloc = 0; + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(get_random_number() < FORCETEST) + { + P[i].TimeBinGrav = -P[i].TimeBinGrav - 1; /* Mark as selected */ + nloc++; + } + } + + MPI_Allreduce(&nloc, &ntot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + mpi_printf("FORCETEST: Testing forces of %d particles\n", ntot); + + double t0 = second(); + + generic_set_MaxNexport(); + + generic_comm_pattern(TimeBinsGravity.NActiveParticles, kernel_local, kernel_imported); + + double t1 = second(); + double maxt = timediff(t0, t1); + + /* muliply by G */ + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].TimeBinGrav < 0) + { + for(j = 0; j < 3; j++) + { + P[i].GravAccelDirect[j] *= All.G; +#ifdef PMGRID + P[i].GravAccelLongRange[j] *= All.G; + P[i].GravAccelShortRange[j] *= All.G; +#endif /* #ifdef PMGRID */ + } + + P[i].PotentialDirect *= All.G; +#ifdef PMGRID + P[i].PotentialLongRange *= All.G; + P[i].PotentialShortRange *= All.G; +#endif /* #ifdef PMGRID */ + } + } + + /* Finally, the following factor allows a computation of cosmological simulation + with vacuum energy in physical coordinates */ + + if(All.ComovingIntegrationOn == 0) + { + fac1 = All.OmegaLambda * All.Hubble * All.Hubble; + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].TimeBinGrav < 0) + for(j = 0; j < 3; j++) + P[i].GravAccelDirect[j] += fac1 * P[i].Pos[j]; + } + } + + /* now output the forces to a file */ + + for(nthis = 0; nthis < NTask; nthis++) + { + if(nthis == ThisTask) + { + sprintf(buf, "%s%s", All.OutputDir, "forcetest.txt"); + + if(!(FdForceTest = fopen(buf, "a"))) + 
terminate("error in opening file '%s'\n", buf); + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].TimeBinGrav < 0) + { +#ifdef PMGRID + fprintf(FdForceTest, + "%d %d %lld %g %g %g %g %g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g " + "%15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g\n", + P[i].Type, ThisTask, (long long)P[i].ID, All.Time, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2], P[i].DistToID1, + P[i].GravAccelDirect[0], P[i].GravAccelDirect[1], P[i].GravAccelDirect[2], P[i].GravAccelShortRange[0], + P[i].GravAccelShortRange[1], P[i].GravAccelShortRange[2], P[i].GravAccelLongRange[0], + P[i].GravAccelLongRange[1], P[i].GravAccelLongRange[2], P[i].GravAccel[0], P[i].GravAccel[1], + P[i].GravAccel[2], P[i].GravPM[0], P[i].GravPM[1], P[i].GravPM[2], P[i].PotentialDirect, + P[i].PotentialShortRange, P[i].PotentialLongRange, P[i].Potential, P[i].PM_Potential); +#else /* #ifdef PMGRID */ + fprintf(FdForceTest, + "%d %d %lld %g %g %g %g %g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g\n", P[i].Type, + ThisTask, (long long)P[i].ID, All.Time, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2], P[i].DistToID1, + P[i].GravAccelDirect[0], P[i].GravAccelDirect[1], P[i].GravAccelDirect[2], P[i].GravAccel[0], + P[i].GravAccel[1], P[i].GravAccel[2], P[i].PotentialDirect, P[i].Potential); +#endif /* #ifdef PMGRID #else */ + } + } + + fclose(FdForceTest); + } + + MPI_Barrier(MPI_COMM_WORLD); + } + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].TimeBinGrav < 0) + P[i].TimeBinGrav = -P[i].TimeBinGrav - 1; + } + + /* Now the force computation is finished */ + + if(ThisTask == 0) + { + double costtotal = NumPart * ntot; + + fprintf(FdTimings, "DIRECT Nf= %d part/sec=%g | %g ia/part=%g\n\n", ntot, ((double)ntot) / 
(NTask * maxt + 1.0e-20), + ntot / ((maxt + 1.0e-20) * NTask), ((double)(costtotal)) / (ntot + 1.0e-20)); + + myflush(FdTimings); + } +} + +/*! \brief This function does the gravitational force computation with direct + * summation for the specified particle. + * + * This can be useful for debugging purposes, in particular for explicit + * checks of the force accuracy reached with the tree. Depending on whether + * or not a PMGRID is used, the code does a short-range tree-walk or a full + * one. + * + * \param i Index of the particle to be processed. + * \param mode 0: process local particle (phase 1), 1: process imported + * particle (phase 2). + * \param thread_id Id of this thread. + * \param measure_cost_flag Whether the cost of the tree walk should be + * measured. + * + * \return Number of interactions processed for particle i. + */ +static void gravity_forcetest_evaluate(int target, int mode, int threadid) +{ + int j; + double h_i, h_j, hmax, mass, dx, dy, dz, r, r2, fac, wp, fac_newton, wp_newton; + double pos_x, pos_y, pos_z; +#ifdef PMGRID + double asmth = All.Asmth[0]; +#endif /* #ifdef PMGRID */ +#if !defined(GRAVITY_NOT_PERIODIC) + double xtmp, ytmp, ztmp; +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */ + + double acc_x = 0; + double acc_y = 0; + double acc_z = 0; + double pot = 0; + double disttoid1 = 0; + + data_out out; + data_in local, *target_data; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + /* make sure that the particle is exported to all other tasks */ + for(int task = 0; task < NTask; task++) + if(task != ThisTask) + { + if(Thread[threadid].Exportflag[task] != target) + { + Thread[threadid].Exportflag[task] = target; + int nexp = Thread[threadid].Nexport++; + Thread[threadid].PartList[nexp].Task = task; + Thread[threadid].PartList[nexp].Index = target; + Thread[threadid].ExportSpace -= Thread[threadid].ItemSize; + } + + int nexp = Thread[threadid].NexportNodes++; + nexp = -1 - nexp; + 
struct datanodelist *nodelist = + (struct datanodelist *)(((char *)Thread[threadid].PartList) + Thread[threadid].InitialSpace); + nodelist[nexp].Task = task; + nodelist[nexp].Index = target; + nodelist[nexp].Node = 0; /* the node doesn't matter here */ + Thread[threadid].ExportSpace -= sizeof(struct datanodelist) + sizeof(int); + } + } + else + { + target_data = &DataGet[target]; + } + + pos_x = target_data->Pos[0]; + pos_y = target_data->Pos[1]; + pos_z = target_data->Pos[2]; + h_i = All.ForceSoftening[target_data->SofteningType]; + +#ifdef PLACEHIGHRESREGION + if(pmforce_is_particle_high_res(target_data->Type, target_data->Pos)) + asmth = All.Asmth[1]; +#endif /* #ifdef PLACEHIGHRESREGION */ + + out.Pot = 0; +#ifdef PMGRID + out.PotShortRange = 0; + out.PotLongRange = 0; +#endif /* #ifdef PMGRID */ + + for(int i = 0; i < 3; i++) + { + out.Acc[i] = 0; +#ifdef PMGRID + out.AccShortRange[i] = 0; + out.AccLongRange[i] = 0; +#endif /* #ifdef PMGRID */ + } + + for(j = 0; j < NumPart; j++) + { + h_j = All.ForceSoftening[P[j].SofteningType]; + + if(h_j > h_i) + hmax = h_j; + else + hmax = h_i; + +#ifdef CELL_CENTER_GRAVITY + if(P[j].Type == 0) + { + dx = GRAVITY_NEAREST_X(SphP[j].Center[0] - pos_x); + dy = GRAVITY_NEAREST_Y(SphP[j].Center[1] - pos_y); + dz = GRAVITY_NEAREST_Z(SphP[j].Center[2] - pos_z); + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + dx = GRAVITY_NEAREST_X(P[j].Pos[0] - pos_x); + dy = GRAVITY_NEAREST_Y(P[j].Pos[1] - pos_y); + dz = GRAVITY_NEAREST_Z(P[j].Pos[2] - pos_z); + } + + r2 = dx * dx + dy * dy + dz * dz; + + mass = P[j].Mass; + + /* now evaluate the multipole moment */ + + r = sqrt(r2); + + if(P[j].ID == 1) + disttoid1 = r; + + /* we compute 3 different forces: + * (1) The correct direct summation force, if needed with Ewald correction: ftrue + * In the case of PM: + * (2) The short range direct summation force with only the erfc cut-off (this is what the tree can at best deliver): fsr + * (3) The expected PM force based on the 
long-range part of the Ewald sum. This is equal to ftrue - fsr - fsfr_periodic_images + * */ + + if(r > 0) + { + fac_newton = mass / (r2 * r); + wp_newton = -mass / r; + } + else + { + fac_newton = 0; + wp_newton = 0; + } + + if(r >= hmax) + { + fac = fac_newton; + wp = wp_newton; + } + else + { + double h_inv = 1.0 / hmax; + double h3_inv = h_inv * h_inv * h_inv; + double u = r * h_inv; + + if(u < 0.5) + { + double u2 = u * u; + fac = mass * h3_inv * (SOFTFAC1 + u2 * (SOFTFAC2 * u + SOFTFAC3)); + wp = mass * h_inv * (SOFTFAC4 + u2 * (SOFTFAC5 + u2 * (SOFTFAC6 * u + SOFTFAC7))); + } + else + { + double u2 = u * u, u3 = u2 * u; + fac = mass * h3_inv * (SOFTFAC8 + SOFTFAC9 * u + SOFTFAC10 * u2 + SOFTFAC11 * u3 + SOFTFAC12 / u3); + wp = mass * h_inv * (SOFTFAC13 + SOFTFAC14 / u + u2 * (SOFTFAC1 + u * (SOFTFAC15 + u * (SOFTFAC16 + SOFTFAC17 * u)))); + } + } + + double acc_newton_x = dx * fac; + double acc_newton_y = dy * fac; + double acc_newton_z = dz * fac; + double pot_newton = wp; + +#ifdef PMGRID + double u = 0.5 / asmth * r; + + double factor_force = (erfc(u) + 2.0 * u / sqrt(M_PI) * exp(-u * u) - 1.0); + double factor_pot = erfc(u); + + fac += fac_newton * factor_force; + wp += wp_newton * (factor_pot - 1.0); + + double acc_short_x = dx * fac; + double acc_short_y = dy * fac; + double acc_short_z = dz * fac; + double pot_short = wp + mass * M_PI / (asmth * asmth * boxSize_X * boxSize_Y * boxSize_Z); + + out.AccShortRange[0] += acc_short_x; + out.AccShortRange[1] += acc_short_y; + out.AccShortRange[2] += acc_short_z; + out.PotShortRange += pot_short; +#endif /* #ifdef PMGRID */ + +#if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) + double fcorr[4]; + +#if !defined(FORCETEST_TESTFORCELAW) + ewald_correction_force_table_lookup(dx, dy, dz, fcorr); +#else /* #if !defined(FORCETEST_TESTFORCELAW) */ + ewald_correction_force(dx, dy, dz, fcorr); +#endif /* #if !defined(FORCETEST_TESTFORCELAW) #else */ + + acc_x = acc_newton_x + 
mass * fcorr[0]; + acc_y = acc_newton_y + mass * fcorr[1]; + acc_z = acc_newton_z + mass * fcorr[2]; + + pot = pot_newton + mass * fcorr[3]; +#else /* #if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */ + acc_x = acc_newton_x; + acc_y = acc_newton_y; + acc_z = acc_newton_z; + pot = pot_newton; +#endif /* #if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) #else */ + + out.Acc[0] += acc_x; + out.Acc[1] += acc_y; + out.Acc[2] += acc_z; + out.Pot += pot; + +#ifdef PMGRID + double fimages[4] = {0, 0, 0, 0}; +#ifdef FORCETEST_TESTFORCELAW + ewald_other_images(dx, dy, dz, 0.5 / asmth, fimages); +#endif /* #ifdef FORCETEST_TESTFORCELAW */ + out.AccLongRange[0] += acc_x - acc_short_x - mass * fimages[0]; + out.AccLongRange[1] += acc_y - acc_short_y - mass * fimages[1]; + out.AccLongRange[2] += acc_z - acc_short_z - mass * fimages[2]; + out.PotLongRange += pot - pot_short - mass * fimages[3]; +#endif /* #ifdef PMGRID */ + } + + out.DistToID1 = disttoid1; + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; +} + +#ifdef FORCETEST_TESTFORCELAW +/*! \brief Places particle with ID 1 radomly in box and calculates force on it. 
+ *
+ * \return void
+ */
+void gravity_forcetest_testforcelaw(void)
+{
+  int Ncycles = 40;   /* number of random test configurations that are run */
+  double xyz[3], eps; /* position and softening of the ID=1 particle, summed over all tasks below */
+
+  ngb_treefree();
+  mark_active_timebins();
+
+  for(int cycle = 0; cycle < Ncycles; cycle++)
+    {
+      mpi_printf("\nTEST-FORCE-LAW: cycle=%d|%d ----------------------------------\n\n", cycle, Ncycles);
+
+      double epsloc = 0, xyzloc[3] = {0, 0, 0}; /* remain zero on tasks that do not hold the ID=1 particle */
+
+      /* set particle with ID=1 to new random coordinate in box */
+      for(int n = 0; n < NumPart; n++)
+        {
+          P[n].Type = 1; /* note: every local particle is set to type 1 here */
+
+          if(P[n].ID == 1)
+            {
+              xyzloc[0] = All.BoxSize * STRETCHX * get_random_number();
+              xyzloc[1] = All.BoxSize * STRETCHY * get_random_number();
+              xyzloc[2] = All.BoxSize * STRETCHZ * get_random_number();
+
+#if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1)
+              for(int j = 0; j < 3; j++)
+                xyzloc[j] = 0.5 * (All.Xmintot[1][j] + All.Xmaxtot[1][j]); /* midpoint of Xmintot[1]/Xmaxtot[1] replaces the random position */
+#endif /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) */
+
+#if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 2)
+              if(get_random_number() < 0.5) /* in about half of the cases draw uniformly between Xmintot[1] and Xmaxtot[1] instead */
+                {
+                  for(int j = 0; j < 3; j++)
+                    xyzloc[j] = All.Xmintot[1][j] + get_random_number() * (All.Xmaxtot[1][j] - All.Xmintot[1][j]);
+                }
+#endif /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 2) */
+
+              for(int i = 0; i < 3; i++)
+                P[n].Pos[i] = xyzloc[i];
+
+              epsloc = All.ForceSoftening[P[n].SofteningType];
+            }
+        }
+      /* the sums make position and softening known on every task; this assumes exactly one particle carries ID=1 - TODO confirm */
+      MPI_Allreduce(xyzloc, xyz, 3, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+      MPI_Allreduce(&epsloc, &eps, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      double rmin = 0.01 * eps; /* smallest sampled distance: one percent of the softening */
+      double rmax =
+          sqrt(pow(0.5 * All.BoxSize * STRETCHX, 2) + pow(0.5 * All.BoxSize * STRETCHY, 2) + pow(0.5 * All.BoxSize * STRETCHZ, 2)); /* half of the box diagonal */
+      /* scatter all other particles around the ID=1 particle */
+      for(int n = 0; n < NumPart; n++)
+        {
+          if(P[n].ID != 1)
+            {
+              double r = exp(log(rmin) + (log(rmax) - log(rmin)) * get_random_number()); /* uniform in log(r) between rmin and rmax */
+              double theta = acos(2 * get_random_number() - 1);                            /* cos(theta) uniform in [-1, 1] */
+              double phi = 2 * M_PI * get_random_number();
+
+              double dx = r * sin(theta) * cos(phi);
+              double dy = r * sin(theta) * sin(phi);
+              double dz = r * cos(theta);
+
+              double xtmp, ytmp, ztmp; /* not referenced by name below; presumably scratch for the WRAP_X/Y/Z macros - confirm before removing */
+              P[n].Pos[0] = WRAP_X(xyz[0] + dx);
+              P[n].Pos[1] = WRAP_Y(xyz[1] + dy);
+              P[n].Pos[2] = WRAP_Z(xyz[2] + dz);
+            }
+        }
+
+      domain_free();
+      domain_Decomposition(); /* do domain decomposition if needed */
+
+#ifdef PMGRID
+      long_range_force();
+#endif /* #ifdef PMGRID */
+
+      compute_grav_accelerations(All.HighestActiveTimeBin, FLAG_FULL_TREE);
+    }
+
+  endrun(); /* called once all cycles are done; no code follows it in this function */
+}
+#endif /* #ifdef FORCETEST_TESTFORCELAW */
+
+/*! \brief Periodicity effects in gravity.
+ *
+ * Sums over the periodic image boxes with indices nx, ny, nz in [-4, 4]
+ * (the unshifted box nx = ny = nz = 0 is skipped). With d the separation
+ * to an image and r its length, each image contributes
+ *   -d * (erfc(alpha * r) + 2 * alpha * r / sqrt(pi) * exp(-alpha^2 * r^2)) / r^3
+ * to force[0..2] and -erfc(alpha * r) / r to force[3].
+ * The coordinates are folded to non-negative values first; at the end each
+ * of force[0..2] is multiplied by a per-axis sign (+1 if the input
+ * coordinate was negative, -1 otherwise). force[3] is not sign-flipped.
+ * If x = y = z = 0, all four entries are returned as zero.
+ *
+ * \param[in] x X coordinate of point.
+ * \param[in] y Y coordinate of point.
+ * \param[in] z Z coordinate of point.
+ * \param[in] alpha Cutoff for tree-PM.
+ * \param[out] force Force vector (4 entries, always overwritten).
+ */
+static void ewald_other_images(double x, double y, double z, double alpha, double force[4])
+{
+  double signx, signy, signz;
+
+  for(int i = 0; i < 4; i++)
+    force[i] = 0;
+
+  double r2 = x * x + y * y + z * z;
+
+  if(r2 == 0) /* zero separation: leave the zeroed result */
+    return;
+
+  if(x < 0)
+    {
+      x = -x;
+      signx = +1;
+    }
+  else
+    signx = -1;
+  if(y < 0)
+    {
+      y = -y;
+      signy = +1;
+    }
+  else
+    signy = -1;
+  if(z < 0)
+    {
+      z = -z;
+      signz = +1;
+    }
+  else
+    signz = -1;
+
+  double alpha2 = alpha * alpha;
+
+  const int nmax = 4; /* images are taken from -nmax to +nmax boxes along each axis */
+
+  for(int nx = -nmax; nx <= nmax; nx++)
+    for(int ny = -nmax; ny <= nmax; ny++)
+      for(int nz = -nmax; nz <= nmax; nz++)
+        {
+          if(nx != 0 || ny != 0 || nz != 0) /* skip the unshifted box */
+            {
+              double dx = x - nx * STRETCHX * All.BoxSize;
+              double dy = y - ny * STRETCHY * All.BoxSize;
+              double dz = z - nz * STRETCHZ * All.BoxSize;
+              double r2 = dx * dx + dy * dy + dz * dz; /* shadows the outer r2 */
+              double r = sqrt(r2);
+              double val = erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2);
+              double val2 = val / (r2 * r);
+              double val3 = erfc(alpha * r) / r;
+
+              force[0] -= dx * val2;
+              force[1] -= dy * val2;
+              force[2] -= dz * val2;
+              force[3] -= val3;
+            }
+        }
+
+  force[0] *= signx;
+  force[1] *= signy;
+  force[2] *= signz;
+}
+
+/*! \brief Force due to periodic boundary conditions.
+ *
+ * \param[in] x X coordinate of point.
+ * \param[in] y Y coordinate of point.
+ * \param[in] z Z coordinate of point.
+ * \param[out] force Force vector.
+ */
+static void ewald_correction_force(double x, double y, double z, double force[4])
+{
+  /* Layout of the result: force[0..2] are the vector components and force[3]
+   * is the matching scalar term. The value is assembled from
+   *   - the term +(x, y, z) / r^3 for the unshifted separation,
+   *   - a real-space sum over image boxes |nx|, |ny|, |nz| <= 6 weighted with
+   *     erfc(alpha * r),
+   *   - a reciprocal-space sum over wave vectors k != 0 damped by
+   *     exp(-k^2 / (4 * alpha^2)),
+   *   - the constant pi / (alpha^2 * V) + 1 / r added to force[3],
+   * with alpha = 2 / (lmin * BoxSize) and V the stretched box volume.
+   * The coordinates are folded to non-negative values first and the vector
+   * components are multiplied by a per-axis sign at the end. */
+  double signx, signy, signz;
+
+  for(int i = 0; i < 4; i++)
+    force[i] = 0;
+
+  double r2 = x * x + y * y + z * z;
+
+  if(r2 == 0) /* zero separation: return the zeroed result */
+    return;
+
+  if(x < 0)
+    {
+      x = -x;
+      signx = +1;
+    }
+  else
+    signx = -1;
+  if(y < 0)
+    {
+      y = -y;
+      signy = +1;
+    }
+  else
+    signy = -1;
+  if(z < 0)
+    {
+      z = -z;
+      signz = +1;
+    }
+  else
+    signz = -1;
+
+  /* NOTE(review): imin() looks like an integer minimum; this is only exact
+   * if the STRETCH factors are integers - confirm against their definition */
+  double lmin = imin(imin(STRETCHX, STRETCHY), STRETCHZ);
+  double alpha = 2.0 / lmin / All.BoxSize;
+  double alpha2 = alpha * alpha;
+  double r = sqrt(r2);
+  double r3inv = 1.0 / (r2 * r);
+
+  force[0] += r3inv * x;
+  force[1] += r3inv * y;
+  force[2] += r3inv * z;
+
+  const int nmax = 6;
+
+  /* real-space part; unlike ewald_other_images() the unshifted box is included */
+  for(int nx = -nmax; nx <= nmax; nx++)
+    for(int ny = -nmax; ny <= nmax; ny++)
+      for(int nz = -nmax; nz <= nmax; nz++)
+        {
+          double dx = x - nx * STRETCHX * All.BoxSize;
+          double dy = y - ny * STRETCHY * All.BoxSize;
+          double dz = z - nz * STRETCHZ * All.BoxSize;
+          double r2 = dx * dx + dy * dy + dz * dz;
+          double r = sqrt(r2);
+          double val = erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2);
+          double val2 = val / (r2 * r);
+          double val3 = erfc(alpha * r) / r; /* for potential */
+
+          force[0] -= dx * val2;
+          force[1] -= dy * val2;
+          force[2] -= dz * val2;
+          force[3] -= val3;
+        }
+
+  int nxmax = (int)(4 * alpha * All.BoxSize * (STRETCHX / lmin) + 0.5);
+  int nymax = (int)(4 * alpha * All.BoxSize * (STRETCHY / lmin) + 0.5);
+  int nzmax = (int)(4 * alpha * All.BoxSize * (STRETCHZ / lmin) + 0.5);
+
+  /* reciprocal-space part; the k = 0 mode is skipped */
+  for(int nx = -nxmax; nx <= nxmax; nx++)
+    for(int ny = -nymax; ny <= nymax; ny++)
+      for(int nz = -nzmax; nz <= nzmax; nz++)
+        {
+          double kx = (2.0 * M_PI / (All.BoxSize * STRETCHX)) * nx;
+          double ky = (2.0 * M_PI / (All.BoxSize * STRETCHY)) * ny;
+          double kz = (2.0 * M_PI / (All.BoxSize * STRETCHZ)) * nz;
+          double k2 = kx * kx + ky * ky + kz * kz;
+
+          if(k2 > 0)
+            {
+              double kdotx = (x * kx + y * ky + z * kz);
+              double vv = 4.0 * M_PI / (k2 * pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ) * exp(-k2 / (4.0 * alpha2));
+              double val = vv * sin(kdotx);
+              double val2 = vv * cos(kdotx);
+              force[0] -= kx * val;
+              force[1] -= ky * val;
+              force[2] -= kz * val;
+              force[3] -= val2;
+            }
+        }
+
+  force[3] += M_PI / (alpha2 * pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ) + 1.0 / r;
+
+  force[0] *= signx;
+  force[1] *= signy;
+  force[2] *= signz;
+}
+
+#if !defined(FORCETEST_TESTFORCELAW)
+
+/* number of table cells per axis for an unstretched box (the table has one more node than cells) */
+#define TEW_N 128
+
+#define TEW_NX (DBX * STRETCHX * TEW_N)
+#define TEW_NY (DBY * STRETCHY * TEW_N)
+#define TEW_NZ (DBZ * STRETCHZ * TEW_N)
+
+/* Ewd_table[c][i][j][k]: component c (0..2 vector, 3 scalar) of ewald_correction_force() at table node (i, j, k) */
+static double Ewd_table[4][TEW_NX + 1][TEW_NY + 1][TEW_NZ + 1];
+/* conversion factor from a coordinate offset to a (fractional) table index, set in forcetest_ewald_init() */
+static double Ewd_table_intp;
+
+/*! \brief Initializes Ewald correction force test.
+ *
+ * Fills Ewd_table by evaluating ewald_correction_force() on every table
+ * node. The nodes are split evenly across the MPI tasks with
+ * subdivide_evenly(); afterwards the pieces are exchanged with
+ * MPI_Allgatherv so that every task holds the complete table. Task 0
+ * prints a progress line roughly every 5 percent of its own share.
+ *
+ * \return void
+ */
+void forcetest_ewald_init(void)
+{
+  double t0 = second();
+
+  mpi_printf("FORCETEST: initialize high-res Ewald lookup table...\n");
+
+#ifdef LONG_X
+  if(LONG_X != (int)(LONG_X))
+    terminate("LONG_X must be an integer");
+#endif /* #ifdef LONG_X */
+
+#ifdef LONG_Y
+  if(LONG_Y != (int)(LONG_Y))
+    terminate("LONG_Y must be an integer");
+#endif /* #ifdef LONG_Y */
+
+#ifdef LONG_Z
+  if(LONG_Z != (int)(LONG_Z))
+    terminate("LONG_Z must be an integer");
+#endif /* #ifdef LONG_Z */
+
+  /* ok, let's compute things. Actually, we do that in parallel. */
+  int size = (TEW_NX + 1) * (TEW_NY + 1) * (TEW_NZ + 1);
+  int first, count;
+
+  subdivide_evenly(size, NTask, ThisTask, &first, &count);
+
+  /* Progress is reported about every 5 percent of the local work. With
+   * fewer than 20 local entries count / 20 is zero, which would make the
+   * modulo below a division by zero, so the step is clamped to 1. */
+  int progress_step = count / 20;
+  if(progress_step < 1)
+    progress_step = 1;
+
+  for(int n = first; n < first + count; n++)
+    {
+      /* decompose the linear index n into the table node (i, j, k), k running fastest */
+      int i = n / ((TEW_NY + 1) * (TEW_NZ + 1));
+      int j = (n - i * (TEW_NY + 1) * (TEW_NZ + 1)) / (TEW_NZ + 1);
+      int k = (n - i * (TEW_NY + 1) * (TEW_NZ + 1) - j * (TEW_NZ + 1));
+
+      if(ThisTask == 0)
+        {
+          if(((n - first) % progress_step) == 0)
+            {
+              printf("%4.1f percent done\n", (n - first) / (count / 100.0));
+              myflush(stdout);
+            }
+        }
+
+      /* node coordinates; the last node along each axis lies at half the (stretched) box length */
+      double xx = 0.5 * DBX * STRETCHX * ((double)i) / TEW_NX * All.BoxSize;
+      double yy = 0.5 * DBY * STRETCHY * ((double)j) / TEW_NY * All.BoxSize;
+      double zz = 0.5 * DBZ * STRETCHZ * ((double)k) / TEW_NZ * All.BoxSize;
+
+      double fcorr[4];
+      ewald_correction_force(xx, yy, zz, fcorr);
+
+      for(int rep = 0; rep < 4; rep++)
+        Ewd_table[rep][i][j][k] = fcorr[rep];
+    }
+
+  int *recvcnts = (int *)mymalloc("recvcnts", NTask * sizeof(int));
+  int *recvoffs = (int *)mymalloc("recvoffs", NTask * sizeof(int));
+
+  /* counts and offsets are given in bytes because the exchange uses MPI_BYTE */
+  for(int i = 0; i < NTask; i++)
+    {
+      int off, cnt;
+      subdivide_evenly(size, NTask, i, &off, &cnt);
+      recvcnts[i] = cnt * sizeof(double);
+      recvoffs[i] = off * sizeof(double);
+    }
+
+  /* one in-place gather per table component */
+  for(int rep = 0; rep < 4; rep++)
+    MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(double), MPI_BYTE, Ewd_table[rep], recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD);
+
+  myfree(recvoffs);
+  myfree(recvcnts);
+
+  /* now scale things to the boxsize that is actually used */
+  Ewd_table_intp = 2 * TEW_N / All.BoxSize;
+
+  double t1 = second();
+  mpi_printf("FORCETEST: Initialization of high-res Ewald table finished, took %g sec.\n", timediff(t0, t1));
+}
+
+/*! \brief Looks up Ewald force from tabulated values.
+ *
+ * \param[in] dx X position.
+ * \param[in] dy Y position.
+ * \param[in] dz Z position.
+ * \param[out] force Ewald force correction.
+ *
+ * \return void
+ */
+static void ewald_correction_force_table_lookup(double dx, double dy, double dz, double force[4])
+{
+  /* Trilinear interpolation in Ewd_table: fold the offset to non-negative
+   * values, locate the enclosing table cell, blend the eight surrounding
+   * nodes and finally restore the sign of the three vector components. */
+  double offset[3] = {dx, dy, dz};
+  const int cells[3] = {TEW_NX, TEW_NY, TEW_NZ};
+  int flip[3], base[3];
+  double lower[3], upper[3];
+
+  for(int axis = 0; axis < 3; axis++)
+    {
+      flip[axis] = +1;
+      if(offset[axis] < 0)
+        {
+          offset[axis] = -offset[axis];
+          flip[axis] = -1;
+        }
+
+      /* fractional table index; the cell index is clamped to the last cell */
+      double scaled = offset[axis] * Ewd_table_intp;
+      int cell = (int)scaled;
+      if(cell >= cells[axis])
+        cell = cells[axis] - 1;
+      scaled -= cell;
+
+      base[axis] = cell;
+      upper[axis] = scaled;
+      lower[axis] = 1 - scaled;
+    }
+
+  /* corner c has the index offsets (c >> 2, (c >> 1) & 1, c & 1) along x, y, z */
+  double weight[8];
+  for(int c = 0; c < 8; c++)
+    {
+      double wx = (c & 4) ? upper[0] : lower[0];
+      double wy = (c & 2) ? upper[1] : lower[1];
+      double wz = (c & 1) ? upper[2] : lower[2];
+      weight[c] = wx * wy * wz;
+    }
+
+  for(int rep = 0; rep < 4; rep++)
+    {
+      /* accumulate the corners in ascending order, starting from corner 0 */
+      double blended = Ewd_table[rep][base[0]][base[1]][base[2]] * weight[0];
+      for(int c = 1; c < 8; c++)
+        blended += Ewd_table[rep][base[0] + ((c >> 2) & 1)][base[1] + ((c >> 1) & 1)][base[2] + (c & 1)] * weight[c];
+      force[rep] = blended;
+    }
+
+  /* force[3] carries no direction and is therefore not flipped */
+  for(int axis = 0; axis < 3; axis++)
+    force[axis] *= flip[axis];
+}
+
+#endif /* #if !defined(FORCETEST_TESTFORCELAW) */
+
+#endif /* #ifdef FORCETEST */
diff --git a/src/amuse/community/arepo/src/gravity/longrange.c b/src/amuse/community/arepo/src/gravity/longrange.c
new file mode 100644
index 0000000000..2fbd6a2e53
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/longrange.c
@@ -0,0 +1,199 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/gravity/longrange.c
+ * \date 05/2018
+ * \brief Driver routines for computation of long-range gravitational
+ * PM force
+ * \details contains functions:
+ * void long_range_init(void)
+ * void long_range_init_regionsize(void)
+ * void long_range_force(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef PMGRID
+/*! \brief Driver routine to call initialization of periodic or/and
+ * non-periodic FFT routines.
+ *
+ * Which initializers run is fixed at compile time:
+ * - without GRAVITY_NOT_PERIODIC: pm_init_periodic(), additionally
+ *   pm2d_init_periodic() if TWODIMS is defined and pm_init_nonperiodic()
+ *   if PLACEHIGHRESREGION is defined;
+ * - with GRAVITY_NOT_PERIODIC: only pm_init_nonperiodic().
+ *
+ * \return void
+ */
+void long_range_init(void)
+{
+#ifndef GRAVITY_NOT_PERIODIC
+  pm_init_periodic();
+#ifdef TWODIMS
+  pm2d_init_periodic();
+#endif /* #ifdef TWODIMS */
+#ifdef PLACEHIGHRESREGION
+  pm_init_nonperiodic();
+#endif /* #ifdef PLACEHIGHRESREGION */
+#else  /* #ifndef GRAVITY_NOT_PERIODIC */
+  pm_init_nonperiodic();
+#endif /* #ifndef GRAVITY_NOT_PERIODIC #else */
+}
+
+/*! \brief Driver routine to determine the extend of the non-
+ * periodic or high resolution region.
+ *
+ * The initialization is done by pm_init_regionsize(). Afterwards
+ * the convolution kernels are computed by pm_setup_nonperiodic_kernel().
+ *
+ * \return void
+ */
+void long_range_init_regionsize(void)
+{
+  /* In both configurations the region size is only (re)determined when
+   * RestartFlag != 1, while the kernel is always set up. In the periodic
+   * case this is done only if PLACEHIGHRESREGION is defined. */
+#ifndef GRAVITY_NOT_PERIODIC
+#ifdef PLACEHIGHRESREGION
+  if(RestartFlag != 1)
+    pm_init_regionsize();
+  pm_setup_nonperiodic_kernel();
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+#else  /* #ifndef GRAVITY_NOT_PERIODIC */
+  if(RestartFlag != 1)
+    pm_init_regionsize();
+  pm_setup_nonperiodic_kernel();
+#endif /* #ifndef GRAVITY_NOT_PERIODIC #else */
+}
+
+/*! \brief This function computes the long-range PM force for all particles.
+ *
+ * In case of a periodic grid the force is calculated by pmforce_periodic()
+ * otherwise by pmforce_nonperiodic(). If a high resolution region is
+ * specified for the PM force, pmforce_nonperiodic() calculates that force in
+ * both cases.
+ *
+ * The PM accelerations (and, with EVALPOTENTIAL, the PM potentials) of all
+ * local particles are reset to zero first. If pmforce_nonperiodic() reports
+ * that a particle lies outside the allowed range (non-zero return value),
+ * the region is re-initialized and the computation is tried once more; a
+ * second failure terminates the run. Without SELFGRAVITY the function
+ * returns right after the reset.
+ *
+ * \return void
+ */
+void long_range_force(void)
+{
+  int i;
+
+  TIMER_START(CPU_PM_GRAVITY);
+
+#ifdef GRAVITY_NOT_PERIODIC
+  int j;
+  double fac;
+#endif /* #ifdef GRAVITY_NOT_PERIODIC */
+
+  for(i = 0; i < NumPart; i++)
+    {
+      P[i].GravPM[0] = P[i].GravPM[1] = P[i].GravPM[2] = 0;
+#ifdef EVALPOTENTIAL
+      P[i].PM_Potential = 0;
+#endif /* #ifdef EVALPOTENTIAL */
+    }
+
+#ifndef SELFGRAVITY
+  /* nothing to compute without self-gravity; stop the timer started above
+   * so that it is not left running by the early return */
+  TIMER_STOP(CPU_PM_GRAVITY);
+  return;
+#endif /* #ifndef SELFGRAVITY */
+
+#ifndef GRAVITY_NOT_PERIODIC
+
+#ifdef TWODIMS
+  pm2d_force_periodic(0);
+#else  /* #ifdef TWODIMS */
+  pmforce_periodic(0, NULL);
+#endif /* #ifdef TWODIMS #else */
+
+#ifdef PLACEHIGHRESREGION
+  i = pmforce_nonperiodic(1);
+
+  if(i == 1) /* this is returned if a particle lied outside allowed range */
+    {
+      pm_init_regionsize();
+      pm_setup_nonperiodic_kernel();
+      i = pmforce_nonperiodic(1); /* try again */
+    }
+  if(i == 1)
+    terminate("despite we tried to increase the region, we still don't fit all particles in it");
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+#else /* #ifndef GRAVITY_NOT_PERIODIC */
+  i = pmforce_nonperiodic(0);
+
+  if(i == 1) /* this is returned if a particle lied outside allowed range */
+    {
+      pm_init_regionsize();
+      pm_setup_nonperiodic_kernel();
+      i = pmforce_nonperiodic(0); /* try again */
+    }
+  if(i == 1)
+    terminate("despite we tried to increase the region, somehow we still don't fit all particles in it");
+#ifdef PLACEHIGHRESREGION
+  i = pmforce_nonperiodic(1);
+
+  if(i == 1) /* this is returned if a particle lied outside allowed range */
+    {
+      pm_init_regionsize();
+      pm_setup_nonperiodic_kernel();
+
+      /* try again */
+
+      /* both grids are recomputed, so clear everything the first pass may
+       * have written: the accelerations and, as in the reset at the top of
+       * this function, the PM potential */
+      for(i = 0; i < NumPart; i++)
+        {
+          P[i].GravPM[0] = P[i].GravPM[1] = P[i].GravPM[2] = 0;
+#ifdef EVALPOTENTIAL
+          P[i].PM_Potential = 0;
+#endif /* #ifdef EVALPOTENTIAL */
+        }
+
+      /* two separate statements fix the call order (grid 0, then grid 1);
+       * inside a single sum the evaluation order would be unspecified */
+      i = pmforce_nonperiodic(0);
+      i += pmforce_nonperiodic(1);
+    }
+  if(i != 0)
+    terminate("despite we tried to increase the region, somehow we still don't fit all particles in it");
+#endif /* #ifdef PLACEHIGHRESREGION */
+#endif /* #ifndef GRAVITY_NOT_PERIODIC #else */
+
+#ifdef GRAVITY_NOT_PERIODIC
+  if(All.ComovingIntegrationOn)
+    {
+      fac = 0.5 * All.Hubble * All.Hubble * All.Omega0;
+
+      for(i = 0; i < NumPart; i++)
+        for(j = 0; j < 3; j++)
+          P[i].GravPM[j] += fac * P[i].Pos[j];
+    }
+
+  /* Finally, the following factor allows a computation of cosmological simulation
+     with vacuum energy in physical coordinates */
+  if(All.ComovingIntegrationOn == 0)
+    {
+      fac = All.OmegaLambda * All.Hubble * All.Hubble;
+
+      for(i = 0; i < NumPart; i++)
+        for(j = 0; j < 3; j++)
+          P[i].GravPM[j] += fac * P[i].Pos[j];
+    }
+#endif /* #ifdef GRAVITY_NOT_PERIODIC */
+
+  TIMER_STOP(CPU_PM_GRAVITY);
+
+  find_long_range_step_constraint();
+}
+#endif /* #ifdef PMGRID */
diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c b/src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c
new file mode 100644
index 0000000000..866ef06459
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c
@@ -0,0 +1,1771 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/pm/pm_mpi_fft.c + * \date 05/2018 + * \brief Home-made parallel FFT transforms as needed by the code. + * \details We only use the one-dimensional FFTW3 routines, because the + * MPI versions of FFTW3 allocate memory for themselves during the + * transforms (which we want to strictly avoid), and because we + * want to allow transforms that are so big that more than 2GB + * may be transferred between processors. 
+ * + * contains functions: + * void my_slab_based_fft_init(fft_plan * plan, int NgridX, + * int NgridY, int NgridZ) + * void my_slab_transposeA(fft_plan * plan, fft_real * field, + * fft_real * scratch) + * void my_slab_transposeB(fft_plan * plan, fft_real * field, + * fft_real * scratch) + * static void my_slab_transpose(void *av, void *bv, int *sx, + * int *firstx, int *sy, int *firsty, int nx, int ny, int nz, + * int mode) + * void my_slab_based_fft(fft_plan * plan, void *data, + * void *workspace, int forward) + * void my_slab_based_fft_c2c(fft_plan * plan, void *data, + * void *workspace, int forward) + * void my_column_based_fft_init(fft_plan * plan, int NgridX, + * int NgridY, int NgridZ) + * void my_column_based_fft_init_c2c(fft_plan * plan, + * int NgridX, int NgridY, int NgridZ) + * void my_fft_swap23(fft_plan * plan, fft_real * data, + * fft_real * out) + * void my_fft_swap23back(fft_plan * plan, fft_real * data, + * fft_real * out) + * void my_fft_swap13(fft_plan * plan, fft_real * data, + * fft_real * out) + * void my_fft_swap13back(fft_plan * plan, fft_real * data, + * fft_real * out) + * void my_column_based_fft(fft_plan * plan, void *data, + * void *workspace, int forward) + * void my_column_based_fft_c2c(fft_plan * plan, void *data, + * void *workspace, int forward)# + * static void my_fft_column_remap(fft_complex * data, + * int Ndims[3], int in_firstcol, int in_ncol, + * fft_complex * out, int perm[3], int out_firstcol, + * int out_ncol, size_t * offset_send, size_t * offset_recv, + * size_t * count_send, size_t * count_recv, + * size_t just_count_flag) + * static void my_fft_column_transpose(fft_real * data, + * int Ndims[3], int in_firstcol, int in_ncol, fft_real * out, + * int perm[3], int out_firstcol, int out_ncol, + * size_t * offset_send, size_t * offset_recv, + * size_t * count_send, size_t * count_recv, + * size_t just_count_flag) + * static void my_fft_column_transpose_c(fft_complex * data, + * int Ndims[3], int in_firstcol, int 
in_ncol,
+ * fft_complex * out, int perm[3], int out_firstcol,
+ * int out_ncol, size_t * offset_send, size_t * offset_recv,
+ * size_t * count_send, size_t * count_recv,
+ * size_t just_count_flag)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 26.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#if defined(PMGRID)
+
+#ifndef FFT_COLUMN_BASED
+/*! \brief Initializes slab based FFT.
+ *
+ * Splits the X and the Y grid dimension evenly across the MPI tasks with
+ * subdivide_evenly() and stores this task's first slab and slab count in
+ * the plan. It then builds the slab-to-task map for the X slabs, determines
+ * the largest X and Y slab counts over all tasks and gathers the per-task
+ * slab counts and first-slab indices. Finally the grid dimensions are
+ * recorded, together with Ngridz = NgridZ / 2 + 1 and Ngrid2 = 2 * Ngridz.
+ *
+ * Five arrays are allocated with mymalloc() and stored in the plan
+ * (slab_to_task, slabs_x_per_task, first_slab_x_of_task, slabs_y_per_task,
+ * first_slab_y_of_task); none of them is released in this function.
+ *
+ * \param[out] plan FFT plan.
+ * \param[in] NgridX Number of grid points in X direction.
+ * \param[in] NgridY Number of grid points in Y direction.
+ * \param[in] NgridZ Number of grid points in Z direction.
+ *
+ * \return void
+ */
+void my_slab_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ)
+{
+  subdivide_evenly(NgridX, NTask, ThisTask, &plan->slabstart_x, &plan->nslab_x);
+  subdivide_evenly(NgridY, NTask, ThisTask, &plan->slabstart_y, &plan->nslab_y);
+
+  plan->slab_to_task = (int *)mymalloc("slab_to_task", NgridX * sizeof(int));
+  /* record for every X slab which task owns it */
+  for(int task = 0; task < NTask; task++)
+    {
+      int start, n;
+
+      subdivide_evenly(NgridX, NTask, task, &start, &n);
+
+      for(int i = start; i < start + n; i++)
+        plan->slab_to_task[i] = task;
+    }
+
+  MPI_Allreduce(&plan->nslab_x, &plan->largest_x_slab, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Allreduce(&plan->nslab_y, &plan->largest_y_slab, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  plan->slabs_x_per_task = (int *)mymalloc("slabs_x_per_task", NTask * sizeof(int));
+  MPI_Allgather(&plan->nslab_x, 1, MPI_INT, plan->slabs_x_per_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  plan->first_slab_x_of_task = (int *)mymalloc("first_slab_x_of_task", NTask * sizeof(int));
+  MPI_Allgather(&plan->slabstart_x, 1, MPI_INT, plan->first_slab_x_of_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  plan->slabs_y_per_task = (int *)mymalloc("slabs_y_per_task", NTask * sizeof(int));
+  MPI_Allgather(&plan->nslab_y, 1, MPI_INT, plan->slabs_y_per_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  plan->first_slab_y_of_task = (int *)mymalloc("first_slab_y_of_task", NTask * sizeof(int));
+  MPI_Allgather(&plan->slabstart_y, 1, MPI_INT, plan->first_slab_y_of_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  plan->NgridX = NgridX;
+  plan->NgridY = NgridY;
+  plan->NgridZ = NgridZ;
+
+  int Ngridz = NgridZ / 2 + 1; /* dimension needed in complex space */
+
+  plan->Ngridz = Ngridz;
+  plan->Ngrid2 = 2 * Ngridz; /* length of one z-row counted in real numbers, including padding */
+}
+
+/*! \brief Transposes the array field.
+ *
+ * The array field is transposed such that the data in x direction is local
+ * to only one task. This is done, so the force in x-direction can be
+ * obtained by finite differencing. However the array is not fully
+ * transposed, i.e. the x-direction is not the fastest running array index.
+ *
+ * \param[in] plan FFT pan.
+ * \param[in, out] field The array to transpose.
+ * \param[out] scratch Scratch space used during communication (same size as
+ * field).
+ *
+ * \return void
+ */
+void my_slab_transposeA(fft_plan *plan, fft_real *field, fft_real *scratch)
+{
+  int n, prod, task, flag_big = 0, flag_big_all = 0;
+  /* step 1: pack the local rows into scratch, grouped by destination task */
+  prod = NTask * plan->nslab_x;
+
+  for(n = 0; n < prod; n++)
+    {
+      int x = n / NTask;    /* local x-slab index */
+      int task = n % NTask; /* destination task; shadows the outer 'task' */
+
+      int y;
+      /* rows in field are Ngrid2 reals apart, but only the first NgridZ reals of each row are copied */
+      for(y = plan->first_slab_y_of_task[task]; y < plan->first_slab_y_of_task[task] + plan->slabs_y_per_task[task]; y++)
+        memcpy(scratch + ((size_t)plan->NgridZ) * (plan->first_slab_y_of_task[task] * plan->nslab_x +
+                                                   x * plan->slabs_y_per_task[task] + (y - plan->first_slab_y_of_task[task])),
+               field + ((size_t)plan->Ngrid2) * (plan->NgridY * x + y), plan->NgridZ * sizeof(fft_real));
+    }
+  /* step 2: set up send/receive counts and offsets, all expressed in bytes */
+  size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t));
+  size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t));
+  size_t *soff = (size_t *)mymalloc("soff", NTask * sizeof(size_t));
+  size_t *roff = (size_t *)mymalloc("roff", NTask * sizeof(size_t));
+
+  for(task = 0; task < NTask; task++)
+    {
+      scount[task] = plan->nslab_x * plan->slabs_y_per_task[task] * (plan->NgridZ * sizeof(fft_real));
+      rcount[task] = plan->nslab_y * plan->slabs_x_per_task[task] * (plan->NgridZ * sizeof(fft_real));
+
+      soff[task] = plan->first_slab_y_of_task[task] * plan->nslab_x * (plan->NgridZ * sizeof(fft_real));
+      roff[task] = plan->first_slab_x_of_task[task] * plan->nslab_y * (plan->NgridZ * sizeof(fft_real));
+
+      if(scount[task] > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+        flag_big = 1;
+    }
+  /* if any task has an oversized message, all tasks are told so through the maximum of the flags */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  /* step 3: exchange; the received data ends up in field (the literal 1 is presumably the element size in bytes - confirm) */
+  myMPI_Alltoallv(scratch, scount, soff, field, rcount, roff, 1, flag_big_all, MPI_COMM_WORLD);
+
+  myfree(roff);
+  myfree(soff);
+  myfree(rcount);
+  myfree(scount);
+}
+
+/*! \brief Undo the transposition of the array field.
+ *
+ * The transposition of the array field is undone such that the data in
+ * x direction is distributed among all tasks again.
Thus the result of
+ * force computation in x-direction is sent back to the original task.
+ *
+ * \param[in] plan FFT plan.
+ * \param[in, out] field The array to transpose.
+ * \param[out] scratch Scratch space used during communication (same size as
+ * field).
+ *
+ * \return void
+ */
+void my_slab_transposeB(fft_plan *plan, fft_real *field, fft_real *scratch)
+{
+  int n, prod, task, flag_big = 0, flag_big_all = 0;
+  /* step 1: counts and offsets in bytes; send and receive roles are swapped relative to my_slab_transposeA() */
+  size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t));
+  size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t));
+  size_t *soff = (size_t *)mymalloc("soff", NTask * sizeof(size_t));
+  size_t *roff = (size_t *)mymalloc("roff", NTask * sizeof(size_t));
+
+  for(task = 0; task < NTask; task++)
+    {
+      rcount[task] = plan->nslab_x * plan->slabs_y_per_task[task] * (plan->NgridZ * sizeof(fft_real));
+      scount[task] = plan->nslab_y * plan->slabs_x_per_task[task] * (plan->NgridZ * sizeof(fft_real));
+
+      roff[task] = plan->first_slab_y_of_task[task] * plan->nslab_x * (plan->NgridZ * sizeof(fft_real));
+      soff[task] = plan->first_slab_x_of_task[task] * plan->nslab_y * (plan->NgridZ * sizeof(fft_real));
+
+      if(scount[task] > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+        flag_big = 1;
+    }
+  /* if any task has an oversized message, all tasks are told so through the maximum of the flags */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  /* step 2: exchange from field into scratch */
+  myMPI_Alltoallv(field, scount, soff, scratch, rcount, roff, 1, flag_big_all, MPI_COMM_WORLD);
+
+  myfree(roff);
+  myfree(soff);
+  myfree(rcount);
+  myfree(scount);
+  /* step 3: unpack scratch back into field, restoring the row stride of Ngrid2 reals */
+  prod = NTask * plan->nslab_x;
+
+  for(n = 0; n < prod; n++)
+    {
+      int x = n / NTask;    /* local x-slab index */
+      int task = n % NTask; /* task the row block came from; shadows the outer 'task' */
+
+      int y;
+      for(y = plan->first_slab_y_of_task[task]; y < plan->first_slab_y_of_task[task] + plan->slabs_y_per_task[task]; y++)
+        memcpy(field + ((size_t)plan->Ngrid2) * (plan->NgridY * x + y),
+               scratch + ((size_t)plan->NgridZ) * (plan->first_slab_y_of_task[task] * plan->nslab_x +
+                                                   x * plan->slabs_y_per_task[task] + (y - plan->first_slab_y_of_task[task])),
+               plan->NgridZ * sizeof(fft_real));
+    }
+}
+
+/* \brief Transpose a slab decomposed 3D field. + * + * Given a slab-decomposed 3D field a[...] with total dimension + * [nx x ny x nz], whose first dimension is split across the processors, this + * routine outputs in b[] the transpose where then the second dimension is + * split across the processors. sx[] gives for each MPI task how many slabs + * it has, and firstx[] is the first slab for a given task. Likewise, + * sy[]/firsty[] gives the same thing for the transposed order. Note, the + * contents of the array a[] will be destroyed by the routine. + * + * An element (x,y,z) is accessed in a[] with index + * [([x - firstx] * ny + y) * nz + z] and in b[] as + * [((y - firsty) * nx + x) * nz + z] + * + * \param[in, out] av Pointer to array a. + * \param[in, out] bv Pointer to array b. + * \param[in] sx Array storing number of slabs in each task. + * \param[in] firstx Array with first slab in each task. + * \param[in] sy Array storing number of transposed slabs in each task. + * \param[in] firsty Array storing first transposed slab in each task. + * \param[in] nx Number of elements in x direction. + * \param[in] ny Number of elements in y direction. + * \param[in] nz Number of elements in z direction. + * \param[in] mode If mode = 1, the reverse operation is carried out. 
+ *
+ * \return void
+ */
+static void my_slab_transpose(void *av, void *bv, int *sx, int *firstx, int *sy, int *firsty, int nx, int ny, int nz, int mode)
+{
+  char *a = (char *)av; /* byte pointers: all offsets below are computed in bytes */
+  char *b = (char *)bv;
+
+  size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t));
+  size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t));
+  size_t *soff = (size_t *)mymalloc("soff", NTask * sizeof(size_t));
+  size_t *roff = (size_t *)mymalloc("roff", NTask * sizeof(size_t));
+  int i, n, prod, flag_big = 0, flag_big_all = 0;
+  /* counts and offsets are in units of fft_complex elements, not bytes */
+  for(i = 0; i < NTask; i++)
+    {
+      scount[i] = sy[i] * sx[ThisTask] * ((size_t)nz);
+      rcount[i] = sy[ThisTask] * sx[i] * ((size_t)nz);
+      soff[i] = firsty[i] * sx[ThisTask] * ((size_t)nz);
+      roff[i] = sy[ThisTask] * firstx[i] * ((size_t)nz);
+
+      if(scount[i] * sizeof(fft_complex) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+        flag_big = 1;
+    }
+
+  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
+   * transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  if(mode == 0) /* forward direction: x-slabs in a[] become y-slabs in b[] */
+    {
+      /* first pack the data into contiguous blocks */
+      prod = NTask * sx[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int k = n / NTask; /* local x-slab */
+          int i = n % NTask; /* destination task; shadows the outer 'i' */
+          int j;
+
+          for(j = 0; j < sy[i]; j++)
+            memcpy(b + (k * sy[i] + j + firsty[i] * sx[ThisTask]) * (nz * sizeof(fft_complex)),
+                   a + (k * ny + (firsty[i] + j)) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+
+      /* transfer the data */
+      myMPI_Alltoallv(b, scount, soff, a, rcount, roff, sizeof(fft_complex), flag_big_all, MPI_COMM_WORLD);
+
+      /* unpack the data into the right order */
+      prod = NTask * sy[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int j = n / NTask; /* local y-slab */
+          int i = n % NTask; /* source task; shadows the outer 'i' */
+          int k;
+
+          for(k = 0; k < sx[i]; k++)
+            memcpy(b + (j * nx + k + firstx[i]) * (nz * sizeof(fft_complex)),
+                   a + ((k + firstx[i]) * sy[ThisTask] + j) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+    }
+  else /* any other mode value: the same three steps in reverse, with send and receive roles swapped */
+    {
+      /* first pack the data into contiguous blocks */
+      prod = NTask * sy[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int j = n / NTask;
+          int i = n % NTask;
+          int k;
+
+          for(k = 0; k < sx[i]; k++)
+            memcpy(b + ((k + firstx[i]) * sy[ThisTask] + j) * (nz * sizeof(fft_complex)),
+                   a + (j * nx + k + firstx[i]) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+
+      /* transfer the data */
+      myMPI_Alltoallv(b, rcount, roff, a, scount, soff, sizeof(fft_complex), flag_big_all, MPI_COMM_WORLD);
+
+      /* unpack the data into the right order */
+      prod = NTask * sx[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int k = n / NTask;
+          int i = n % NTask;
+          int j;
+
+          for(j = 0; j < sy[i]; j++)
+            memcpy(b + (k * ny + (firsty[i] + j)) * (nz * sizeof(fft_complex)),
+                   a + (k * sy[i] + j + firsty[i] * sx[ThisTask]) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+    }
+  /* now the result is in b[] */
+
+  myfree(roff);
+  myfree(soff);
+  myfree(rcount);
+  myfree(scount);
+}
+
+/*!
\brief Performs a slab-based Fast Fourier transformation.
+ *
+ * \param[in] plan FFT plan.
+ * \param[in, out] data Array to be Fourier transformed.
+ * \param[out] workspace Workspace to temporary operate in.
+ * \param[in] forward Forward (1) or backward (-1) Fourier transformaiton?
+ *
+ * \return void
+ */
+void my_slab_based_fft(fft_plan *plan, void *data, void *workspace, int forward)
+{
+  int n, prod;
+  int slabsx = plan->slabs_x_per_task[ThisTask]; /* number of local x-slabs */
+  int slabsy = plan->slabs_y_per_task[ThisTask]; /* number of local y-slabs (layout after the transpose) */
+
+  int ngridx = plan->NgridX;
+  int ngridy = plan->NgridY;
+  int ngridz = plan->Ngridz; /* lower-case field: the z-row length in complex numbers */
+  int ngridz2 = 2 * ngridz;  /* the same row length counted in real numbers */
+  /* size_t copies so that the pointer offsets below are computed in size_t */
+  size_t ngridx_long = ngridx;
+  size_t ngridy_long = ngridy;
+  size_t ngridz_long = ngridz;
+  size_t ngridz2_long = ngridz2;
+  /* data is viewed both as a real and as a complex array; the transforms alternate between data and workspace */
+  fft_real *data_real = (fft_real *)data;
+  fft_complex *data_complex = (fft_complex *)data, *workspace_complex = (fft_complex *)workspace;
+
+  if(forward == 1)
+    {
+      /* do the z-direction FFT, real to complex */
+      prod = slabsx * ngridy;
+      for(n = 0; n < prod; n++)
+        {
+          FFTW(execute_dft_r2c)(plan->forward_plan_zdir, data_real + n * ngridz2_long, workspace_complex + n * ngridz_long);
+        }
+
+      /* do the y-direction FFT, complex to complex */
+      prod = slabsx * ngridz;
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz; /* local x-slab */
+          int j = n % ngridz; /* z index within the slab */
+
+          FFTW(execute_dft)
+          (plan->forward_plan_ydir, workspace_complex + i * ngridz * ngridy_long + j, data_complex + i * ngridz * ngridy_long + j);
+        }
+
+      /* now our data resides in data_complex[] */
+
+      /* do the transpose */
+      my_slab_transpose(data_complex, workspace_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task,
+                        plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 0);
+
+      /* now the data is in workspace_complex[] */
+
+      /* finally, do the transform along the x-direction (we are in transposed order, x and y have interchanged */
+      prod = slabsy * ngridz;
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz; /* local y-slab */
+          int j = n % ngridz; /* z index within the slab */
+
+          FFTW(execute_dft)
+          (plan->forward_plan_xdir, workspace_complex + i * ngridz * ngridx_long + j, data_complex + i * ngridz * ngridx_long + j);
+        }
+
+      /* now the result is in data_complex[] */
+    }
+  else /* every value other than 1 selects the backward transform: the same steps in reverse order */
+    {
+      prod = slabsy * ngridz;
+
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz;
+          int j = n % ngridz;
+
+          FFTW(execute_dft)
+          (plan->backward_plan_xdir, data_complex + i * ngridz * ngridx_long + j, workspace_complex + i * ngridz * ngridx_long + j);
+        }
+
+      my_slab_transpose(workspace_complex, data_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task,
+                        plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 1);
+
+      prod = slabsx * ngridz;
+
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz;
+          int j = n % ngridz;
+
+          FFTW(execute_dft)
+          (plan->backward_plan_ydir, data_complex + i * ngridz * ngridy_long + j, workspace_complex + i * ngridz * ngridy_long + j);
+        }
+
+      prod = slabsx * ngridy;
+
+      for(n = 0; n < prod; n++)
+        {
+          FFTW(execute_dft_c2r)(plan->backward_plan_zdir, workspace_complex + n * ngridz_long, data_real + n * ngridz2_long);
+        }
+
+      /* now the result is in data[] */
+    }
+}
+
+/*! \brief Performs a slab-based complex to complex Fast Fourier
+ * transformation.
+ *
+ * \param[in] plan FFT plan.
+ * \param[in, out] data Array to be Fourier transformed.
+ * \param[out] workspace Workspace to temporary operate in.
+ * \param[in] forward Forward (1) or backward (-1) Fourier transformaiton?
+ * + * \return void + */ +void my_slab_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward) +{ + int n, prod; + int slabsx = plan->slabs_x_per_task[ThisTask]; + int slabsy = plan->slabs_y_per_task[ThisTask]; + + int ngridx = plan->NgridX; + int ngridy = plan->NgridY; + int ngridz = plan->NgridZ; + + size_t ngridx_long = ngridx; + size_t ngridy_long = ngridy; + size_t ngridz_long = ngridz; + + fft_complex *data_start = (fft_complex *)data; + fft_complex *data_complex = (fft_complex *)data, *workspace_complex = (fft_complex *)workspace; + + if(forward == 1) + { + /* do the z-direction FFT, complex to complex */ + prod = slabsx * ngridy; + for(n = 0; n < prod; n++) + { + FFTW(execute_dft)(plan->forward_plan_zdir, data_start + n * ngridz, workspace_complex + n * ngridz); + } + + /* do the y-direction FFT, complex to complex */ + prod = slabsx * ngridz; + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->forward_plan_ydir, workspace_complex + i * ngridz * ngridy_long + j, data_complex + i * ngridz * ngridy_long + j); + } + + /* now our data resides in data_complex[] */ + + /* do the transpose */ + my_slab_transpose(data_complex, workspace_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task, + plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 0); + + /* now the data is in workspace_complex[] */ + + /* finally, do the transform along the x-direction (we are in transposed order, x and y have interchanged */ + prod = slabsy * ngridz; + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->forward_plan_xdir, workspace_complex + i * ngridz * ngridx_long + j, data_complex + i * ngridz * ngridx_long + j); + } + + /* now the result is in data_complex[] */ + } + else + { + prod = slabsy * ngridz; + + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->backward_plan_xdir, 
data_complex + i * ngridz * ngridx_long + j, workspace_complex + i * ngridz * ngridx_long + j); + } + + my_slab_transpose(workspace_complex, data_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task, + plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 1); + + prod = slabsx * ngridz; + + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->backward_plan_ydir, data_complex + i * ngridz * ngridy_long + j, workspace_complex + i * ngridz * ngridy_long + j); + } + + prod = slabsx * ngridy; + + for(n = 0; n < prod; n++) + { + FFTW(execute_dft)(plan->backward_plan_zdir, workspace_complex + n * ngridz, data_start + n * ngridz); + } + + /* now the result is in data[] */ + } +} + +#else /* #ifndef FFT_COLUMN_BASED */ + +static void my_fft_column_remap(fft_complex *data, int Ndims[3], int in_firstcol, int in_ncol, fft_complex *out, int perm[3], + int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send, + size_t *count_recv, size_t just_count_flag); + +static void my_fft_column_transpose(fft_real *data, int Ndims[3], /* global dimensions of data cube */ + int in_firstcol, int in_ncol, /* first column and number of columns */ + fft_real *out, int perm[3], int out_firstcol, int out_ncol, size_t *offset_send, + size_t *offset_recv, size_t *count_send, size_t *count_recv, size_t just_count_flag); + +static void my_fft_column_transpose_c(fft_complex *data, int Ndims[3], /* global dimensions of data cube */ + int in_firstcol, int in_ncol, /* first column and number of columns */ + fft_complex *out, int perm[3], int out_firstcol, int out_ncol, size_t *offset_send, + size_t *offset_recv, size_t *count_send, size_t *count_recv, size_t just_count_flag); + +/*! \brief Initializes column based FFT. + * + * \param[out] plan FFT plan. + * \param[in] NgridX Number of grid points in X direction. + * \param[in] NgridY Number of grid points in Y direction. 
+ * \param[in] NgridZ Number of grid points in Z direction.
+ *
+ *  Stores the grid dimensions in the plan, assigns this task its share of
+ *  the NgridX * NgridY columns, computes max_datasize / fftsize, allocates
+ *  the per-task count and offset tables, and fills those tables by calling
+ *  every remap / transpose once with NULL buffers and just_count_flag = 1.
+ *
+ * \return void
+ */
+void my_column_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ)
+{
+  plan->NgridX = NgridX;
+  plan->NgridY = NgridY;
+  plan->NgridZ = NgridZ;
+
+  int Ngridz = NgridZ / 2 + 1; /* complex elements per z-line; used as the complex stride of the r2c/c2r transforms */
+
+  plan->Ngridz = Ngridz;
+  plan->Ngrid2 = 2 * Ngridz; /* real elements per z-line; used as the real stride of the r2c/c2r transforms */
+
+  int columns, avg, exc, tasklastsection, pivotcol;
+
+  columns = NgridX * NgridY;             /* one column per (x, y) pair */
+  avg = (columns - 1) / NTask + 1;       /* ceil(columns / NTask) for columns >= 1 */
+  exc = NTask * avg - columns;           /* number of tasks that receive avg - 1 columns */
+  tasklastsection = NTask - exc;         /* first task of the section holding avg - 1 columns */
+  pivotcol = tasklastsection * avg;      /* first column owned by that section */
+
+  plan->pivotcol = pivotcol;
+  plan->avg = avg;
+  plan->tasklastsection = tasklastsection;
+
+  if(ThisTask < tasklastsection)
+    {
+      plan->base_firstcol = ThisTask * avg;
+      plan->base_ncol = avg;
+    }
+  else
+    {
+      plan->base_firstcol = ThisTask * avg - (ThisTask - tasklastsection); /* one column less for every task since tasklastsection */
+      plan->base_ncol = avg - 1;
+    }
+
+  plan->base_lastcol = plan->base_firstcol + plan->base_ncol - 1;
+
+  /* column ranges of the other layouts; subdivide_evenly() is defined elsewhere and presumably splits the given number of
+   * columns over NTask tasks, returning this task's first column and column count -- confirm against its definition */
+  subdivide_evenly(NgridX * Ngridz, NTask, ThisTask, &plan->transposed_firstcol, &plan->transposed_ncol);
+
+  subdivide_evenly(NgridY * Ngridz, NTask, ThisTask, &plan->second_transposed_firstcol, &plan->second_transposed_ncol);
+
+  subdivide_evenly(plan->NgridX * plan->Ngrid2, NTask, ThisTask, &plan->firstcol_XZ, &plan->ncol_XZ);
+
+  subdivide_evenly(plan->NgridY * plan->Ngrid2, NTask, ThisTask, &plan->firstcol_YZ, &plan->ncol_YZ);
+
+  plan->second_transposed_ncells = ((size_t)plan->NgridX) * plan->second_transposed_ncol;
+
+  /* max_datasize is the maximum over the five layouts below; the factor 2 appears on the terms built from
+   * complex column counts (presumably converting to units of fft_real -- confirm) */
+  plan->max_datasize = ((size_t)plan->Ngrid2) * plan->base_ncol;
+  plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridY) * plan->transposed_ncol);
+  plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridX) * plan->second_transposed_ncol);
+  plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_XZ) * plan->NgridY);
+  plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_YZ) * plan->NgridX);
+
+  plan->fftsize = plan->max_datasize;
+
+  /* offset tables, one size_t per task, for each of the eight remap / transpose operations */
+  plan->offsets_send_A = mymalloc_clear("offsets_send_A", NTask * sizeof(size_t));
+  plan->offsets_recv_A = mymalloc_clear("offsets_recv_A", NTask * sizeof(size_t));
+  plan->offsets_send_B = mymalloc_clear("offsets_send_B", NTask * sizeof(size_t));
+  plan->offsets_recv_B = mymalloc_clear("offsets_recv_B", NTask * sizeof(size_t));
+  plan->offsets_send_C = mymalloc_clear("offsets_send_C", NTask * sizeof(size_t));
+  plan->offsets_recv_C = mymalloc_clear("offsets_recv_C", NTask * sizeof(size_t));
+  plan->offsets_send_D = mymalloc_clear("offsets_send_D", NTask * sizeof(size_t));
+  plan->offsets_recv_D = mymalloc_clear("offsets_recv_D", NTask * sizeof(size_t));
+  plan->offsets_send_13 = mymalloc_clear("offsets_send_13", NTask * sizeof(size_t));
+  plan->offsets_recv_13 = mymalloc_clear("offsets_recv_13", NTask * sizeof(size_t));
+  plan->offsets_send_23 = mymalloc_clear("offsets_send_23", NTask * sizeof(size_t));
+  plan->offsets_recv_23 = mymalloc_clear("offsets_recv_23", NTask * sizeof(size_t));
+  plan->offsets_send_13back = mymalloc_clear("offsets_send_13back", NTask * sizeof(size_t));
+  plan->offsets_recv_13back = mymalloc_clear("offsets_recv_13back", NTask * sizeof(size_t));
+  plan->offsets_send_23back = mymalloc_clear("offsets_send_23back", NTask * sizeof(size_t));
+  plan->offsets_recv_23back = mymalloc_clear("offsets_recv_23back", NTask * sizeof(size_t));
+  /* matching element-count tables, again one size_t per task */
+  plan->count_send_A = mymalloc_clear("count_send_A", NTask * sizeof(size_t));
+  plan->count_recv_A = mymalloc_clear("count_recv_A", NTask * sizeof(size_t));
+  plan->count_send_B = mymalloc_clear("count_send_B", NTask * sizeof(size_t));
+  plan->count_recv_B = mymalloc_clear("count_recv_B", NTask * sizeof(size_t));
+  plan->count_send_C = mymalloc_clear("count_send_C", NTask * sizeof(size_t));
+  plan->count_recv_C = mymalloc_clear("count_recv_C", NTask * sizeof(size_t));
+  plan->count_send_D = mymalloc_clear("count_send_D", NTask * sizeof(size_t));
+  plan->count_recv_D = mymalloc_clear("count_recv_D", NTask * sizeof(size_t));
+  plan->count_send_13 = mymalloc_clear("count_send_13", NTask * sizeof(size_t));
+  plan->count_recv_13 = mymalloc_clear("count_recv_13", NTask * sizeof(size_t));
+  plan->count_send_23 = mymalloc_clear("count_send_23", NTask * sizeof(size_t));
+  plan->count_recv_23 = mymalloc_clear("count_recv_23", NTask * sizeof(size_t));
+  plan->count_send_13back = mymalloc_clear("count_send_13back", NTask * sizeof(size_t));
+  plan->count_recv_13back = mymalloc_clear("count_recv_13back", NTask * sizeof(size_t));
+  plan->count_send_23back = mymalloc_clear("count_send_23back", NTask * sizeof(size_t));
+  plan->count_recv_23back = mymalloc_clear("count_recv_23back", NTask * sizeof(size_t));
+  /* counting passes: each call below gets NULL buffers and just_count_flag = 1, so only the tables are filled.
+   * A: base layout -> transposed layout (last two axes swapped) */
+  int dimA[3] = {plan->NgridX, plan->NgridY, plan->Ngridz};
+  int permA[3] = {0, 2, 1};
+
+  my_fft_column_remap(NULL, dimA, plan->base_firstcol, plan->base_ncol, NULL, permA, plan->transposed_firstcol, plan->transposed_ncol,
+                      plan->offsets_send_A, plan->offsets_recv_A, plan->count_send_A, plan->count_recv_A, 1);
+  /* B: transposed layout -> second transposed layout (first and last axes swapped) */
+  int dimB[3] = {plan->NgridX, plan->Ngridz, plan->NgridY};
+  int permB[3] = {2, 1, 0};
+
+  my_fft_column_remap(NULL, dimB, plan->transposed_firstcol, plan->transposed_ncol, NULL, permB, plan->second_transposed_firstcol,
+                      plan->second_transposed_ncol, plan->offsets_send_B, plan->offsets_recv_B, plan->count_send_B, plan->count_recv_B,
+                      1);
+  /* C: second transposed layout -> transposed layout (reverse of B) */
+  int dimC[3] = {plan->NgridY, plan->Ngridz, plan->NgridX};
+  int permC[3] = {2, 1, 0};
+
+  my_fft_column_remap(NULL, dimC, plan->second_transposed_firstcol, plan->second_transposed_ncol, NULL, permC,
+                      plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C, plan->count_send_C,
+                      plan->count_recv_C, 1);
+  /* D: transposed layout -> base layout (reverse of A) */
+  int dimD[3] = {plan->NgridX, plan->Ngridz, plan->NgridY};
+  int permD[3] = {0, 2, 1};
+
+  my_fft_column_remap(NULL, dimD, plan->transposed_firstcol, plan->transposed_ncol, NULL, permD, plan->base_firstcol, plan->base_ncol,
+                      plan->offsets_send_D, plan->offsets_recv_D, plan->count_send_D, plan->count_recv_D, 1);
+  /* 23 / 23back: real-valued transpose swapping the second and third axes, and its reverse */
+  int dim23[3] = {plan->NgridX, plan->NgridY, plan->Ngrid2};
+  int perm23[3] = {0, 2, 1};
+
+  my_fft_column_transpose(NULL, dim23, plan->base_firstcol, plan->base_ncol, NULL, perm23, plan->firstcol_XZ, plan->ncol_XZ,
+                          plan->offsets_send_23, plan->offsets_recv_23, plan->count_send_23, plan->count_recv_23, 1);
+
+  int dim23back[3] = {plan->NgridX, plan->Ngrid2, plan->NgridY};
+  int perm23back[3] = {0, 2, 1};
+
+  my_fft_column_transpose(NULL, dim23back, plan->firstcol_XZ, plan->ncol_XZ, NULL, perm23back, plan->base_firstcol, plan->base_ncol,
+                          plan->offsets_send_23back, plan->offsets_recv_23back, plan->count_send_23back, plan->count_recv_23back, 1);
+  /* 13 / 13back: real-valued transpose swapping the first and third axes, and its reverse */
+  int dim13[3] = {plan->NgridX, plan->NgridY, plan->Ngrid2};
+  int perm13[3] = {2, 1, 0};
+
+  my_fft_column_transpose(NULL, dim13, plan->base_firstcol, plan->base_ncol, NULL, perm13, plan->firstcol_YZ, plan->ncol_YZ,
+                          plan->offsets_send_13, plan->offsets_recv_13, plan->count_send_13, plan->count_recv_13, 1);
+
+  int dim13back[3] = {plan->Ngrid2, plan->NgridY, plan->NgridX};
+  int perm13back[3] = {2, 1, 0};
+
+  my_fft_column_transpose(NULL, dim13back, plan->firstcol_YZ, plan->ncol_YZ, NULL, perm13back, plan->base_firstcol, plan->base_ncol,
+                          plan->offsets_send_13back, plan->offsets_recv_13back, plan->count_send_13back, plan->count_recv_13back, 1);
+}
+
+/*! \brief Initializes complex to complex column based FFT.
+ *
+ * \param[out] plan FFT plan.
+ * \param[in] NgridX Number of grid points in X direction.
+ * \param[in] NgridY Number of grid points in Y direction.
+ * \param[in] NgridZ Number of grid points in Z direction.
+ *
+ *  Complex-to-complex counterpart of my_column_based_fft_init(): the z-axis
+ *  keeps its full length NgridZ in every layout, and the 13 / 23 transposes
+ *  use the fft_complex variant my_fft_column_transpose_c().
+ *
+ * \return void
+ */
+void my_column_based_fft_init_c2c(fft_plan *plan, int NgridX, int NgridY, int NgridZ)
+{
+  plan->NgridX = NgridX;
+  plan->NgridY = NgridY;
+  plan->NgridZ = NgridZ;
+
+  int columns, avg, exc, tasklastsection, pivotcol;
+
+  columns = NgridX * NgridY;             /* one column per (x, y) pair */
+  avg = (columns - 1) / NTask + 1;       /* ceil(columns / NTask) for columns >= 1 */
+  exc = NTask * avg - columns;           /* number of tasks that receive avg - 1 columns */
+  tasklastsection = NTask - exc;         /* first task of the section holding avg - 1 columns */
+  pivotcol = tasklastsection * avg;      /* first column owned by that section */
+
+  plan->pivotcol = pivotcol;
+  plan->avg = avg;
+  plan->tasklastsection = tasklastsection;
+
+  if(ThisTask < tasklastsection)
+    {
+      plan->base_firstcol = ThisTask * avg;
+      plan->base_ncol = avg;
+    }
+  else
+    {
+      plan->base_firstcol = ThisTask * avg - (ThisTask - tasklastsection); /* one column less for every task since tasklastsection */
+      plan->base_ncol = avg - 1;
+    }
+
+  plan->base_lastcol = plan->base_firstcol + plan->base_ncol - 1;
+
+  /* column ranges of the other layouts; subdivide_evenly() is defined elsewhere and presumably splits the given number of
+   * columns over NTask tasks, returning this task's first column and column count -- confirm against its definition */
+  subdivide_evenly(NgridX * NgridZ, NTask, ThisTask, &plan->transposed_firstcol, &plan->transposed_ncol);
+
+  subdivide_evenly(NgridY * NgridZ, NTask, ThisTask, &plan->second_transposed_firstcol, &plan->second_transposed_ncol);
+
+  subdivide_evenly(plan->NgridX * plan->NgridZ, NTask, ThisTask, &plan->firstcol_XZ, &plan->ncol_XZ);
+
+  subdivide_evenly(plan->NgridY * plan->NgridZ, NTask, ThisTask, &plan->firstcol_YZ, &plan->ncol_YZ);
+
+  plan->second_transposed_ncells = ((size_t)plan->NgridX) * plan->second_transposed_ncol;
+
+  /* max_datasize is the maximum over the five layouts below */
+  plan->max_datasize = 2 * ((size_t)plan->NgridZ) * plan->base_ncol;
+  plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridY) * plan->transposed_ncol);
+  plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridX) * plan->second_transposed_ncol);
+  plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_XZ) * plan->NgridY);
+  plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_YZ) * plan->NgridX);
+
+  plan->fftsize = plan->max_datasize;
+
+  /* offset tables, one size_t per task, for each of the eight remap / transpose operations */
+  plan->offsets_send_A = mymalloc_clear("offsets_send_A", NTask * sizeof(size_t));
+  plan->offsets_recv_A = mymalloc_clear("offsets_recv_A", NTask * sizeof(size_t));
+  plan->offsets_send_B = mymalloc_clear("offsets_send_B", NTask * sizeof(size_t));
+  plan->offsets_recv_B = mymalloc_clear("offsets_recv_B", NTask * sizeof(size_t));
+  plan->offsets_send_C = mymalloc_clear("offsets_send_C", NTask * sizeof(size_t));
+  plan->offsets_recv_C = mymalloc_clear("offsets_recv_C", NTask * sizeof(size_t));
+  plan->offsets_send_D = mymalloc_clear("offsets_send_D", NTask * sizeof(size_t));
+  plan->offsets_recv_D = mymalloc_clear("offsets_recv_D", NTask * sizeof(size_t));
+  plan->offsets_send_13 = mymalloc_clear("offsets_send_13", NTask * sizeof(size_t));
+  plan->offsets_recv_13 = mymalloc_clear("offsets_recv_13", NTask * sizeof(size_t));
+  plan->offsets_send_23 = mymalloc_clear("offsets_send_23", NTask * sizeof(size_t));
+  plan->offsets_recv_23 = mymalloc_clear("offsets_recv_23", NTask * sizeof(size_t));
+  plan->offsets_send_13back = mymalloc_clear("offsets_send_13back", NTask * sizeof(size_t));
+  plan->offsets_recv_13back = mymalloc_clear("offsets_recv_13back", NTask * sizeof(size_t));
+  plan->offsets_send_23back = mymalloc_clear("offsets_send_23back", NTask * sizeof(size_t));
+  plan->offsets_recv_23back = mymalloc_clear("offsets_recv_23back", NTask * sizeof(size_t));
+  /* matching element-count tables, again one size_t per task */
+  plan->count_send_A = mymalloc_clear("count_send_A", NTask * sizeof(size_t));
+  plan->count_recv_A = mymalloc_clear("count_recv_A", NTask * sizeof(size_t));
+  plan->count_send_B = mymalloc_clear("count_send_B", NTask * sizeof(size_t));
+  plan->count_recv_B = mymalloc_clear("count_recv_B", NTask * sizeof(size_t));
+  plan->count_send_C = mymalloc_clear("count_send_C", NTask * sizeof(size_t));
+  plan->count_recv_C = mymalloc_clear("count_recv_C", NTask * sizeof(size_t));
+  plan->count_send_D = mymalloc_clear("count_send_D", NTask * sizeof(size_t));
+  plan->count_recv_D = mymalloc_clear("count_recv_D", NTask * sizeof(size_t));
+  plan->count_send_13 = mymalloc_clear("count_send_13", NTask * sizeof(size_t));
+  plan->count_recv_13 = mymalloc_clear("count_recv_13", NTask * sizeof(size_t));
+  plan->count_send_23 = mymalloc_clear("count_send_23", NTask * sizeof(size_t));
+  plan->count_recv_23 = mymalloc_clear("count_recv_23", NTask * sizeof(size_t));
+  plan->count_send_13back = mymalloc_clear("count_send_13back", NTask * sizeof(size_t));
+  plan->count_recv_13back = mymalloc_clear("count_recv_13back", NTask * sizeof(size_t));
+  plan->count_send_23back = mymalloc_clear("count_send_23back", NTask * sizeof(size_t));
+  plan->count_recv_23back = mymalloc_clear("count_recv_23back", NTask * sizeof(size_t));
+  /* counting passes: each call below gets NULL buffers and just_count_flag = 1, so only the tables are filled.
+   * A: base layout -> transposed layout (last two axes swapped) */
+  int dimA[3] = {plan->NgridX, plan->NgridY, plan->NgridZ};
+  int permA[3] = {0, 2, 1};
+
+  my_fft_column_remap(NULL, dimA, plan->base_firstcol, plan->base_ncol, NULL, permA, plan->transposed_firstcol, plan->transposed_ncol,
+                      plan->offsets_send_A, plan->offsets_recv_A, plan->count_send_A, plan->count_recv_A, 1);
+  /* B: transposed layout -> second transposed layout (first and last axes swapped) */
+  int dimB[3] = {plan->NgridX, plan->NgridZ, plan->NgridY};
+  int permB[3] = {2, 1, 0};
+
+  my_fft_column_remap(NULL, dimB, plan->transposed_firstcol, plan->transposed_ncol, NULL, permB, plan->second_transposed_firstcol,
+                      plan->second_transposed_ncol, plan->offsets_send_B, plan->offsets_recv_B, plan->count_send_B, plan->count_recv_B,
+                      1);
+  /* C: second transposed layout -> transposed layout (reverse of B) */
+  int dimC[3] = {plan->NgridY, plan->NgridZ, plan->NgridX};
+  int permC[3] = {2, 1, 0};
+
+  my_fft_column_remap(NULL, dimC, plan->second_transposed_firstcol, plan->second_transposed_ncol, NULL, permC,
+                      plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C, plan->count_send_C,
+                      plan->count_recv_C, 1);
+  /* D: transposed layout -> base layout (reverse of A) */
+  int dimD[3] = {plan->NgridX, plan->NgridZ, plan->NgridY};
+  int permD[3] = {0, 2, 1};
+
+  my_fft_column_remap(NULL, dimD, plan->transposed_firstcol, plan->transposed_ncol, NULL, permD, plan->base_firstcol, plan->base_ncol,
+                      plan->offsets_send_D, plan->offsets_recv_D, plan->count_send_D, plan->count_recv_D, 1);
+  /* 23 / 23back: complex-valued transpose swapping the second and third axes, and its reverse */
+  int dim23[3] = {plan->NgridX, plan->NgridY, plan->NgridZ};
+  int perm23[3] = {0, 2, 1};
+
+  my_fft_column_transpose_c(NULL, dim23, plan->base_firstcol, plan->base_ncol, NULL, perm23, plan->firstcol_XZ, plan->ncol_XZ,
+                            plan->offsets_send_23, plan->offsets_recv_23, plan->count_send_23, plan->count_recv_23, 1);
+
+  int dim23back[3] = {plan->NgridX, plan->NgridZ, plan->NgridY};
+  int perm23back[3] = {0, 2, 1};
+
+  my_fft_column_transpose_c(NULL, dim23back, plan->firstcol_XZ, plan->ncol_XZ, NULL, perm23back, plan->base_firstcol, plan->base_ncol,
+                            plan->offsets_send_23back, plan->offsets_recv_23back, plan->count_send_23back, plan->count_recv_23back, 1);
+  /* 13 / 13back: complex-valued transpose swapping the first and third axes, and its reverse */
+  int dim13[3] = {plan->NgridX, plan->NgridY, plan->NgridZ};
+  int perm13[3] = {2, 1, 0};
+
+  my_fft_column_transpose_c(NULL, dim13, plan->base_firstcol, plan->base_ncol, NULL, perm13, plan->firstcol_YZ, plan->ncol_YZ,
+                            plan->offsets_send_13, plan->offsets_recv_13, plan->count_send_13, plan->count_recv_13, 1);
+
+  int dim13back[3] = {plan->NgridZ, plan->NgridY, plan->NgridX};
+  int perm13back[3] = {2, 1, 0};
+
+  my_fft_column_transpose_c(NULL, dim13back, plan->firstcol_YZ, plan->ncol_YZ, NULL, perm13back, plan->base_firstcol, plan->base_ncol,
+                            plan->offsets_send_13back, plan->offsets_recv_13back, plan->count_send_13back, plan->count_recv_13back, 1);
+}
+
+/*! \brief YZ column transpose.
+ *
+ *  Moves real-valued data from the base column layout to the XZ column
+ *  layout (second and third axes swapped), using the "23" tables of the
+ *  plan.
+ *
+ * \param[in] plan FFT plan.
+ * \param[in] data Array with data to be swapped.
+ * \param[out] out Array with data output.
+ *
+ * \return void
+ */
+void my_fft_swap23(fft_plan *plan, fft_real *data, fft_real *out)
+{
+  int dim23[3] = {plan->NgridX, plan->NgridY, plan->Ngrid2}; /* input layout */
+  int perm23[3] = {0, 2, 1};                                 /* swap second and third axis */
+
+  my_fft_column_transpose(data, dim23, plan->base_firstcol, plan->base_ncol, out, perm23, plan->firstcol_XZ, plan->ncol_XZ,
+                          plan->offsets_send_23, plan->offsets_recv_23, plan->count_send_23, plan->count_recv_23, 0);
+}
+
+/*! \brief Reverse YZ column transpose.
+ *
+ * \param[in] plan FFT plan.
+ * \param[in] data Array with data to be swapped.
+ * \param[out] out Array with data output.
+ * + * \return void + */ +void my_fft_swap23back(fft_plan *plan, fft_real *data, fft_real *out) +{ + int dim23back[3] = {plan->NgridX, plan->Ngrid2, plan->NgridY}; + int perm23back[3] = {0, 2, 1}; + + my_fft_column_transpose(data, dim23back, plan->firstcol_XZ, plan->ncol_XZ, out, perm23back, plan->base_firstcol, plan->base_ncol, + plan->offsets_send_23back, plan->offsets_recv_23back, plan->count_send_23back, plan->count_recv_23back, 0); +} + +/*! \brief XZ column transpose. + * + * \param[in] plan FFT plan. + * \param[in] data Array with data to be swapped. + * \param[out] out Array with data output. + * + * \return void + */ +void my_fft_swap13(fft_plan *plan, fft_real *data, fft_real *out) +{ + int dim13[3] = {plan->NgridX, plan->NgridY, plan->Ngrid2}; + int perm13[3] = {2, 1, 0}; + + my_fft_column_transpose(data, dim13, plan->base_firstcol, plan->base_ncol, out, perm13, plan->firstcol_YZ, plan->ncol_YZ, + plan->offsets_send_13, plan->offsets_recv_13, plan->count_send_13, plan->count_recv_13, 0); +} + +/*! \brief Reverse XZ column transpose. + * + * \param[in] plan FFT plan. + * \param[in] data Array with data to be swapped. + * \param[out] out Array with data output. + * + * \return void + */ +void my_fft_swap13back(fft_plan *plan, fft_real *data, fft_real *out) +{ + int dim13back[3] = {plan->Ngrid2, plan->NgridY, plan->NgridX}; + int perm13back[3] = {2, 1, 0}; + + my_fft_column_transpose(data, dim13back, plan->firstcol_YZ, plan->ncol_YZ, out, perm13back, plan->base_firstcol, plan->base_ncol, + plan->offsets_send_13back, plan->offsets_recv_13back, plan->count_send_13back, plan->count_recv_13back, 0); +} + +/*! \brief Performs a column-based Fast Fourier transformation. + * + * \param[in] plan FFT plan. + * \param[in, out] data Array to be Fourier transformed. + * \param[out] workspace Workspace to temporary operate in. + * \param[in] forward Forward (1) or backward (-1) Fourier transformaiton? 
+ *
+ * \return void
+ */
+void my_column_based_fft(fft_plan *plan, void *data, void *workspace, int forward)
+{
+  size_t col;
+  fft_real *real_data = data, *real_work = workspace;
+  fft_complex *cplx_data = data, *cplx_work = workspace;
+
+  if(forward == 1)
+    {
+      /* layouts and axis permutations of the two remaps used on the way forward */
+      int dims_xyz[3] = {plan->NgridX, plan->NgridY, plan->Ngridz};
+      int swap_last_two[3] = {0, 2, 1};
+      int dims_xzy[3] = {plan->NgridX, plan->Ngridz, plan->NgridY};
+      int swap_outer[3] = {2, 1, 0};
+
+      /* z-direction, real to complex: data -> workspace */
+      for(col = 0; col < plan->base_ncol; col++)
+        {
+          FFTW(execute_dft_r2c)(plan->forward_plan_zdir, real_data + col * plan->Ngrid2, cplx_work + col * plan->Ngridz);
+        }
+
+      /* bring y into the fastest index: workspace -> data */
+      my_fft_column_remap(cplx_work, dims_xyz, plan->base_firstcol, plan->base_ncol, cplx_data, swap_last_two,
+                          plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_A, plan->offsets_recv_A,
+                          plan->count_send_A, plan->count_recv_A, 0);
+
+      /* y-direction, complex to complex: data -> workspace */
+      for(col = 0; col < plan->transposed_ncol; col++)
+        {
+          FFTW(execute_dft)(plan->forward_plan_ydir, cplx_data + col * plan->NgridY, cplx_work + col * plan->NgridY);
+        }
+
+      /* bring x into the fastest index: workspace -> data */
+      my_fft_column_remap(cplx_work, dims_xzy, plan->transposed_firstcol, plan->transposed_ncol, cplx_data, swap_outer,
+                          plan->second_transposed_firstcol, plan->second_transposed_ncol, plan->offsets_send_B, plan->offsets_recv_B,
+                          plan->count_send_B, plan->count_recv_B, 0);
+
+      /* x-direction, complex to complex: data -> workspace */
+      for(col = 0; col < plan->second_transposed_ncol; col++)
+        {
+          FFTW(execute_dft)(plan->forward_plan_xdir, cplx_data + col * plan->NgridX, cplx_work + col * plan->NgridX);
+        }
+
+      /* result is now in workspace */
+    }
+  else
+    {
+      /* layouts and axis permutations of the two remaps used on the way back */
+      int dims_yzx[3] = {plan->NgridY, plan->Ngridz, plan->NgridX};
+      int swap_outer[3] = {2, 1, 0};
+      int dims_xzy[3] = {plan->NgridX, plan->Ngridz, plan->NgridY};
+      int swap_last_two[3] = {0, 2, 1};
+
+      /* inverse x-direction transform: data -> workspace */
+      for(col = 0; col < plan->second_transposed_ncol; col++)
+        {
+          FFTW(execute_dft)(plan->backward_plan_xdir, cplx_data + col * plan->NgridX, cplx_work + col * plan->NgridX);
+        }
+
+      /* back to the layout with y fastest: workspace -> data */
+      my_fft_column_remap(cplx_work, dims_yzx, plan->second_transposed_firstcol, plan->second_transposed_ncol, cplx_data, swap_outer,
+                          plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C,
+                          plan->count_send_C, plan->count_recv_C, 0);
+
+      /* inverse y-direction transform: data -> workspace */
+      for(col = 0; col < plan->transposed_ncol; col++)
+        {
+          FFTW(execute_dft)(plan->backward_plan_ydir, cplx_data + col * plan->NgridY, cplx_work + col * plan->NgridY);
+        }
+
+      /* back to the base layout with z fastest: workspace -> data */
+      my_fft_column_remap(cplx_work, dims_xzy, plan->transposed_firstcol, plan->transposed_ncol, cplx_data, swap_last_two,
+                          plan->base_firstcol, plan->base_ncol, plan->offsets_send_D, plan->offsets_recv_D, plan->count_send_D,
+                          plan->count_recv_D, 0);
+
+      /* complex-to-real inverse transform on z-coordinates: data -> workspace */
+      for(col = 0; col < plan->base_ncol; col++)
+        {
+          FFTW(execute_dft_c2r)(plan->backward_plan_zdir, cplx_data + col * plan->Ngridz, real_work + col * plan->Ngrid2);
+        }
+    }
+}
+
+/*! \brief Performs a column-based complex to complex Fast Fourier
+ *         transformation.
+ *
+ *  The final transform of either direction writes into workspace, so the
+ *  result is found there.
+ *
+ * \param[in] plan FFT plan.
+ * \param[in, out] data Array to be Fourier transformed; also used as
+ *                 scratch space.
+ * \param[out] workspace Second buffer; holds the result on return.
+ * \param[in] forward Forward (1) or backward (-1) Fourier transformation?
+ *
+ * \return void
+ */
+void my_column_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward)
+{
+  size_t col;
+  fft_complex *cplx_data = data, *cplx_work = workspace;
+
+  if(forward == 1)
+    {
+      /* layouts and axis permutations of the two remaps used on the way forward */
+      int dims_xyz[3] = {plan->NgridX, plan->NgridY, plan->NgridZ};
+      int swap_last_two[3] = {0, 2, 1};
+      int dims_xzy[3] = {plan->NgridX, plan->NgridZ, plan->NgridY};
+      int swap_outer[3] = {2, 1, 0};
+
+      /* z-direction, complex to complex: data -> workspace */
+      for(col = 0; col < plan->base_ncol; col++)
+        {
+          FFTW(execute_dft)(plan->forward_plan_zdir, cplx_data + col * plan->NgridZ, cplx_work + col * plan->NgridZ);
+        }
+
+      /* bring y into the fastest index: workspace -> data */
+      my_fft_column_remap(cplx_work, dims_xyz, plan->base_firstcol, plan->base_ncol, cplx_data, swap_last_two,
+                          plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_A, plan->offsets_recv_A,
+                          plan->count_send_A, plan->count_recv_A, 0);
+
+      /* y-direction, complex to complex: data -> workspace */
+      for(col = 0; col < plan->transposed_ncol; col++)
+        {
+          FFTW(execute_dft)(plan->forward_plan_ydir, cplx_data + col * plan->NgridY, cplx_work + col * plan->NgridY);
+        }
+
+      /* bring x into the fastest index: workspace -> data */
+      my_fft_column_remap(cplx_work, dims_xzy, plan->transposed_firstcol, plan->transposed_ncol, cplx_data, swap_outer,
+                          plan->second_transposed_firstcol, plan->second_transposed_ncol, plan->offsets_send_B, plan->offsets_recv_B,
+                          plan->count_send_B, plan->count_recv_B, 0);
+
+      /* x-direction, complex to complex: data -> workspace */
+      for(col = 0; col < plan->second_transposed_ncol; col++)
+        {
+          FFTW(execute_dft)(plan->forward_plan_xdir, cplx_data + col * plan->NgridX, cplx_work + col * plan->NgridX);
+        }
+
+      /* result is now in workspace */
+    }
+  else
+    {
+      /* layouts and axis permutations of the two remaps used on the way back */
+      int dims_yzx[3] = {plan->NgridY, plan->NgridZ, plan->NgridX};
+      int swap_outer[3] = {2, 1, 0};
+      int dims_xzy[3] = {plan->NgridX, plan->NgridZ, plan->NgridY};
+      int swap_last_two[3] = {0, 2, 1};
+
+      /* inverse x-direction transform: data -> workspace */
+      for(col = 0; col < plan->second_transposed_ncol; col++)
+        {
+          FFTW(execute_dft)(plan->backward_plan_xdir, cplx_data + col * plan->NgridX, cplx_work + col * plan->NgridX);
+        }
+
+      /* back to the layout with y fastest: workspace -> data */
+      my_fft_column_remap(cplx_work, dims_yzx, plan->second_transposed_firstcol, plan->second_transposed_ncol, cplx_data, swap_outer,
+                          plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C,
+                          plan->count_send_C, plan->count_recv_C, 0);
+
+      /* inverse y-direction transform: data -> workspace */
+      for(col = 0; col < plan->transposed_ncol; col++)
+        {
+          FFTW(execute_dft)(plan->backward_plan_ydir, cplx_data + col * plan->NgridY, cplx_work + col * plan->NgridY);
+        }
+
+      /* back to the base layout with z fastest: workspace -> data */
+      my_fft_column_remap(cplx_work, dims_xzy, plan->transposed_firstcol, plan->transposed_ncol, cplx_data, swap_last_two,
+                          plan->base_firstcol, plan->base_ncol, plan->offsets_send_D, plan->offsets_recv_D, plan->count_send_D,
+                          plan->count_recv_D, 0);
+
+      /* complex-to-complex inverse transform on z-coordinates: data -> workspace */
+      for(col = 0; col < plan->base_ncol; col++)
+        {
+          FFTW(execute_dft)(plan->backward_plan_zdir, cplx_data + col * plan->NgridZ, cplx_work + col * plan->NgridZ);
+        }
+    }
+}
+
+/*! \brief Remaps column-based FFT data.
+ *
+ * \param[in, out] data Data to be remapped; reused as receive buffer
+ *                 during the exchange (not accessed when only counting).
+ * \param[in] Ndims Global dimensions of the data cube in the input layout.
+ * \param[in] in_firstcol First column held by this task in the input.
+ * \param[in] in_ncol Number of columns held by this task in the input.
+ * \param[out] out Data output (not accessed when only counting).
+ * \param[in] perm Permutation of the dimensions for the output layout.
+ * \param[in] out_firstcol First column of this task in the output data.
+ * \param[in] out_ncol Number of columns of this task in the output data.
+ * \param[in, out] offset_send Offset in array for send operation to MPI
+ *                 tasks; written when counting, read when transferring.
+ * \param[in, out] offset_recv Offset in array for receive operation from MPI
+ *                 tasks; written when counting, read when transferring.
+ * \param[in, out] count_send Count how many elements have to be sent to each
+ *                 MPI task.
+ * \param[in, out] count_recv Count how many elements have to be received from
+ *                 each MPI task.
+ * \param[in] just_count_flag Do element counting for communication instead
+ *            of data transfer.
+ *
+ *  Works in two modes. With just_count_flag set, only count_send /
+ *  count_recv and offset_send / offset_recv are computed; data and out are
+ *  never dereferenced. Otherwise the cells are packed into out grouped by
+ *  target task, exchanged pairwise between tasks into data, and finally
+ *  unpacked from data into out in the new column order.
+ *
+ * \return void
+ */
+static void my_fft_column_remap(fft_complex *data, int Ndims[3], int in_firstcol, int in_ncol, fft_complex *out, int perm[3],
+                                int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send,
+                                size_t *count_recv, size_t just_count_flag)
+{
+  int j, target, origin, ngrp, recvTask, perm_rev[3], xyz[3], uvw[3];
+  size_t nimport, nexport;
+
+  /* determine the inverse permutation: first try perm itself, which is its own inverse when it is the identity or
+   * swaps two axes */
+  for(j = 0; j < 3; j++)
+    perm_rev[j] = perm[j];
+
+  if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) /* not yet the inverse */
+    {
+      for(j = 0; j < 3; j++)
+        perm_rev[j] = perm[perm[j]]; /* applying perm twice inverts a cyclic shift of the three axes */
+
+      if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2))
+        terminate("bummer"); /* neither candidate is the inverse, so perm is not a permutation of {0, 1, 2} */
+    }
+  /* column distribution of the input layout: the first in_tasklastsection tasks hold in_avg columns each, the
+   * remaining tasks in_avg - 1; in_pivotcol is the first column of the second group */
+  int in_colums = Ndims[0] * Ndims[1];
+  int in_avg = (in_colums - 1) / NTask + 1;
+  int in_exc = NTask * in_avg - in_colums;
+  int in_tasklastsection = NTask - in_exc;
+  int in_pivotcol = in_tasklastsection * in_avg;
+  /* same quantities for the output layout, whose columns are spanned by the permuted first two dimensions */
+  int out_colums = Ndims[perm[0]] * Ndims[perm[1]];
+  int out_avg = (out_colums - 1) / NTask + 1;
+  int out_exc = NTask * out_avg - out_colums;
+  int out_tasklastsection = NTask - out_exc;
+  int out_pivotcol = out_tasklastsection * out_avg;
+
+  size_t i, ncells = ((size_t)in_ncol) * Ndims[2]; /* number of cells this task holds in the input layout */
+
+  xyz[0] = in_firstcol / Ndims[1]; /* coordinates of the first local cell in the input ordering */
+  xyz[1] = in_firstcol % Ndims[1];
+  xyz[2] = 0;
+
+  memset(count_send, 0, NTask * sizeof(size_t)); /* count_send doubles as a per-target fill counter in the loop below */
+
+  /* loop over all cells in input array and determine target processor */
+  for(i = 0; i < ncells; i++)
+    {
+      /* determine target task: uvw are the coordinates of this cell in the output ordering */
+      uvw[0] = xyz[perm[0]];
+      uvw[1] = xyz[perm[1]];
+      uvw[2] = xyz[perm[2]];
+
+      int newcol = Ndims[perm[1]] * uvw[0] + uvw[1]; /* column index in the output layout */
+      if(newcol < out_pivotcol)
+        target = newcol / out_avg;
+      else
+        target = (newcol - out_pivotcol) / (out_avg - 1) + out_tasklastsection;
+
+      /* move data element to target task (or only count it) */
+
+      if(just_count_flag)
+        count_send[target]++;
+      else
+        {
+          size_t off = offset_send[target] + count_send[target]++; /* next free slot in the send segment of this target */
+          out[off][0] = data[i][0];
+          out[off][1] = data[i][1];
+        }
+      xyz[2]++; /* advance to the next input cell; the last index runs fastest */
+      if(xyz[2] == Ndims[2])
+        {
+          xyz[2] = 0;
+          xyz[1]++;
+          if(xyz[1] == Ndims[1])
+            {
+              xyz[1] = 0;
+              xyz[0]++;
+            }
+        }
+    }
+
+  if(just_count_flag)
+    {
+      MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); /* each size_t count is shipped as raw bytes */
+
+      for(j = 0, nimport = 0, nexport = 0, offset_send[0] = 0, offset_recv[0] = 0; j < NTask; j++)
+        {
+          nexport += count_send[j];
+          nimport += count_recv[j];
+
+          if(j > 0)
+            {
+              offset_send[j] = offset_send[j - 1] + count_send[j - 1]; /* offsets are exclusive prefix sums of the counts */
+              offset_recv[j] = offset_recv[j - 1] + count_recv[j - 1];
+            }
+        }
+
+      if(nexport != ncells) /* every local cell must have been assigned to exactly one target */
+        terminate("nexport=%lld != ncells=%lld", (long long)nexport, (long long)ncells);
+    }
+  else
+    {
+      nimport = 0;
+
+      /* exchange all the data: in round ngrp this task is paired with task ThisTask ^ ngrp.
+       * NOTE(review): PTask is defined elsewhere; presumably 2^PTask >= NTask -- confirm */
+      for(ngrp = 0; ngrp < (1 << PTask); ngrp++)
+        {
+          recvTask = ThisTask ^ ngrp;
+
+          if(recvTask < NTask)
+            {
+              if(count_send[recvTask] > 0 || count_recv[recvTask] > 0)
+                myMPI_Sendrecv(&out[offset_send[recvTask]], count_send[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask, TAG_DENS_A,
+                               &data[offset_recv[recvTask]], count_recv[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask,
+                               TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); /* packed cells leave from out; data is reused as receive buffer */
+
+              nimport += count_recv[recvTask];
+            }
+        }
+
+      /* now loop over the new cell layout */
+      /* find enclosing rectangle around columns in new plane (first/last are in output coordinates) */
+
+      int first[3], last[3];
+
+      first[0] = out_firstcol / Ndims[perm[1]];
+      first[1] = out_firstcol % Ndims[perm[1]];
+      first[2] = 0;
+
+      last[0] = (out_firstcol + out_ncol - 1) / Ndims[perm[1]];
+      last[1] = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+      last[2] = Ndims[perm[2]] - 1;
+
+      if(first[1] + out_ncol >= Ndims[perm[1]]) /* the local columns reach the end of a row, so scan the full second-index range */
+        {
+          first[1] = 0;
+          last[1] = Ndims[perm[1]] - 1;
+        }
+
+      /* now need to map this back to the old coordinates */
+
+      int xyz_first[3], xyz_last[3];
+
+      for(j = 0; j < 3; j++)
+        {
+          xyz_first[j] = first[perm_rev[j]];
+          xyz_last[j] = last[perm_rev[j]];
+        }
+
+      memset(count_recv, 0, NTask * sizeof(size_t)); /* count_recv doubles as a per-origin read counter in the loop below */
+
+      size_t count = 0;
+
+      /* traverse an enclosing box around the new cell layout in the old order */
+      for(xyz[0] = xyz_first[0]; xyz[0] <= xyz_last[0]; xyz[0]++)
+        for(xyz[1] = xyz_first[1]; xyz[1] <= xyz_last[1]; xyz[1]++)
+          for(xyz[2] = xyz_first[2]; xyz[2] <= xyz_last[2]; xyz[2]++)
+            {
+              /* check that the point is actually part of a column owned by this task in the output layout */
+              uvw[0] = xyz[perm[0]];
+              uvw[1] = xyz[perm[1]];
+              uvw[2] = xyz[perm[2]];
+
+              int col = uvw[0] * Ndims[perm[1]] + uvw[1];
+
+              if(col >= out_firstcol && col < out_firstcol + out_ncol)
+                {
+                  /* determine origin task from the column index in the input layout */
+                  int newcol = Ndims[1] * xyz[0] + xyz[1];
+                  if(newcol < in_pivotcol)
+                    origin = newcol / in_avg;
+                  else
+                    origin = (newcol - in_pivotcol) / (in_avg - 1) + in_tasklastsection;
+
+                  size_t index = ((size_t)Ndims[perm[2]]) * (col - out_firstcol) + uvw[2]; /* position in the local output array */
+
+                  /* move data element from origin task: take the next unread element of that task's receive segment */
+                  size_t off = offset_recv[origin] + count_recv[origin]++;
+                  out[index][0] = data[off][0];
+                  out[index][1] = data[off][1];
+
+                  count++;
+                }
+            }
+
+      if(count != nimport) /* every received element must have been placed exactly once */
+        {
+          int fi = out_firstcol % Ndims[perm[1]];
+          int la = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+
+          terminate("count=%lld nimport=%lld ncol=%d fi=%d la=%d first=%d last=%d\n", (long long)count, (long long)nimport, out_ncol,
+                    fi, la, first[1], last[1]);
+        }
+    }
+}
+
+/*! \brief Transposes column-based FFT data.
+ *
+ * \param[in] data Data to be transposed.
+ * \param[in] Ndims Global dimensions of the data cube in the input layout.
+ * \param[in] in_firstcol First column.
+ * \param[in] in_ncol Number of columns.
+ * \param[out] out Data output.
+ * \param[in] perm Permutations in dimensions.
+ * \param[in] out_firstcol First column in output data.
+ * \param[in] out_ncol Number of columns in output data.
+ * \param[out] offset_send Offset in array for send operation to MPI tasks.
+ * \param[out] offset_recv Offset in array for receive operation from MPI
+ *                         tasks.
+ * \param[out] count_send Count how many elements have to be sent to each + * MPI task. + * \param[out] count_recv Count how many elements have to be received from + * each MPI task. + * \param[in] just_count_flag Do element counting for communication instead + * of data transfer. + * + * \return void + */ +static void my_fft_column_transpose(fft_real *data, int Ndims[3], int in_firstcol, int in_ncol, fft_real *out, int perm[3], + int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send, + size_t *count_recv, size_t just_count_flag) +{ + int j, target, origin, ngrp, recvTask, perm_rev[3], xyz[3], uvw[3]; + size_t nimport, nexport; + + /* determine the inverse permutation */ + for(j = 0; j < 3; j++) + perm_rev[j] = perm[j]; + + if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) /* not yet the inverse */ + { + for(j = 0; j < 3; j++) + perm_rev[j] = perm[perm[j]]; + + if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) + terminate("bummer"); + } + + int in_colums = Ndims[0] * Ndims[1]; + int in_avg = (in_colums - 1) / NTask + 1; + int in_exc = NTask * in_avg - in_colums; + int in_tasklastsection = NTask - in_exc; + int in_pivotcol = in_tasklastsection * in_avg; + + int out_colums = Ndims[perm[0]] * Ndims[perm[1]]; + int out_avg = (out_colums - 1) / NTask + 1; + int out_exc = NTask * out_avg - out_colums; + int out_tasklastsection = NTask - out_exc; + int out_pivotcol = out_tasklastsection * out_avg; + + size_t i, ncells = ((size_t)in_ncol) * Ndims[2]; + + xyz[0] = in_firstcol / Ndims[1]; + xyz[1] = in_firstcol % Ndims[1]; + xyz[2] = 0; + + memset(count_send, 0, NTask * sizeof(size_t)); + + /* loop over all cells in input array and determine target processor */ + for(i = 0; i < ncells; i++) + { + /* determine target task */ + uvw[0] = xyz[perm[0]]; + uvw[1] = xyz[perm[1]]; + uvw[2] = xyz[perm[2]]; + + int newcol = Ndims[perm[1]] * uvw[0] + uvw[1]; + if(newcol < 
out_pivotcol) + target = newcol / out_avg; + else + target = (newcol - out_pivotcol) / (out_avg - 1) + out_tasklastsection; + + /* move data element to targettask */ + + if(just_count_flag) + count_send[target]++; + else + { + size_t off = offset_send[target] + count_send[target]++; + out[off] = data[i]; + } + xyz[2]++; + if(xyz[2] == Ndims[2]) + { + xyz[2] = 0; + xyz[1]++; + if(xyz[1] == Ndims[1]) + { + xyz[1] = 0; + xyz[0]++; + } + } + } + + if(just_count_flag) + { + MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, nexport = 0, offset_send[0] = 0, offset_recv[0] = 0; j < NTask; j++) + { + nexport += count_send[j]; + nimport += count_recv[j]; + + if(j > 0) + { + offset_send[j] = offset_send[j - 1] + count_send[j - 1]; + offset_recv[j] = offset_recv[j - 1] + count_recv[j - 1]; + } + } + + if(nexport != ncells) + terminate("nexport=%lld != ncells=%lld", (long long)nexport, (long long)ncells); + } + else + { + nimport = 0; + + /* exchange all the data */ + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(count_send[recvTask] > 0 || count_recv[recvTask] > 0) + myMPI_Sendrecv(&out[offset_send[recvTask]], count_send[recvTask] * sizeof(fft_real), MPI_BYTE, recvTask, TAG_DENS_A, + &data[offset_recv[recvTask]], count_recv[recvTask] * sizeof(fft_real), MPI_BYTE, recvTask, TAG_DENS_A, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + nimport += count_recv[recvTask]; + } + } + + /* now loop over the new cell layout */ + /* find enclosing rectangle around columns in new plane */ + + int first[3], last[3]; + + first[0] = out_firstcol / Ndims[perm[1]]; + first[1] = out_firstcol % Ndims[perm[1]]; + first[2] = 0; + + last[0] = (out_firstcol + out_ncol - 1) / Ndims[perm[1]]; + last[1] = (out_firstcol + out_ncol - 1) % Ndims[perm[1]]; + last[2] = Ndims[perm[2]] - 1; + + if(first[1] + out_ncol >= Ndims[perm[1]]) + { + first[1] = 0; + last[1] = 
Ndims[perm[1]] - 1; + } + + /* now need to map this back to the old coordinates */ + + int xyz_first[3], xyz_last[3]; + + for(j = 0; j < 3; j++) + { + xyz_first[j] = first[perm_rev[j]]; + xyz_last[j] = last[perm_rev[j]]; + } + + memset(count_recv, 0, NTask * sizeof(size_t)); + + size_t count = 0; + + /* traverse an enclosing box around the new cell layout in the old order */ + for(xyz[0] = xyz_first[0]; xyz[0] <= xyz_last[0]; xyz[0]++) + for(xyz[1] = xyz_first[1]; xyz[1] <= xyz_last[1]; xyz[1]++) + for(xyz[2] = xyz_first[2]; xyz[2] <= xyz_last[2]; xyz[2]++) + { + /* check that the point is actually part of a column */ + uvw[0] = xyz[perm[0]]; + uvw[1] = xyz[perm[1]]; + uvw[2] = xyz[perm[2]]; + + int col = uvw[0] * Ndims[perm[1]] + uvw[1]; + + if(col >= out_firstcol && col < out_firstcol + out_ncol) + { + /* determine origin task */ + int newcol = Ndims[1] * xyz[0] + xyz[1]; + if(newcol < in_pivotcol) + origin = newcol / in_avg; + else + origin = (newcol - in_pivotcol) / (in_avg - 1) + in_tasklastsection; + + size_t index = ((size_t)Ndims[perm[2]]) * (col - out_firstcol) + uvw[2]; + + /* move data element from origin task */ + size_t off = offset_recv[origin] + count_recv[origin]++; + out[index] = data[off]; + + count++; + } + } + + if(count != nimport) + { + int fi = out_firstcol % Ndims[perm[1]]; + int la = (out_firstcol + out_ncol - 1) % Ndims[perm[1]]; + + terminate("count=%lld nimport=%lld ncol=%d fi=%d la=%d first=%d last=%d\n", (long long)count, (long long)nimport, out_ncol, + fi, la, first[1], last[1]); + } + } +} + +/*! \brief Transposes column-based complex FFT data. + * + * \param[in] data Data to be transposed. + * \param[in] Ndims Global number of dimensions of data cube. + * \param[in] in_firstcol First column. + * \param[in] in_ncol Number of columns. + * \param[out] out Data output. + * \param[in] perm Permutations in dimensions. + * \param[out] out_firstcol First column in output data. + * \param[out] out_ncol Number of columns in output data. 
+ * \param[out] offset_send Offset in array for send operation to MPI tasks. + * \param[out] offset_recv Offset in array for receive operation from MPI + * tasks. + * \param[out] count_send Count how many elements have to be sent to each + * MPI task. + * \param[out] count_recv Count how many elements have to be received from + * each MPI task. + * \param[in] just_count_flag Do element counting for communication instead + * of data transfer. + * + * \return void + */ +static void my_fft_column_transpose_c(fft_complex *data, int Ndims[3], int in_firstcol, int in_ncol, fft_complex *out, int perm[3], + int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send, + size_t *count_recv, size_t just_count_flag) +{ + int j, target, origin, ngrp, recvTask, perm_rev[3], xyz[3], uvw[3]; + size_t nimport, nexport; + + /* determine the inverse permutation */ + for(j = 0; j < 3; j++) + perm_rev[j] = perm[j]; + + if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) /* not yet the inverse */ + { + for(j = 0; j < 3; j++) + perm_rev[j] = perm[perm[j]]; + + if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) + terminate("bummer"); + } + + int in_colums = Ndims[0] * Ndims[1]; + int in_avg = (in_colums - 1) / NTask + 1; + int in_exc = NTask * in_avg - in_colums; + int in_tasklastsection = NTask - in_exc; + int in_pivotcol = in_tasklastsection * in_avg; + + int out_colums = Ndims[perm[0]] * Ndims[perm[1]]; + int out_avg = (out_colums - 1) / NTask + 1; + int out_exc = NTask * out_avg - out_colums; + int out_tasklastsection = NTask - out_exc; + int out_pivotcol = out_tasklastsection * out_avg; + + size_t i, ncells = ((size_t)in_ncol) * Ndims[2]; + + xyz[0] = in_firstcol / Ndims[1]; + xyz[1] = in_firstcol % Ndims[1]; + xyz[2] = 0; + + memset(count_send, 0, NTask * sizeof(size_t)); + + /* loop over all cells in input array and determine target processor */ + for(i = 0; i < ncells; i++) + { + /* 
determine target task */ + uvw[0] = xyz[perm[0]]; + uvw[1] = xyz[perm[1]]; + uvw[2] = xyz[perm[2]]; + + int newcol = Ndims[perm[1]] * uvw[0] + uvw[1]; + if(newcol < out_pivotcol) + target = newcol / out_avg; + else + target = (newcol - out_pivotcol) / (out_avg - 1) + out_tasklastsection; + + /* move data element to targettask */ + + if(just_count_flag) + count_send[target]++; + else + { + size_t off = offset_send[target] + count_send[target]++; + out[off][0] = data[i][0]; + out[off][1] = data[i][1]; + } + xyz[2]++; + if(xyz[2] == Ndims[2]) + { + xyz[2] = 0; + xyz[1]++; + if(xyz[1] == Ndims[1]) + { + xyz[1] = 0; + xyz[0]++; + } + } + } + + if(just_count_flag) + { + MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, nexport = 0, offset_send[0] = 0, offset_recv[0] = 0; j < NTask; j++) + { + nexport += count_send[j]; + nimport += count_recv[j]; + + if(j > 0) + { + offset_send[j] = offset_send[j - 1] + count_send[j - 1]; + offset_recv[j] = offset_recv[j - 1] + count_recv[j - 1]; + } + } + + if(nexport != ncells) + terminate("nexport=%lld != ncells=%lld", (long long)nexport, (long long)ncells); + } + else + { + nimport = 0; + + /* exchange all the data */ + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(count_send[recvTask] > 0 || count_recv[recvTask] > 0) + myMPI_Sendrecv(&out[offset_send[recvTask]], count_send[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask, TAG_DENS_A, + &data[offset_recv[recvTask]], count_recv[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask, + TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + nimport += count_recv[recvTask]; + } + } + + /* now loop over the new cell layout */ + /* find enclosing rectangle around columns in new plane */ + + int first[3], last[3]; + + first[0] = out_firstcol / Ndims[perm[1]]; + first[1] = out_firstcol % Ndims[perm[1]]; + first[2] = 0; + + last[0] = (out_firstcol + 
out_ncol - 1) / Ndims[perm[1]]; + last[1] = (out_firstcol + out_ncol - 1) % Ndims[perm[1]]; + last[2] = Ndims[perm[2]] - 1; + + if(first[1] + out_ncol >= Ndims[perm[1]]) + { + first[1] = 0; + last[1] = Ndims[perm[1]] - 1; + } + + /* now need to map this back to the old coordinates */ + + int xyz_first[3], xyz_last[3]; + + for(j = 0; j < 3; j++) + { + xyz_first[j] = first[perm_rev[j]]; + xyz_last[j] = last[perm_rev[j]]; + } + + memset(count_recv, 0, NTask * sizeof(size_t)); + + size_t count = 0; + + /* traverse an enclosing box around the new cell layout in the old order */ + for(xyz[0] = xyz_first[0]; xyz[0] <= xyz_last[0]; xyz[0]++) + for(xyz[1] = xyz_first[1]; xyz[1] <= xyz_last[1]; xyz[1]++) + for(xyz[2] = xyz_first[2]; xyz[2] <= xyz_last[2]; xyz[2]++) + { + /* check that the point is actually part of a column */ + uvw[0] = xyz[perm[0]]; + uvw[1] = xyz[perm[1]]; + uvw[2] = xyz[perm[2]]; + + int col = uvw[0] * Ndims[perm[1]] + uvw[1]; + + if(col >= out_firstcol && col < out_firstcol + out_ncol) + { + /* determine origin task */ + int newcol = Ndims[1] * xyz[0] + xyz[1]; + if(newcol < in_pivotcol) + origin = newcol / in_avg; + else + origin = (newcol - in_pivotcol) / (in_avg - 1) + in_tasklastsection; + + size_t index = ((size_t)Ndims[perm[2]]) * (col - out_firstcol) + uvw[2]; + + /* move data element from origin task */ + size_t off = offset_recv[origin] + count_recv[origin]++; + out[index][0] = data[off][0]; + out[index][1] = data[off][1]; + + count++; + } + } + + if(count != nimport) + { + int fi = out_firstcol % Ndims[perm[1]]; + int la = (out_firstcol + out_ncol - 1) % Ndims[perm[1]]; + + terminate("count=%lld nimport=%lld ncol=%d fi=%d la=%d first=%d last=%d\n", (long long)count, (long long)nimport, out_ncol, + fi, la, first[1], last[1]); + } + } +} + +#endif /* #ifndef FFT_COLUMN_BASED #else */ + +#endif /* #if defined(PMGRID) */ diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c 
b/src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c new file mode 100644 index 0000000000..7346af2849 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c @@ -0,0 +1,2087 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/pm/pm_non_periodic.c + * \date 05/2018 + * \brief Code for non-periodic FFT to compute long-range PM force. 
+ * \details contains functions: + * void pm_init_regionsize(void) + * void pm_init_nonperiodic(void) + * int pmforce_is_particle_high_res(int type, MyDouble * Pos) + * void pmforce_nonperiodic_zoom_optimized_prepare_density(int + * grnr) + * void pmforce_nonperiodic_zoom_optimized_readout_forces_or_ + * potential(int grnr, int dim) + * void pmforce_nonperiodic_uniform_optimized_prepare_density( + * int grnr) + * void pmforce_nonperiodic_uniform_optimized_readout_forces_or_ + * potential(int grnr, int dim) + * int pmforce_nonperiodic(int grnr) + * void pm_setup_nonperiodic_kernel(void) + * static int pm_periodic_compare_sortindex(const void *a, + * const void *b) + * static void msort_pmperiodic_with_tmp(large_numpart_type * b, + * size_t n, large_numpart_type * t) + * static void mysort_pmperiodic(void *b, size_t n, size_t s, + * int (*cmp) (const void *, const void *)) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#if defined(PMGRID) && (defined(PLACEHIGHRESREGION) || defined(GRAVITY_NOT_PERIODIC)) + +#if defined(LONG_X) || defined(LONG_Y) || defined(LONG_Z) +#error "LONG_X/Y/Z not supported for the non-periodic FFT gravity code" +#endif /* #if defined(LONG_X) || defined(LONG_Y) || defined (LONG_Z) */ + +#ifndef GRIDBOOST +#define GRIDBOOST 2 +#endif /* #ifndef GRIDBOOST */ + +#define GRID (GRIDBOOST * PMGRID) +#define GRIDz (GRID / 2 + 1) +#define GRID2 (2 * GRIDz) + +#if(GRID > 1024) +typedef long long large_array_offset; /* use a larger data type in this case so that we can always address all cells of the 3D grid + with a single index */ +#else /* #if (GRID > 1024) */ +typedef unsigned int large_array_offset; +#endif /* #if (GRID > 1024) #else */ + +#ifdef NUMPART_PER_TASK_LARGE +typedef long long large_numpart_type; /* if 
there is a risk that the local particle number times 8 overflows a 32-bit integer, this
                                         data type should be used */
#else                                 /* #ifdef NUMPART_PER_TASK_LARGE */
typedef int large_numpart_type;
#endif /* #ifdef NUMPART_PER_TASK_LARGE */

/* short-cut macros for accessing different 3D arrays; each one flattens its
 * indices into a single offset and casts the leading factor to
 * large_array_offset so that the product is evaluated in that type:
 *   FI(x, y, z): GRID2 * (GRID * x + y) + z
 *   FC(c, z):    GRID2 * (c - myplan.base_firstcol) + z, i.e. element z of
 *                column c counted from the column myplan.base_firstcol
 *   TI(x, y, z): GRID * (x + y * myplan.nslab_x) + z
 */
#define FI(x, y, z) (((large_array_offset)GRID2) * (GRID * (x) + (y)) + (z))
#define FC(c, z) (((large_array_offset)GRID2) * ((c)-myplan.base_firstcol) + (z))
#define TI(x, y, z) (((large_array_offset)GRID) * ((x) + (y)*myplan.nslab_x) + (z))

static fft_plan myplan; /*!< In this structure, various bookkeeping variables for the distributed FFTs are stored */

/*! \var maxfftsize
 *  \brief maximum size of the local fft grid among all tasks
 */
static size_t maxfftsize;

/*! \var rhogrid
 *  \brief This array holds the local part of the density field and
 *  after the FFTs the local part of the potential
 *
 *  \var forcegrid
 *  \brief This array will contain the force field
 *
 *  \var workspace
 *  \brief Workspace array used during the FFTs
 */
static fft_real *rhogrid, *forcegrid, *workspace;

/*! \brief Array containing the FFT of 'rhogrid'
 *
 *  This pointer points to the same array as 'rhogrid',
 *  because in-place FFTs are used.
 */
static fft_complex *fft_of_rhogrid;

/* Kernel storage for the two meshes: pm_init_nonperiodic() allocates entry 0
 * when GRAVITY_NOT_PERIODIC is defined and entry 1 when PLACEHIGHRESREGION is
 * defined. fft_of_kernel[i] is set to the same memory as kernel[i], viewed as
 * complex numbers. */
static fft_real *kernel[2];
static fft_complex *fft_of_kernel[2];

/*! \brief Determine particle extent.
 *
 *  This function determines the particle extension of all particles, and for
 *  those types selected with PLACEHIGHRESREGION if this is used, and then
 *  determines the boundaries of the non-periodic FFT-mesh that can be placed
 *  on this region. Note that a sufficient buffer region at the rim of the
 *  occupied part of the mesh needs to be reserved in order to allow a correct
 *  finite differencing using a 4-point formula. In addition, to allow
 *  non-periodic boundaries, the actual FFT mesh used (GRID) is GRIDBOOST
 *  times (by default twice) as large in each dimension as PMGRID.
+ * + * \return void + */ +void pm_init_regionsize(void) +{ + double meshinner[2], xmin[2][3], xmax[2][3]; + int i, j; + + /* find enclosing rectangle */ + + for(j = 0; j < 3; j++) + { + xmin[0][j] = xmin[1][j] = 1.0e36; + xmax[0][j] = xmax[1][j] = -1.0e36; + } + + for(i = 0; i < NumPart; i++) + for(j = 0; j < 3; j++) + { + if(P[i].Pos[j] > xmax[0][j]) + xmax[0][j] = P[i].Pos[j]; + if(P[i].Pos[j] < xmin[0][j]) + xmin[0][j] = P[i].Pos[j]; + +#ifdef PLACEHIGHRESREGION + if(((1 << P[i].Type) & (PLACEHIGHRESREGION))) + { + if(P[i].Pos[j] > xmax[1][j]) + xmax[1][j] = P[i].Pos[j]; + if(P[i].Pos[j] < xmin[1][j]) + xmin[1][j] = P[i].Pos[j]; + } +#endif /* #ifdef PLACEHIGHRESREGION */ + } + + MPI_Allreduce(xmin, All.Xmintot, 6, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(xmax, All.Xmaxtot, 6, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + + for(j = 0; j < 2; j++) + { + All.TotalMeshSize[j] = All.Xmaxtot[j][0] - All.Xmintot[j][0]; + All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][1] - All.Xmintot[j][1]); + All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][2] - All.Xmintot[j][2]); +#ifdef ENLARGEREGION + All.TotalMeshSize[j] *= ENLARGEREGION; +#endif /* #ifdef ENLARGEREGION */ + + /* symmetrize the box onto the center */ + for(i = 0; i < 3; i++) + { + All.Xmintot[j][i] = (All.Xmintot[j][i] + All.Xmaxtot[j][i]) / 2 - All.TotalMeshSize[j] / 2; + All.Xmaxtot[j][i] = All.Xmintot[j][i] + All.TotalMeshSize[j]; + } + } + + /* this will produce enough room for zero-padding and buffer region to + allow finite differencing of the potential */ + + for(j = 0; j < 2; j++) + { + meshinner[j] = All.TotalMeshSize[j]; + All.TotalMeshSize[j] *= 2.001 * (GRID) / ((double)(GRID - 2 - 8)); + } + + /* move lower left corner by two cells to allow finite differencing of the potential by a 4-point function */ + + for(j = 0; j < 2; j++) + for(i = 0; i < 3; i++) + { + All.Corner[j][i] = All.Xmintot[j][i] - 2.0005 * All.TotalMeshSize[j] / GRID; + All.UpperCorner[j][i] 
= All.Corner[j][i] + (GRID / 2 - 1) * (All.TotalMeshSize[j] / GRID); + } + +#ifdef PLACEHIGHRESREGION + All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID; + All.Rcut[1] = RCUT * All.Asmth[1]; +#endif /* #ifdef PLACEHIGHRESREGION */ + +#ifdef PLACEHIGHRESREGION + if(2 * All.TotalMeshSize[1] / GRID < All.Rcut[0]) + { + All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double)(GRID - 2)); + + for(i = 0; i < 3; i++) + { + All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * All.Rcut[0]; + All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID); + } + + if(2 * All.TotalMeshSize[1] / GRID > All.Rcut[0]) + { + All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double)(GRID - 10)); + + for(i = 0; i < 3; i++) + { + All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * (All.Rcut[0] + 2 * All.TotalMeshSize[1] / GRID); + All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID); + } + } + + All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID; + All.Rcut[1] = RCUT * All.Asmth[1]; + + mpi_printf("PM-NONPERIODIC: All.Asmth[0]=%g All.Asmth[1]=%g\n", All.Asmth[0], All.Asmth[1]); + } +#endif /* #ifdef PLACEHIGHRESREGION */ + +#ifdef PLACEHIGHRESREGION + mpi_printf( + "PM-NONPERIODIC: Allowed region for isolated PM mesh (high-res): (%g|%g|%g) -> (%g|%g|%g) ext=%g totmeshsize=%g " + "meshsize=%g\n\n", + All.Xmintot[1][0], All.Xmintot[1][1], All.Xmintot[1][2], All.Xmaxtot[1][0], All.Xmaxtot[1][1], All.Xmaxtot[1][2], meshinner[1], + All.TotalMeshSize[1], All.TotalMeshSize[1] / GRID); +#endif /* #ifdef PLACEHIGHRESREGION */ +} + +/*! \brief Initialization of the non-periodic PM routines. + * + * The plan-files for FFTW are created. Finally, the routine to set-up the + * non-periodic Greens function is called. + * + * \return void + */ +void pm_init_nonperiodic(void) +{ + /* Set up the FFTW-3 plan files. 
*/ + int ndim[1] = {GRID}; /* dimension of the 1D transforms */ + + /* temporarily allocate some arrays to make sure that out-of-place plans are created */ + rhogrid = (fft_real *)mymalloc("rhogrid", GRID2 * sizeof(fft_real)); + forcegrid = (fft_real *)mymalloc("forcegrid", GRID2 * sizeof(fft_real)); + +#ifdef DOUBLEPRECISION_FFTW + int alignflag = 0; +#else /* #ifdef DOUBLEPRECISION_FFTW */ + /* for single precision, the start of our FFT columns is presently only guaranteed to be 8-byte aligned */ + int alignflag = FFTW_UNALIGNED; +#endif /* #ifdef DOUBLEPRECISION_FFTW #else */ +#ifndef FFT_COLUMN_BASED + int stride = GRIDz; +#else /* #ifndef FFT_COLUMN_BASED */ + int stride = 1; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + myplan.forward_plan_zdir = FFTW(plan_many_dft_r2c)(1, ndim, 1, rhogrid, 0, 1, GRID2, (fft_complex *)forcegrid, 0, 1, GRIDz, + FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + + myplan.forward_plan_xdir = + FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride, + GRIDz * GRID, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + + myplan.forward_plan_ydir = + FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride, + GRIDz * GRID, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + + myplan.backward_plan_zdir = FFTW(plan_many_dft_c2r)(1, ndim, 1, (fft_complex *)rhogrid, 0, 1, GRIDz, forcegrid, 0, 1, GRID2, + FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + + myplan.backward_plan_xdir = + FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride, + GRIDz * GRID, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + + myplan.backward_plan_ydir = + FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride, + GRIDz * GRID, FFTW_BACKWARD, FFTW_ESTIMATE | 
FFTW_DESTROY_INPUT | alignflag); + + myfree(forcegrid); + myfree(rhogrid); + +#ifndef FFT_COLUMN_BASED + + my_slab_based_fft_init(&myplan, GRID, GRID, GRID); + + maxfftsize = myplan.largest_x_slab * GRID * ((size_t)GRID2); + +#else /* #ifndef FFT_COLUMN_BASED */ + + my_column_based_fft_init(&myplan, GRID, GRID, GRID); + + maxfftsize = myplan.max_datasize; + +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + /* now allocate memory to hold the FFT fields */ + + size_t bytes, bytes_tot = 0; + +#if defined(GRAVITY_NOT_PERIODIC) + kernel[0] = (fft_real *)mymalloc("kernel[0]", bytes = maxfftsize * sizeof(fft_real)); + bytes_tot += bytes; + fft_of_kernel[0] = (fft_complex *)kernel[0]; +#endif /* #if defined(GRAVITY_NOT_PERIODIC) */ + +#if defined(PLACEHIGHRESREGION) + kernel[1] = (fft_real *)mymalloc("kernel[1]", bytes = maxfftsize * sizeof(fft_real)); + bytes_tot += bytes; + fft_of_kernel[1] = (fft_complex *)kernel[1]; +#endif /* #if defined(PLACEHIGHRESREGION) */ + + mpi_printf("\nPM-NONPERIODIC: Allocated %g MByte for FFT kernel(s).\n\n", bytes_tot / (1024.0 * 1024.0)); +} + +#ifdef PLACEHIGHRESREGION +/*! \brief Is this a high res particle in high resolution region? + * + * For cosmological zoom simulations. + * + * \param[in] type Parcile type. + * \param[in] Pos Position of particle. + * + * \return 0: not high res; 1: high res. 
+ */ +int pmforce_is_particle_high_res(int type, MyDouble *Pos) +{ + int flag = 1; + + if((1 << type) & (PLACEHIGHRESREGION)) + return 1; + +#if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) + double r2 = 0; + for(int j = 0; j < 3; j++) + r2 += pow(Pos[j] - 0.5 * (All.Xmintot[1][j] + All.Xmaxtot[1][j]), 2); + + if(sqrt(r2) > 0.5 * (All.Xmaxtot[1][0] - All.Xmintot[1][0])) + return 0; +#else /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) */ + + for(int j = 0; j < 3; j++) + if(Pos[j] < All.Xmintot[1][j] || Pos[j] > All.Xmaxtot[1][j]) + { + flag = 0; /* we are outside */ + break; + } + +#endif /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) #else */ + + return flag; +} +#endif /* #ifdef PLACEHIGHRESREGION */ + +#ifdef PM_ZOOM_OPTIMIZED + +static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)); +static int pm_periodic_compare_sortindex(const void *a, const void *b); + +/*! \brief This structure links the particles to the mesh cells, to which they + * contribute their mass. + * + * Each particle will have eight items of this structure in the 'part' array. + * For each of the eight mesh cells the CIC assignment will contribute, + * one item of this struct exists. 
+ */ +static struct part_slab_data +{ + large_array_offset globalindex; /*!< index in the global density mesh */ + large_numpart_type partindex; /*!< contains the local particle index shifted by 2^3, the first three bits encode to which part of the + CIC assignment this item belongs to */ + large_array_offset localindex; /*!< index to a local copy of the corresponding mesh cell of the global density array (used during + local mass and force assignment) */ +} * part; /*!< array of part_slab_data linking the local particles to their mesh cells */ + +static size_t *localfield_sendcount, *localfield_first, *localfield_offset, *localfield_recvcount; +static large_array_offset *localfield_globalindex, *import_globalindex; +static fft_real *localfield_data, *import_data; +static large_numpart_type num_on_grid; + +/*! \brief Prepares density field for nonperiodic FFTs. + * + * \param[in] grnr (0, 1) 0 if full mesh, 1 if highres grid. + * + * \return void + */ +void pmforce_nonperiodic_zoom_optimized_prepare_density(int grnr) +{ + large_numpart_type i; + int level, recvTask; + MPI_Status status; + + double to_slab_fac = GRID / All.TotalMeshSize[grnr]; + + part = (struct part_slab_data *)mymalloc("part", 8 * (NumPart * sizeof(struct part_slab_data))); + large_numpart_type *part_sortindex = (large_numpart_type *)mymalloc("part_sortindex", 8 * (NumPart * sizeof(large_numpart_type))); + + int ngrid = 0; + + /* determine the cells each particle accesses */ + for(i = 0; i < NumPart; i++) + { + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + pos = SphP[i].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + + if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0]) + continue; + if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1]) + continue; + if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2]) + continue; + + int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0])); + int slab_y 
= (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1])); + int slab_z = (int)(to_slab_fac * (pos[2] - All.Corner[grnr][2])); + int myngrid; + + { + myngrid = ngrid; + ngrid += 1; + } + + large_numpart_type index_on_grid = ((large_numpart_type)myngrid) * 8; + + int xx, yy, zz; + + for(xx = 0; xx < 2; xx++) + for(yy = 0; yy < 2; yy++) + for(zz = 0; zz < 2; zz++) + { + int slab_xx = slab_x + xx; + int slab_yy = slab_y + yy; + int slab_zz = slab_z + zz; + + if(slab_xx >= GRID) + slab_xx -= GRID; + if(slab_yy >= GRID) + slab_yy -= GRID; + if(slab_zz >= GRID) + slab_zz -= GRID; + + large_array_offset offset = FI(slab_xx, slab_yy, slab_zz); + + part[index_on_grid].partindex = (i << 3) + (xx << 2) + (yy << 1) + zz; + part[index_on_grid].globalindex = offset; + part_sortindex[index_on_grid] = index_on_grid; + index_on_grid++; + } + } + + /* note: num_on_grid will be 8 times larger than the particle number, but num_field_points will generally be much smaller */ + num_on_grid = ((large_numpart_type)ngrid) * 8; + + /* bring the part-field into the order of the accessed cells. 
This allows the removal of duplicates */ + mysort_pmperiodic(part_sortindex, num_on_grid, sizeof(large_numpart_type), pm_periodic_compare_sortindex); + + large_array_offset num_field_points; + + if(num_on_grid > 0) + num_field_points = 1; + else + num_field_points = 0; + + /* determine the number of unique field points */ + for(i = 1; i < num_on_grid; i++) + { + if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex) + num_field_points++; + } + + /* allocate the local field */ + localfield_globalindex = (large_array_offset *)mymalloc_movable(&localfield_globalindex, "localfield_globalindex", + num_field_points * sizeof(large_array_offset)); + localfield_data = (fft_real *)mymalloc_movable(&localfield_data, "localfield_data", num_field_points * sizeof(fft_real)); + localfield_first = (size_t *)mymalloc_movable(&localfield_first, "localfield_first", NTask * sizeof(size_t)); + localfield_sendcount = (size_t *)mymalloc_movable(&localfield_sendcount, "localfield_sendcount", NTask * sizeof(size_t)); + localfield_offset = (size_t *)mymalloc_movable(&localfield_offset, "localfield_offset", NTask * sizeof(size_t)); + localfield_recvcount = (size_t *)mymalloc_movable(&localfield_recvcount, "localfield_recvcount", NTask * sizeof(size_t)); + + for(i = 0; i < NTask; i++) + { + localfield_first[i] = 0; + localfield_sendcount[i] = 0; + } + + /* establish the cross link between the part[ ]-array and the local list of + * mesh points. Also, count on which CPU the needed field points are stored. 
+ */ + for(i = 0, num_field_points = 0; i < num_on_grid; i++) + { + if(i > 0) + if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex) + num_field_points++; + + part[part_sortindex[i]].localindex = num_field_points; + + if(i > 0) + if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex) + continue; + + localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex; + +#ifndef FFT_COLUMN_BASED + int slab = part[part_sortindex[i]].globalindex / (GRID * GRID2); + int task = myplan.slab_to_task[slab]; +#else /* #ifndef FFT_COLUMN_BASED */ + int task, column = part[part_sortindex[i]].globalindex / (GRID2); + + if(column < myplan.pivotcol) + task = column / myplan.avg; + else + task = (column - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + if(localfield_sendcount[task] == 0) + localfield_first[task] = num_field_points; + + localfield_sendcount[task]++; + } + num_field_points++; + + for(i = 1, localfield_offset[0] = 0; i < NTask; i++) + localfield_offset[i] = localfield_offset[i - 1] + localfield_sendcount[i - 1]; + + myfree_movable(part_sortindex); + part_sortindex = NULL; + + /* now bin the local particle data onto the mesh list */ + for(i = 0; i < num_field_points; i++) + localfield_data[i] = 0; + + for(i = 0; i < num_on_grid; i += 8) + { + int pindex = (part[i].partindex >> 3); + + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + if(P[pindex].Type == 0) + pos = SphP[pindex].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[pindex].Pos; + + int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0])); + int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1])); + int slab_z = (int)(to_slab_fac * (pos[2] - All.Corner[grnr][2])); + + double dx = to_slab_fac * (pos[0] - All.Corner[grnr][0]) - slab_x; + double dy = to_slab_fac * (pos[1] - All.Corner[grnr][1]) - slab_y; + double dz = to_slab_fac * (pos[2] - 
All.Corner[grnr][2]) - slab_z; + + double weight = P[pindex].Mass; + + localfield_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); + localfield_data[part[i + 1].localindex] += weight * (1.0 - dx) * (1.0 - dy) * dz; + localfield_data[part[i + 2].localindex] += weight * (1.0 - dx) * dy * (1.0 - dz); + localfield_data[part[i + 3].localindex] += weight * (1.0 - dx) * dy * dz; + localfield_data[part[i + 4].localindex] += weight * (dx) * (1.0 - dy) * (1.0 - dz); + localfield_data[part[i + 5].localindex] += weight * (dx) * (1.0 - dy) * dz; + localfield_data[part[i + 6].localindex] += weight * (dx)*dy * (1.0 - dz); + localfield_data[part[i + 7].localindex] += weight * (dx)*dy * dz; + } + + rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real)); + + /* clear local FFT-mesh density field */ + large_array_offset ii; + for(ii = 0; ii < maxfftsize; ii++) + rhogrid[ii] = 0; + + /* exchange data and add contributions to the local mesh-path */ + MPI_Alltoall(localfield_sendcount, sizeof(size_t), MPI_BYTE, localfield_recvcount, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); + + for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ + { + recvTask = ThisTask ^ level; + + if(recvTask < NTask) + { + if(level > 0) + { + import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real)); + import_globalindex = + (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset)); + + if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0) + { + myMPI_Sendrecv(localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real), + MPI_BYTE, recvTask, TAG_NONPERIOD_A, import_data, localfield_recvcount[recvTask] * sizeof(fft_real), + MPI_BYTE, recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status); + + myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask], + 
localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_B, + import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, + TAG_NONPERIOD_B, MPI_COMM_WORLD, &status); + } + } + else + { + import_data = localfield_data + localfield_offset[ThisTask]; + import_globalindex = localfield_globalindex + localfield_offset[ThisTask]; + } + + /* note: here every element in rhogrid is only accessed once, so there should be no race condition */ + for(i = 0; i < localfield_recvcount[recvTask]; i++) + { + /* determine offset in local FFT slab */ +#ifndef FFT_COLUMN_BASED + large_array_offset offset = + import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRID * ((large_array_offset)GRID2); +#else /* #ifndef FFT_COLUMN_BASED */ + large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + rhogrid[offset] += import_data[i]; + } + + if(level > 0) + { + myfree(import_globalindex); + myfree(import_data); + } + } + } +} + +/*! \brief Reads out the force component corresponding to spatial dimension + * 'dim'. + * + * If dim is negative, potential values are read out of rhogrid and assigned to + * particles; otherwise values are read out of forcegrid and added to GravPM[dim]. + * + * \param[in] grnr Number of grid (0: base, 1 high-res) + * \param[in] dim Dimension to be read out + * (<0: potential,>=0 force component).
+ * + * \return void + */ +void pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(int grnr, int dim) +{ +#ifdef EVALPOTENTIAL + /* factor to get potential */ + double fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3); +#endif /* #ifdef EVALPOTENTIAL */ + + large_numpart_type i; + int level, recvTask; + MPI_Status status; + + fft_real *grid; /* mesh the values are read from */ + + if(dim < 0) + grid = rhogrid; + else + grid = forcegrid; + + double to_slab_fac = GRID / All.TotalMeshSize[grnr]; /* scales an offset from All.Corner[grnr] into mesh-cell units */ + + for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ + { + recvTask = ThisTask ^ level; /* communication partner for this round */ + + if(recvTask < NTask) + { + if(level > 0) + { + import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real)); + import_globalindex = + (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset)); + + if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0) + { + myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask], + localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_C, + import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, + TAG_NONPERIOD_C, MPI_COMM_WORLD, &status); + } + } + else + { + import_data = localfield_data + localfield_offset[ThisTask]; + import_globalindex = localfield_globalindex + localfield_offset[ThisTask]; + } + + for(i = 0; i < localfield_recvcount[recvTask]; i++) + { +#ifndef FFT_COLUMN_BASED + large_array_offset offset = + import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRID * ((large_array_offset)GRID2); +#else /* #ifndef FFT_COLUMN_BASED */ + large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + import_data[i] = grid[offset]; /* fetch the mesh value for the global index the partner sent */ + } + + if(level > 0) + { +
myMPI_Sendrecv(import_data, localfield_recvcount[recvTask] * sizeof(fft_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A, + localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real), + MPI_BYTE, recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status); /* return the looked-up values and receive our own into localfield_data */ + + myfree(import_globalindex); + myfree(import_data); + } + } + } + + /* read out the force/potential values, which all have been assembled in localfield_data */ + + int k, ngrid = (num_on_grid >> 3); /* part[] holds 8 consecutive entries per particle */ + + for(k = 0; k < ngrid; k++) + { + large_numpart_type j = (((large_numpart_type)k) << 3); /* first of the 8 part[] entries of this particle */ + + int i = (part[j].partindex >> 3); /* drop the low 3 bits of partindex to get the particle index */ + + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + pos = SphP[i].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + +#ifdef PLACEHIGHRESREGION + if(grnr == 1) + if(!(pmforce_is_particle_high_res(P[i].Type, pos))) + continue; +#endif /* #ifdef PLACEHIGHRESREGION */ + + int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0])); + double dx = to_slab_fac * (pos[0] - All.Corner[grnr][0]) - slab_x; /* fractional position inside the cell */ + + int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1])); + double dy = to_slab_fac * (pos[1] - All.Corner[grnr][1]) - slab_y; + + int slab_z = (int)(to_slab_fac * (pos[2] - All.Corner[grnr][2])); + double dz = to_slab_fac * (pos[2] - All.Corner[grnr][2]) - slab_z; + + double value = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) + + localfield_data[part[j + 1].localindex] * (1.0 - dx) * (1.0 - dy) * dz + + localfield_data[part[j + 2].localindex] * (1.0 - dx) * dy * (1.0 - dz) + + localfield_data[part[j + 3].localindex] * (1.0 - dx) * dy * dz + + localfield_data[part[j + 4].localindex] * (dx) * (1.0 - dy) * (1.0 - dz) + + localfield_data[part[j + 5].localindex] * (dx) * (1.0 - dy) * dz + + localfield_data[part[j + 6].localindex] * (dx)*dy * (1.0 - dz) + + localfield_data[part[j + 7].localindex] * (dx)*dy * dz; /* weighted sum over the 8 mesh points around the particle */ + + if(dim < 0) + { +#ifdef EVALPOTENTIAL + P[i].PM_Potential +=
value * fac; +#endif /* #ifdef EVALPOTENTIAL */ + } + else + P[i].GravPM[dim] += value; + } +} + +#else /* #ifdef PM_ZOOM_OPTIMIZED */ +/* Here come the routines for a different communication algorithm that is better suited for homogeneously loaded boxes. + */ + +/*! \brief Particle buffer structure (mass and position) used for the import/export buffers partin and partout. + */ +static struct partbuf +{ + MyFloat Mass; + MyFloat Pos[3]; +} * partin, *partout; + +static size_t nimport, nexport; /* total number of partbuf entries in partin / partout */ + +static size_t *Sndpm_count, *Sndpm_offset; /* per-task send counts and start offsets into the export buffer */ +static size_t *Rcvpm_count, *Rcvpm_offset; /* per-task receive counts and start offsets into the import buffer */ + +/*! \brief Prepares density for pm calculation in algorithm optimized for + * uniform densities. + * + * \param[in] grnr Number of grid (0: base grid, 1: high res grid). + * + * \return void + */ +void pmforce_nonperiodic_uniform_optimized_prepare_density(int grnr) +{ + int i, j; + + double to_slab_fac = GRID / All.TotalMeshSize[grnr]; + + /* We here enlarge NTask such that each thread gets its own cache line for send_count/send_offset. + * This should hopefully prevent a performance penalty from 'false sharing' for these variables + */ + int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t); + + Sndpm_count = mymalloc("Sndpm_count", MaxThreads * multiNtask * sizeof(size_t)); + Sndpm_offset = mymalloc("Sndpm_offset", MaxThreads * multiNtask * sizeof(size_t)); + Rcvpm_count = mymalloc("Rcvpm_count", NTask * sizeof(size_t)); + Rcvpm_offset = mymalloc("Rcvpm_offset", NTask * sizeof(size_t)); + + /* determine the slabs/columns each particle accesses */ + { + size_t *send_count = Sndpm_count + get_thread_num() * multiNtask; + + /* each thread needs to do the loop to clear its send_count[] array */ + for(j = 0; j < NTask; j++) + send_count[j] = 0; + + for(i = 0; i < NumPart; i++) + { + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + pos = SphP[i].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + + if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0]) +
continue; + if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1]) + continue; + if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2]) + continue; /* particles outside [Corner, UpperCorner) of this grid are not exported */ + + int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0])); + int slab_xx = slab_x + 1; /* the next slab in x is touched by the particle as well */ + +#ifndef FFT_COLUMN_BASED + int task0 = myplan.slab_to_task[slab_x]; + int task1 = myplan.slab_to_task[slab_xx]; + + send_count[task0]++; + if(task0 != task1) + send_count[task1]++; /* the two slabs live on different tasks: count one copy for each */ +#else /* #ifndef FFT_COLUMN_BASED */ + int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1])); + int slab_yy = slab_y + 1; + + int column0 = slab_x * GRID + slab_y; + int column1 = slab_x * GRID + slab_yy; + int column2 = slab_xx * GRID + slab_y; + int column3 = slab_xx * GRID + slab_yy; + + int task0, task1, task2, task3; /* tasks assigned to the four (x,y) columns the particle touches */ + + if(column0 < myplan.pivotcol) + task0 = column0 / myplan.avg; + else + task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column1 < myplan.pivotcol) + task1 = column1 / myplan.avg; + else + task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column2 < myplan.pivotcol) + task2 = column2 / myplan.avg; + else + task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column3 < myplan.pivotcol) + task3 = column3 / myplan.avg; + else + task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + send_count[task0]++; /* count one copy per distinct destination task */ + if(task1 != task0) + send_count[task1]++; + if(task2 != task1 && task2 != task0) + send_count[task2]++; + if(task3 != task0 && task3 != task1 && task3 != task2) + send_count[task3]++; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + } + } + + /* collect thread-specific offset table and collect the results from the other threads */ + for(i = 0, Sndpm_offset[0] = 0; i < NTask; i++) + for(j = 0; j < MaxThreads; j++) + { + int ind_prev, ind = j * multiNtask + i; + if(ind > 0) + { + if(j == 0) + ind_prev = (MaxThreads - 1) * multiNtask + i - 1; /* last thread's entry for the previous task */ + else
+ ind_prev = ind - multiNtask; /* previous thread's entry for the same task */ + + Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev]; + } + } + + for(j = 1; j < MaxThreads; j++) + for(i = 0; i < NTask; i++) + Sndpm_count[i] += Sndpm_count[i + j * multiNtask]; /* fold the per-thread counts into the first NTask entries */ + + MPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); /* exchange the per-task counts */ + + for(j = 0, nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0; j < NTask; j++) + { + nexport += Sndpm_count[j]; + nimport += Rcvpm_count[j]; + + if(j > 0) + { + Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1]; + Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1]; + } + } + + /* allocate import and export buffer */ + partin = (struct partbuf *)mymalloc("partin", nimport * sizeof(struct partbuf)); + partout = (struct partbuf *)mymalloc("partout", nexport * sizeof(struct partbuf)); + + { + size_t *send_count = Sndpm_count + get_thread_num() * multiNtask; + size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask; + + for(j = 0; j < NTask; j++) + send_count[j] = 0; /* from here on the counts serve as running fill positions */ + + /* fill export buffer */ + for(i = 0; i < NumPart; i++) + { + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + pos = SphP[i].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + + if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0]) + continue; + if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1]) + continue; + if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2]) + continue; + + int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0])); + int slab_xx = slab_x + 1; + +#ifndef FFT_COLUMN_BASED + int task0 = myplan.slab_to_task[slab_x]; + int task1 = myplan.slab_to_task[slab_xx]; + + size_t ind0 = send_offset[task0] + send_count[task0]++; /* next free slot in the section destined for task0 */ + partout[ind0].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind0].Pos[j] = pos[j]; + + if(task0 != task1) + { + size_t ind1 = send_offset[task1] + send_count[task1]++; +
partout[ind1].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind1].Pos[j] = pos[j]; + } +#else /* #ifndef FFT_COLUMN_BASED */ + int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1])); + int slab_yy = slab_y + 1; + + int column0 = slab_x * GRID + slab_y; + int column1 = slab_x * GRID + slab_yy; + int column2 = slab_xx * GRID + slab_y; + int column3 = slab_xx * GRID + slab_yy; + + int task0, task1, task2, task3; /* same column-to-task mapping as in the counting pass above */ + + if(column0 < myplan.pivotcol) + task0 = column0 / myplan.avg; + else + task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column1 < myplan.pivotcol) + task1 = column1 / myplan.avg; + else + task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column2 < myplan.pivotcol) + task2 = column2 / myplan.avg; + else + task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column3 < myplan.pivotcol) + task3 = column3 / myplan.avg; + else + task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + size_t ind0 = send_offset[task0] + send_count[task0]++; + partout[ind0].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind0].Pos[j] = pos[j]; + + if(task1 != task0) + { + size_t ind1 = send_offset[task1] + send_count[task1]++; + partout[ind1].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind1].Pos[j] = pos[j]; + } + if(task2 != task1 && task2 != task0) + { + size_t ind2 = send_offset[task2] + send_count[task2]++; + partout[ind2].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind2].Pos[j] = pos[j]; + } + if(task3 != task0 && task3 != task1 && task3 != task2) + { + size_t ind3 = send_offset[task3] + send_count[task3]++; + partout[ind3].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind3].Pos[j] = pos[j]; + } +#endif /* #ifndef FFT_COLUMN_BASED #else */ + } + } + + /* collect the send_count[] results from the other threads */ + for(j = 1; j < MaxThreads; j++) + for(i = 0; i < NTask; i++) + Sndpm_count[i]
+= Sndpm_count[i + j * multiNtask]; + + int flag_big = 0, flag_big_all; + for(i = 0; i < NTask; i++) + if(Sndpm_count[i] * sizeof(struct partbuf) > MPI_MESSAGE_SIZELIMIT_IN_BYTES) + flag_big = 1; + + /* produce a flag if any of the send sizes is above our transfer limit, in this case we will + * transfer the data in chunks. + */ + MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + /* exchange particle data */ + myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset, sizeof(struct partbuf), flag_big_all, + MPI_COMM_WORLD); + + myfree(partout); /* only the export buffer is released here; partin is not freed in this routine */ + + /* allocate density field */ + rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real)); + + /* clear local FFT-mesh density field */ + large_array_offset ii; + + for(ii = 0; ii < maxfftsize; ii++) + rhogrid[ii] = 0; + +#ifndef FFT_COLUMN_BASED + /* bin particle data onto mesh, in multi-threaded fashion */ + { + int tid = get_thread_num(); + + int first_y, count_y; + subdivide_evenly(GRID, MaxThreads, tid, &first_y, &count_y); + int last_y = first_y + count_y - 1; /* this thread only deposits into y-indices first_y..last_y */ + + for(i = 0; i < nimport; i++) + { + int slab_y = (int)(to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1])); + int slab_yy = slab_y + 1; + double dy = to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]) - slab_y; + int flag_slab_y, flag_slab_yy; + + if(slab_y >= first_y && slab_y <= last_y) + flag_slab_y = 1; + else + flag_slab_y = 0; + + if(slab_yy >= first_y && slab_yy <= last_y) + flag_slab_yy = 1; + else + flag_slab_yy = 0; + + if(flag_slab_y || flag_slab_yy) + { + double mass = partin[i].Mass; + + int slab_x = (int)(to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0])); + int slab_z = (int)(to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2])); + int slab_xx = slab_x + 1; + int slab_zz = slab_z + 1; + + double dx = to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]) - slab_x; + double dz = to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]) - slab_z; + + int
flag_slab_x, flag_slab_xx; + + if(myplan.slab_to_task[slab_x] == ThisTask) + { + slab_x -= myplan.first_slab_x_of_task[ThisTask]; /* make the x-index relative to this task's first slab */ + flag_slab_x = 1; + } + else + flag_slab_x = 0; + + if(myplan.slab_to_task[slab_xx] == ThisTask) + { + slab_xx -= myplan.first_slab_x_of_task[ThisTask]; + flag_slab_xx = 1; + } + else + flag_slab_xx = 0; + + if(flag_slab_x) + { + if(flag_slab_y) + { + rhogrid[FI(slab_x, slab_y, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz)); + rhogrid[FI(slab_x, slab_y, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz)); + } + + if(flag_slab_yy) + { + rhogrid[FI(slab_x, slab_yy, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz)); + rhogrid[FI(slab_x, slab_yy, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz)); + } + } + + if(flag_slab_xx) + { + if(flag_slab_y) + { + rhogrid[FI(slab_xx, slab_y, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz)); + rhogrid[FI(slab_xx, slab_y, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz)); + } + + if(flag_slab_yy) + { + rhogrid[FI(slab_xx, slab_yy, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz)); + rhogrid[FI(slab_xx, slab_yy, slab_zz)] += (mass * (dx) * (dy) * (dz)); + } + } + } + } + } + +#else /* #ifndef FFT_COLUMN_BASED */ + + struct data_cols + { + int col0, col1, col2, col3; + double dx, dy; + } * aux; + + aux = mymalloc("aux", nimport * sizeof(struct data_cols)); /* per imported particle: its four column indices and x/y weights */ + + for(i = 0; i < nimport; i++) + { + int slab_x = (int)(to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0])); + int slab_xx = slab_x + 1; + + int slab_y = (int)(to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1])); + int slab_yy = slab_y + 1; + + aux[i].dx = to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]) - slab_x; + aux[i].dy = to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]) - slab_y; + + aux[i].col0 = slab_x * GRID + slab_y; + aux[i].col1 = slab_x * GRID + slab_yy; + aux[i].col2 = slab_xx * GRID + slab_y; + aux[i].col3 = slab_xx * GRID + slab_yy; + } + + { + int tid = get_thread_num(); + + int first_col, last_col,
count_col; + subdivide_evenly(myplan.base_ncol, MaxThreads, tid, &first_col, &count_col); + last_col = first_col + count_col - 1; + first_col += myplan.base_firstcol; + last_col += myplan.base_firstcol; /* this thread only deposits into columns first_col..last_col */ + + for(i = 0; i < nimport; i++) + { + int flag0, flag1, flag2, flag3; + int col0 = aux[i].col0; + int col1 = aux[i].col1; + int col2 = aux[i].col2; + int col3 = aux[i].col3; + + if(col0 >= first_col && col0 <= last_col) + flag0 = 1; + else + flag0 = 0; + + if(col1 >= first_col && col1 <= last_col) + flag1 = 1; + else + flag1 = 0; + + if(col2 >= first_col && col2 <= last_col) + flag2 = 1; + else + flag2 = 0; + + if(col3 >= first_col && col3 <= last_col) + flag3 = 1; + else + flag3 = 0; + + if(flag0 || flag1 || flag2 || flag3) + { + double mass = partin[i].Mass; + + double dx = aux[i].dx; + double dy = aux[i].dy; + + int slab_z = (int)(to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2])); + int slab_zz = slab_z + 1; + + double dz = to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]) - slab_z; + + if(flag0) + { + rhogrid[FC(col0, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz)); + rhogrid[FC(col0, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz)); + } + + if(flag1) + { + rhogrid[FC(col1, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz)); + rhogrid[FC(col1, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz)); + } + + if(flag2) + { + rhogrid[FC(col2, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz)); + rhogrid[FC(col2, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz)); + } + + if(flag3) + { + rhogrid[FC(col3, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz)); + rhogrid[FC(col3, slab_zz)] += (mass * (dx) * (dy) * (dz)); + } + } + } + } + + myfree(aux); + +#endif /* #ifndef FFT_COLUMN_BASED #else */ +} + +/*! \brief If dim<0, this function reads out the potential, otherwise + * Cartesian force components. + * + * \param[in] grnr Grid number (0: base grid, 1: high res grid). + * \param[in] dim Dimension of component to be read out (< 0: potential).
+ * + * \return void + */ +void pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(int grnr, int dim) +{ +#ifdef EVALPOTENTIAL + /* factor to get potential */ + double fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3); +#endif /* #ifdef EVALPOTENTIAL */ + + double to_slab_fac = GRID / All.TotalMeshSize[grnr]; /* scales an offset from All.Corner[grnr] into mesh-cell units */ + + double *flistin = (double *)mymalloc("flistin", nimport * sizeof(double)); /* one value per entry of partin */ + double *flistout = (double *)mymalloc("flistout", nexport * sizeof(double)); /* one value per exported particle copy */ + + fft_real *grid; /* mesh the values are read from: rhogrid for dim < 0, forcegrid otherwise */ + + if(dim < 0) + grid = rhogrid; + else + grid = forcegrid; + + size_t i; + for(i = 0; i < nimport; i++) + { + flistin[i] = 0; + + int slab_x = (int)(to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0])); + int slab_y = (int)(to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1])); + int slab_z = (int)(to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2])); + + double dx = to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]) - slab_x; + double dy = to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]) - slab_y; + double dz = to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]) - slab_z; + + int slab_xx = slab_x + 1; + int slab_yy = slab_y + 1; + int slab_zz = slab_z + 1; + +#ifndef FFT_COLUMN_BASED + if(myplan.slab_to_task[slab_x] == ThisTask) /* only the part of the stencil stored on this task contributes */ + { + slab_x -= myplan.first_slab_x_of_task[ThisTask]; + + flistin[i] += +grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) + + grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) + + grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + + grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz); + } + + if(myplan.slab_to_task[slab_xx] == ThisTask) + { + slab_xx -= myplan.first_slab_x_of_task[ThisTask]; + + flistin[i] += +grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + + grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) + + grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) + +
grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz); + } +#else /* #ifndef FFT_COLUMN_BASED */ + int column0 = slab_x * GRID + slab_y; + int column1 = slab_x * GRID + slab_yy; + int column2 = slab_xx * GRID + slab_y; + int column3 = slab_xx * GRID + slab_yy; + + if(column0 >= myplan.base_firstcol && column0 <= myplan.base_lastcol) /* only columns in base_firstcol..base_lastcol contribute here */ + { + flistin[i] += +grid[FC(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) + + grid[FC(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz); + } + if(column1 >= myplan.base_firstcol && column1 <= myplan.base_lastcol) + { + flistin[i] += + +grid[FC(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FC(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz); + } + + if(column2 >= myplan.base_firstcol && column2 <= myplan.base_lastcol) + { + flistin[i] += + +grid[FC(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FC(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz); + } + + if(column3 >= myplan.base_firstcol && column3 <= myplan.base_lastcol) + { + flistin[i] += +grid[FC(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FC(column3, slab_zz)] * (dx) * (dy) * (dz); + } +#endif /* #ifndef FFT_COLUMN_BASED #else */ + } + + /* exchange the potential or force component data */ + int flag_big = 0, flag_big_all; + for(i = 0; i < NTask; i++) + if(Sndpm_count[i] * sizeof(double) > MPI_MESSAGE_SIZELIMIT_IN_BYTES) + flag_big = 1; + + /* produce a flag if any of the send sizes is above our transfer limit, in this case we will + * transfer the data in chunks.
+ */ + MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + /* exchange data; the send/receive roles of the count and offset tables are swapped relative to the particle export */ + myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset, sizeof(double), flag_big_all, + MPI_COMM_WORLD); + + /* now assign them to the correct particles */ + int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t); + + { + size_t *send_count = Sndpm_count + get_thread_num() * multiNtask; + size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask; + + int j; + for(j = 0; j < NTask; j++) + send_count[j] = 0; /* restart the running counters used to walk through flistout */ + + int i; + for(i = 0; i < NumPart; i++) + { + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + pos = SphP[i].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + + if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0]) + continue; + if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1]) + continue; + if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2]) + continue; + + int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0])); + int slab_xx = slab_x + 1; + +#ifndef FFT_COLUMN_BASED + int task0 = myplan.slab_to_task[slab_x]; + int task1 = myplan.slab_to_task[slab_xx]; + + double value = flistout[send_offset[task0] + send_count[task0]++]; /* partial result returned by task0 */ + + if(task0 != task1) + value += flistout[send_offset[task1] + send_count[task1]++]; /* add the partial result from the second task */ +#else /* #ifndef FFT_COLUMN_BASED */ + int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1])); + int slab_yy = slab_y + 1; + + int column0 = slab_x * GRID + slab_y; + int column1 = slab_x * GRID + slab_yy; + int column2 = slab_xx * GRID + slab_y; + int column3 = slab_xx * GRID + slab_yy; + + int task0, task1, task2, task3; + + if(column0 < myplan.pivotcol) + task0 = column0 / myplan.avg; + else + task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column1 < myplan.pivotcol) + task1 = column1 / myplan.avg; + else
+ task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column2 < myplan.pivotcol) + task2 = column2 / myplan.avg; + else + task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column3 < myplan.pivotcol) + task3 = column3 / myplan.avg; + else + task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + double value = flistout[send_offset[task0] + send_count[task0]++]; /* sum the partial results, one per distinct task */ + + if(task1 != task0) + value += flistout[send_offset[task1] + send_count[task1]++]; + + if(task2 != task1 && task2 != task0) + value += flistout[send_offset[task2] + send_count[task2]++]; + + if(task3 != task0 && task3 != task1 && task3 != task2) + value += flistout[send_offset[task3] + send_count[task3]++]; +#endif /* #ifndef FFT_COLUMN_BASED */ + +#ifdef PLACEHIGHRESREGION + if(grnr == 1) + if(!(pmforce_is_particle_high_res(P[i].Type, pos))) + continue; /* skipped only after its flistout entries were consumed, so the running counters stay in step */ +#endif /* #ifdef PLACEHIGHRESREGION */ + + if(dim < 0) + { +#ifdef EVALPOTENTIAL + P[i].PM_Potential += value * fac; +#endif /* #ifdef EVALPOTENTIAL */ + } + else + P[i].GravPM[dim] += value; + } + } + + int j; + /* restore total Sndpm_count */ + for(j = 1; j < MaxThreads; j++) + for(i = 0; i < NTask; i++) + Sndpm_count[i] += Sndpm_count[i + j * multiNtask]; + + myfree(flistout); + myfree(flistin); +} +#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */ + +/*! \brief Calculates the long-range non-periodic forces using the PM method. + * + * The potential is Gaussian filtered with Asmth, given in mesh-cell units. + * The potential is finite differenced using a 4-point finite differencing + * formula to obtain the force fields, which are then interpolated to the + * particle positions. We carry out a CIC charge assignment, and compute the + * potenial by Fourier transform methods. The CIC kernel is deconvolved. + * + * \param[in] grnr Grid number (0: base grid, 1 high res grid).
+ *
+ * \return 0
+ */
+int pmforce_nonperiodic(int grnr)
+{ /* NOTE: besides 0 on success, this function returns 1 when any particle on any rank lies outside the mesh of grid grnr
+   * (see the flagsum check below); in that case nothing has been allocated yet. */
+  int i, j, flag, flagsum, dim;
+
+  double tstart = second(); /* start time, only used for the timing message at the end */
+
+  mpi_printf("PM-NONPERIODIC: Starting non-periodic PM calculation (grid=%d) presently allocated=%g MB).\n", grnr,
+             AllocatedBytes / (1024.0 * 1024.0));
+
+#ifndef NUMPART_PER_TASK_LARGE
+  if((((long long)NumPart) << 3) >= (((long long)1) << 31)) /* 8 * NumPart must stay below 2^31 */
+    terminate("We are dealing with a too large particle number per MPI rank - enabling NUMPART_PER_TASK_LARGE might help.");
+#endif /* #ifndef NUMPART_PER_TASK_LARGE */
+
+  double fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3); /* to get potential */
+  fac *= 1 / (2 * All.TotalMeshSize[grnr] / GRID); /* for finite differencing */
+
+  /* first, check whether all particles lie in the allowed region */
+  for(i = 0, flag = 0; i < NumPart; i++)
+    {
+      MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        pos = SphP[i].Center; /* type-0 particles use the cell center instead of P[i].Pos */
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+#ifdef PLACEHIGHRESREGION
+      if(grnr == 0 || (grnr == 1 && pmforce_is_particle_high_res(P[i].Type, pos)))
+#endif /* #ifdef PLACEHIGHRESREGION */
+        {
+          for(j = 0; j < 3; j++)
+            {
+              if(pos[j] < All.Xmintot[grnr][j] || pos[j] > All.Xmaxtot[grnr][j])
+                {
+                  if(flag == 0) /* only the first offending particle of this rank is printed */
+                    {
+                      printf("Particle Id=%llu on task=%d with coordinates (%g|%g|%g) lies outside PM mesh.\n",
+                             (unsigned long long)P[i].ID, ThisTask, pos[0], pos[1], pos[2]);
+                      myflush(stdout);
+                    }
+                  flag++; /* each offending particle is counted once because of the break */
+                  break;
+                }
+            }
+        }
+    }
+
+  MPI_Allreduce(&flag, &flagsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* every rank obtains the global count */
+  if(flagsum > 0)
+    {
+      mpi_printf("PM-NONPERIODIC: In total %d particles were outside allowed range.\n", flagsum);
+      return 1; /* error - need to return because particles were outside allowed range */
+    }
+
+#ifdef PM_ZOOM_OPTIMIZED
+  pmforce_nonperiodic_zoom_optimized_prepare_density(grnr);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+  pmforce_nonperiodic_uniform_optimized_prepare_density(grnr);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+
+  /* allocate the memory to hold the FFT fields */
+  forcegrid = (fft_real *)mymalloc("forcegrid", maxfftsize * sizeof(fft_real));
+
+  workspace = forcegrid; /* workspace is an alias of forcegrid; no separate buffer is allocated here */
+
+#ifndef FFT_COLUMN_BASED
+  fft_of_rhogrid = (fft_complex *)&rhogrid[0]; /* slab-based: the transform is read from rhogrid itself */
+#else  /* #ifndef FFT_COLUMN_BASED */
+  fft_of_rhogrid = (fft_complex *)&workspace[0]; /* column-based: the transform is read from workspace */
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  /* Do the FFT of the density field */
+#ifndef FFT_COLUMN_BASED
+  my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], 1);
+#else  /* #ifndef FFT_COLUMN_BASED */
+  my_column_based_fft(&myplan, rhogrid, workspace, 1); /* result is in workspace, not in rhogrid ! */
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  /* multiply with kernel in Fourier space */
+  /* multiply with the Fourier transform of the Green's function (kernel) */
+  /* multiply with Green's function in order to obtain the potential */
+
+#ifdef FFT_COLUMN_BASED
+  for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip++)
+    {
+#else  /* #ifdef FFT_COLUMN_BASED */
+  for(int x = 0; x < GRID; x++)
+    for(int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
+      for(int z = 0; z < GRIDz; z++)
+        {
+#endif /* #ifdef FFT_COLUMN_BASED #else */
+
+#ifndef FFT_COLUMN_BASED
+          large_array_offset ip = ((large_array_offset)GRIDz) * (GRID * (y - myplan.slabstart_y) + x) + z;
+#endif /* #ifndef FFT_COLUMN_BASED */
+
+          /* complex product (re + i*im) of the density transform with the kernel transform of grid grnr */
+          double re = fft_of_rhogrid[ip][0] * fft_of_kernel[grnr][ip][0] - fft_of_rhogrid[ip][1] * fft_of_kernel[grnr][ip][1];
+          double im = fft_of_rhogrid[ip][0] * fft_of_kernel[grnr][ip][1] + fft_of_rhogrid[ip][1] * fft_of_kernel[grnr][ip][0];
+
+          fft_of_rhogrid[ip][0] = re;
+          fft_of_rhogrid[ip][1] = im;
+        }
+
+  /* Do the inverse FFT to get the potential */
+
+#ifndef FFT_COLUMN_BASED
+  my_slab_based_fft(&myplan, rhogrid, workspace, -1);
+#else  /* #ifndef FFT_COLUMN_BASED */
+  my_column_based_fft(&myplan, workspace, rhogrid, -1);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  /* Now rhogrid holds the potential */
+
+#ifdef EVALPOTENTIAL
+#ifdef PM_ZOOM_OPTIMIZED
+  pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(grnr, -1); /* dim = -1 requests the potential */
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+  pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(grnr, -1); /* dim = -1 requests the potential */
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+#endif /* #ifdef EVALPOTENTIAL */
+
+  /* get the force components by finite differencing of the potential for each dimension,
+   * and send the results back to the right CPUs
+   */
+  for(dim = 2; dim >= 0; dim--) /* Calculate each component of the force. */
+    {
+      /* we do the x component last, because for differencing the potential in the x-direction, we need to construct the transpose */
+#ifndef FFT_COLUMN_BASED
+      if(dim == 0)
+        my_slab_transposeA(&myplan, rhogrid, forcegrid); /* compute the transpose of the potential field for finite differencing */
+
+      /* only cells with all three indices in [2, GRID/2 - 2) are differenced, so the 4-point stencil never leaves that range
+       * by more than two cells */
+      for(int y = 2; y < GRID / 2 - 2; y++)
+        for(int x = 0; x < myplan.nslab_x; x++)
+          if(x + myplan.slabstart_x >= 2 && x + myplan.slabstart_x < GRID / 2 - 2)
+            for(int z = 2; z < GRID / 2 - 2; z++)
+              {
+                int yrr = y, yll = y, yr = y, yl = y;
+                int zrr = z, zll = z, zr = z, zl = z;
+
+                switch(dim)
+                  {
+                    case 0: /* note: for the x-direction, we difference the transposed direction (y) */
+                    case 1:
+                      yr  = y + 1;
+                      yl  = y - 1;
+                      yrr = y + 2;
+                      yll = y - 2;
+
+                      break;
+                    case 2:
+                      zr  = z + 1;
+                      zl  = z - 1;
+                      zrr = z + 2;
+                      zll = z - 2;
+
+                      break;
+                  }
+
+                if(dim == 0) /* transposed layout is addressed with TI(), the normal layout with FI() */
+                  forcegrid[TI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[TI(x, yl, zl)] - rhogrid[TI(x, yr, zr)]) -
+                                                  (1.0 / 6) * (rhogrid[TI(x, yll, zll)] - rhogrid[TI(x, yrr, zrr)]));
+                else
+                  forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, yl, zl)] - rhogrid[FI(x, yr, zr)]) -
+                                                  (1.0 / 6) * (rhogrid[FI(x, yll, zll)] - rhogrid[FI(x, yrr, zrr)]));
+              }
+
+      if(dim == 0)
+        my_slab_transposeB(&myplan, forcegrid, rhogrid); /* reverse the transpose from above */
+#else  /* #ifndef FFT_COLUMN_BASED */
+      fft_real *scratch = NULL, *forcep, *potp;
+
+      if(dim != 2)
+        {
+          scratch = mymalloc("scratch", myplan.fftsize * sizeof(fft_real)); /* need a third field as scratch space */
+          memcpy(scratch, rhogrid, myplan.fftsize * sizeof(fft_real));
+
+          if(dim == 1)
+            my_fft_swap23(&myplan, scratch, forcegrid);
+          else
+            my_fft_swap13(&myplan, scratch, forcegrid);
+        }
+
+      int ncols; /* number of local columns in the layout used for this dimension */
+      if(dim == 2)
+        ncols = myplan.base_ncol;
+      else if(dim == 1)
+        ncols = myplan.ncol_XZ;
+      else
+        ncols = myplan.ncol_YZ;
+
+      large_array_offset i; /* shadows the function-level int i inside this loop body */
+
+      for(i = 0; i < ncols; i++)
+        {
+          if(dim != 2)
+            {
+              forcep = &scratch[GRID * i]; /* swapped layout: forces go to scratch, potential is read from forcegrid */
+              potp   = &forcegrid[GRID * i];
+            }
+          else
+            {
+              forcep = &forcegrid[GRID2 * i]; /* base layout: forces go to forcegrid, potential is read from rhogrid */
+              potp   = &rhogrid[GRID2 * i];
+            }
+
+          int z;
+          for(z = 2; z < GRID / 2 - 2; z++)
+            {
+              int zr  = z + 1;
+              int zl  = z - 1;
+              int zrr = z + 2;
+              int zll = z - 2;
+
+              forcep[z] = fac * ((4.0 / 3) * (potp[zl] - potp[zr]) - (1.0 / 6) * (potp[zll] - potp[zrr]));
+            }
+        }
+
+      if(dim != 2)
+        {
+          if(dim == 1)
+            my_fft_swap23back(&myplan, scratch, forcegrid);
+          else
+            my_fft_swap13back(&myplan, scratch, forcegrid);
+
+          myfree(scratch);
+        }
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+#ifdef PM_ZOOM_OPTIMIZED
+      pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(grnr, dim);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+      pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(grnr, dim);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+    }
+
+  /* free stuff */
+  myfree(forcegrid);
+  myfree(rhogrid);
+
+#ifdef PM_ZOOM_OPTIMIZED
+  myfree(localfield_recvcount);
+  myfree(localfield_offset);
+  myfree(localfield_sendcount);
+  myfree(localfield_first);
+  myfree(localfield_data);
+  myfree(localfield_globalindex);
+  myfree(part);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+  myfree(partin);
+  myfree(Rcvpm_offset);
+  myfree(Rcvpm_count);
+  myfree(Sndpm_offset);
+  myfree(Sndpm_count);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+
+  double tend = second();
+
+  mpi_printf("PM-NONPERIODIC: done. (took %g seconds)\n", timediff(tstart, tend));
+
+  return 0;
+}
+
+/*!
\brief Sets-up the Greens function for the non-periodic potential in real + * space, and then converts it to Fourier space by means of an FFT. + * + * \return void + */ +void pm_setup_nonperiodic_kernel(void) +{ + int i, j, k, x, y, z; + double xx, yy, zz, r, u, fac; + + mpi_printf("PM-NONPERIODIC: Setting up non-periodic PM kernel (GRID=%d) presently allocated=%g MB).\n", (int)GRID, + AllocatedBytes / (1024.0 * 1024.0)); + + /* now set up kernel and its Fourier transform */ + +#if defined(GRAVITY_NOT_PERIODIC) + for(i = 0; i < maxfftsize; i++) /* clear local field */ + kernel[0][i] = 0; + +#ifndef FFT_COLUMN_BASED + for(i = myplan.slabstart_x; i < (myplan.slabstart_x + myplan.nslab_x); i++) + for(j = 0; j < GRID; j++) + { +#else /* #ifndef FFT_COLUMN_BASED */ + int c; + for(c = myplan.base_firstcol; c < (myplan.base_firstcol + myplan.base_ncol); c++) + { + i = c / GRID; + j = c % GRID; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + for(k = 0; k < GRID; k++) + { + xx = ((double)i) / GRID; + yy = ((double)j) / GRID; + zz = ((double)k) / GRID; + + if(xx >= 0.5) + xx -= 1.0; + if(yy >= 0.5) + yy -= 1.0; + if(zz >= 0.5) + zz -= 1.0; + + r = sqrt(xx * xx + yy * yy + zz * zz); + + u = 0.5 * r / (((double)ASMTH) / GRID); + + fac = 1 - erfc(u); + +#ifndef FFT_COLUMN_BASED + size_t ip = FI(i - myplan.slabstart_x, j, k); +#else /* #ifndef FFT_COLUMN_BASED */ + size_t ip = FC(c, k); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + if(r > 0) + kernel[0][ip] = -fac / r; + else + kernel[0][ip] = -1 / (sqrt(M_PI) * (((double)ASMTH) / GRID)); + } + } + + { + fft_real *workspc = (fft_real *)mymalloc("workspc", maxfftsize * sizeof(fft_real)); + /* Do the FFT of the kernel */ +#ifndef FFT_COLUMN_BASED + my_slab_based_fft(&myplan, kernel[0], workspc, 1); +#else /* #ifndef FFT_COLUMN_BASED */ + my_column_based_fft(&myplan, kernel[0], workspc, 1); /* result is in workspace, not in kernel */ + memcpy(kernel[0], workspc, maxfftsize * sizeof(fft_real)); +#endif /* #ifndef FFT_COLUMN_BASED 
#else */ + myfree(workspc); + } + +#endif /* #if defined(GRAVITY_NOT_PERIODIC) */ + +#if defined(PLACEHIGHRESREGION) + + for(i = 0; i < maxfftsize; i++) /* clear local field */ + kernel[1][i] = 0; + +#ifndef FFT_COLUMN_BASED + for(i = myplan.slabstart_x; i < (myplan.slabstart_x + myplan.nslab_x); i++) + for(j = 0; j < GRID; j++) + { +#else /* #ifndef FFT_COLUMN_BASED */ + int c; + for(c = myplan.base_firstcol; c < (myplan.base_firstcol + myplan.base_ncol); c++) + { + i = c / GRID; + j = c % GRID; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + for(k = 0; k < GRID; k++) + { + xx = ((double)i) / GRID; + yy = ((double)j) / GRID; + zz = ((double)k) / GRID; + + if(xx >= 0.5) + xx -= 1.0; + if(yy >= 0.5) + yy -= 1.0; + if(zz >= 0.5) + zz -= 1.0; + + r = sqrt(xx * xx + yy * yy + zz * zz); + + u = 0.5 * r / (((double)ASMTH) / GRID); + + fac = erfc(u * All.Asmth[1] / All.Asmth[0]) - erfc(u); + +#ifndef FFT_COLUMN_BASED + size_t ip = FI(i - myplan.slabstart_x, j, k); +#else /* #ifndef FFT_COLUMN_BASED */ + size_t ip = FC(c, k); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + if(r > 0) + kernel[1][ip] = -fac / r; + else + { + fac = 1 - All.Asmth[1] / All.Asmth[0]; + kernel[1][ip] = -fac / (sqrt(M_PI) * (((double)ASMTH) / GRID)); + } + } + } + + { + fft_real *workspc = (fft_real *)mymalloc("workspc", maxfftsize * sizeof(fft_real)); + /* Do the FFT of the kernel */ +#ifndef FFT_COLUMN_BASED + my_slab_based_fft(&myplan, kernel[1], workspc, 1); +#else /* #ifndef FFT_COLUMN_BASED */ + my_column_based_fft(&myplan, kernel[1], workspc, 1); /* result is in workspace, not in kernel */ + memcpy(kernel[1], workspc, maxfftsize * sizeof(fft_real)); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + myfree(workspc); + } + +#endif /* #if defined(PLACEHIGHRESREGION) */ + + /* deconvolve the Greens function twice with the CIC kernel */ +#ifdef FFT_COLUMN_BASED + + large_array_offset ip, ipcell; + + for(ip = 0; ip < myplan.second_transposed_ncells; ip++) + { + ipcell = ip + 
myplan.transposed_firstcol * GRID; + y = ipcell / (GRID * GRIDz); + int yr = ipcell % (GRID * GRIDz); + z = yr / GRID; + x = yr % GRID; +#else /* #ifdef FFT_COLUMN_BASED */ + for(x = 0; x < GRID; x++) + for(y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++) + for(z = 0; z < GRIDz; z++) + { +#endif /* #ifdef FFT_COLUMN_BASED #else */ + + double kx, ky, kz; + + if(x > GRID / 2) + kx = x - GRID; + else + kx = x; + if(y > GRID / 2) + ky = y - GRID; + else + ky = y; + if(z > GRID / 2) + kz = z - GRID; + else + kz = z; + + double k2 = kx * kx + ky * ky + kz * kz; + + if(k2 > 0) + { + double fx = 1, fy = 1, fz = 1; + + if(kx != 0) + { + fx = (M_PI * kx) / GRID; + fx = sin(fx) / fx; + } + if(ky != 0) + { + fy = (M_PI * ky) / GRID; + fy = sin(fy) / fy; + } + if(kz != 0) + { + fz = (M_PI * kz) / GRID; + fz = sin(fz) / fz; + } + + double ff = 1 / (fx * fy * fz); + ff = ff * ff * ff * ff; + +#ifndef FFT_COLUMN_BASED + large_array_offset ip = ((large_array_offset)GRIDz) * (GRID * (y - myplan.slabstart_y) + x) + z; +#endif /* #ifndef FFT_COLUMN_BASED */ +#if defined(GRAVITY_NOT_PERIODIC) + fft_of_kernel[0][ip][0] *= ff; + fft_of_kernel[0][ip][1] *= ff; +#endif /* #if defined(GRAVITY_NOT_PERIODIC) */ +#if defined(PLACEHIGHRESREGION) + fft_of_kernel[1][ip][0] *= ff; + fft_of_kernel[1][ip][1] *= ff; +#endif /* #if defined(PLACEHIGHRESREGION) */ + } + } + + /* end deconvolution */ +} + +#ifdef PM_ZOOM_OPTIMIZED + +/*! \brief Sort function for 'part' array indices. + * + * Sorts the indices into the 'part' array by the global index of the + * corresponding 'part_slab_data' struct. + * + * \param[in] a index to be compared. + * \param[in] b index to be compared. + * + * \return sort result + */ +static int pm_periodic_compare_sortindex(const void *a, const void *b) +{ + if(part[*(int *)a].globalindex < part[*(int *)b].globalindex) + return -1; + + if(part[*(int *)a].globalindex > part[*(int *)b].globalindex) + return +1; + + return 0; +} + +/*! 
\brief Implements the sorting function for mysort_pmperiodic()
+ *
+ * The index array is sorted using a merge sort algorithm.
+ *
+ * \param[in, out] b Index array to sort.
+ * \param[in] n Number of elements to sort.
+ * \param[out] t Temporary buffer array.
+ *
+ * \return void
+ */
+static void msort_pmperiodic_with_tmp(large_numpart_type *b, size_t n, large_numpart_type *t)
+{
+  large_numpart_type *tmp;
+  large_numpart_type *b1, *b2;
+  size_t n1, n2;
+
+  if(n <= 1) /* zero or one element: already sorted */
+    return;
+
+  n1 = n / 2; /* split into a lower half of n1 and an upper half of n2 entries */
+  n2 = n - n1;
+  b1 = b;
+  b2 = b + n1;
+
+  msort_pmperiodic_with_tmp(b1, n1, t); /* both halves reuse the same temporary buffer t */
+  msort_pmperiodic_with_tmp(b2, n2, t);
+
+  tmp = t;
+
+  while(n1 > 0 && n2 > 0) /* merge into t until one half is exhausted */
+    {
+      if(part[*b1].globalindex <= part[*b2].globalindex) /* '<=' takes the lower half first on ties, keeping the sort stable */
+        {
+          --n1;
+          *tmp++ = *b1++;
+        }
+      else
+        {
+          --n2;
+          *tmp++ = *b2++;
+        }
+    }
+
+  if(n1 > 0) /* leftovers of the lower half are appended to t */
+    memcpy(tmp, b1, n1 * sizeof(large_numpart_type));
+
+  memcpy(b, t, (n - n2) * sizeof(large_numpart_type)); /* the n2 unmerged entries of the upper half are already in place at the
+                                                          end of b, so only the first n - n2 merged entries are copied back */
+}
+
+/*! \brief Sorts the index array b of n entries by the globalindex field of
+ *         the referenced item in the 'part' array.
+ *
+ * The function allocates a temporary buffer of n * s bytes and runs the
+ * merge sort msort_pmperiodic_with_tmp() on b, which is treated as an array
+ * of large_numpart_type. The parameter s therefore has to be
+ * sizeof(large_numpart_type) (which equals sizeof(int) unless
+ * NUMPART_PER_TASK_LARGE is set).
+ *
+ * \param[in, out] b The index array to sort.
+ * \param[in] n Number of entries in array b.
+ * \param[in] s Size of each entry in bytes (see above).
+ * \param[in] cmp Comparison function; not called by this implementation.
+ *
+ * \return void
+ */
+static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *))
+{
+  const size_t size = n * s; /* size in bytes of the temporary buffer */
+
+  large_numpart_type *tmp = (large_numpart_type *)mymalloc("tmp", size);
+
+  msort_pmperiodic_with_tmp((large_numpart_type *)b, n, tmp); /* cmp is not passed on; the ordering is hard-wired there */
+
+  myfree(tmp);
+}
+#endif /* #ifdef PM_ZOOM_OPTIMIZED */
+
+#endif /* #if defined(PMGRID) && (defined(PLACEHIGHRESREGION) || defined(GRAVITY_NOT_PERIODIC)) */
diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_periodic.c b/src/amuse/community/arepo/src/gravity/pm/pm_periodic.c
new file mode 100644
index 0000000000..319404f797
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/pm/pm_periodic.c
@@ -0,0 +1,2034 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/gravity/pm/pm_periodic.c
+ * \date 05/2018
+ * \brief Routines for periodic PM-force computation.
+ * \details These routines support two different strategies for doing the + * particle data exchange to assemble the density field and to + * read out the forces and potentials: + * + * The default scheme sends the particle positions to the target + * slabs, and bins them there. This usually works well for + * homogeneously loaded boxes, but can be problematic for zoom-in + * runs. In the latter case, PM_ZOOM_OPTIMIZED can be activated, + * where the data is binned on the originating processor followed + * by assembly of the binned density field. + * + * In addition, the routines can be used either with a slab-based + * FFT (as is traditionally done in FFTW), or with a column-based + * FFT. The latter requires more communication and is hence + * usually slower than the slab-based one. But if the number of + * MPI ranks exceeds the number of cells per dimension, then the + * column-based one can still scale and offers balanced memory + * consumption, whereas this is not the case for the slab-based + * approach. To select the column-based FFT, the switch + * FFT_COLUMN_BASED can be activated. + * + * The switches PM_ZOOM_OPTIMIZED and FFT_COLUMN_BASED may also + * be combined, such that there are 4 main modes of how the PM + * routines may operate. + * + * It is also possible to use non-cubical boxes, by means of + * setting one or several of the LONG_X, LONG_Y, and LONG_Z + * options in the config file. The values need to be integers, + * and then BoxSize is stretched by that factor in the + * corresponding dimension. + * + * Much of the code is multi-threaded, so there should be some + * speed-up if OpenMP is used with NUM_THREADS > 1, but the + * benefit may be limited because the data transfer steps (which + * weigh in quite heavily) are not accelerated by this. + * + * If eight times the particle load per processor exceeds 2^31 + * ~ 2 billion, one should activate NUMPART_PER_TASK_LARGE.
The + * code will check this condition and terminate if this is + * violated, so there should hopefully be no severe risk to + * accidentally forget this. + * + * contains functions: + * void pm_init_periodic(void) + * void pmforce_zoom_optimized_prepare_density(int mode, int + * *typelist) + * void pmforce_zoom_optimized_readout_forces_or_potential(int + * dim) + * static void pmforce_uniform_optimized_prepare_density(int + * mode) + * static void pmforce_uniform_optimized_readout_forces_or_ + * potential(int dim) + * void pmforce_periodic(int mode, int *typelist) + * static int pm_periodic_compare_sortindex(const void *a, + * const void *b) + * static void msort_pmperiodic_with_tmp(large_numpart_type * b, + * size_t n, large_numpart_type * t) + * static void mysort_pmperiodic(void *b, size_t n, size_t s, + * int (*cmp) (const void *, const void *)) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#if defined(PMGRID) + +#define GRIDX (PMGRID * STRETCHX * DBX + DBX_EXTRA) +#define GRIDY (PMGRID * STRETCHY * DBY + DBY_EXTRA) +#define GRIDZ (PMGRID * STRETCHZ * DBZ + DBZ_EXTRA) + +#define GRIDz (GRIDZ / 2 + 1) +#define GRID2 (2 * GRIDz) + +#if(GRIDX > 1024) || (GRIDY > 1024) || (GRIDZ > 1024) +typedef long long large_array_offset; /* use a larger data type in this case so that we can always address all cells of the 3D grid + with a single index */ +#else /* #if (GRIDX > 1024) || (GRIDY > 1024) || (GRIDZ > 1024) */ +typedef unsigned int large_array_offset; +#endif /* #if (GRIDX > 1024) || (GRIDY > 1024) || (GRIDZ > 1024) #else */ + +#ifdef NUMPART_PER_TASK_LARGE +typedef long long large_numpart_type; /* if there is a risk that the local particle number times 8 overflows a 32-bit integer, this + data type should be used */ +#else /* 
#ifdef NUMPART_PER_TASK_LARGE */ +typedef int large_numpart_type; +#endif /* #ifdef NUMPART_PER_TASK_LARGE #else */ + +/* short-cut macros for accessing different 3D arrays */ +#define FI(x, y, z) (((large_array_offset)GRID2) * (GRIDY * (x) + (y)) + (z)) +#define FC(c, z) (((large_array_offset)GRID2) * ((c)-myplan.base_firstcol) + (z)) +#ifndef FFT_COLUMN_BASED +#define NI(x, y, z) (((large_array_offset)GRIDZ) * ((y) + (x)*myplan.nslab_y) + (z)) +#endif /* #ifndef FFT_COLUMN_BASED */ + +/* variables for power spectrum estimation */ +#ifndef BINS_PS +#define BINS_PS 2000 /* number of bins for power spectrum computation */ +#endif /* #ifndef BINS_PS */ +#ifndef POWERSPEC_FOLDFAC +#define POWERSPEC_FOLDFAC 16. /* folding factor to obtain an estimate of the power spectrum on very small scales */ +#endif /* #ifndef POWERSPEC_FOLDFAC */ + +static fft_plan myplan; /*!< In this structure, various bookkeeping variables for the distributed FFTs are stored */ + +/*! \var maxfftsize + * \brief maximum size of the local fft grid among all tasks + */ +static size_t maxfftsize; + +/*! \var rhogrid + * \brief This array hold the local part of the density field and + * after the FFTs the local part of the potential + * + * \var forcegrid + * \brief This array will contain the force field + * + * \var workspace + * \brief Workspace array used during the FFTs + */ +static fft_real *rhogrid, *forcegrid, *workspace; + +/*! \brief Array containing the FFT of #rhogrid + * + * This pointer points to the same array as #rhogrid, + * because in-place FFTs are used. + */ +static fft_complex *fft_of_rhogrid; + +/* Variable for power spectrum calculation */ +static double power_spec_totmass, power_spec_totmass2; +static long long power_spec_totnumpart; + +/*! \brief This routine generates the FFT-plans to carry out the FFTs later on. + * + * Some auxiliary variables for bookkeeping are also initialized. 
+ *
+ * \return void
+ */
+void pm_init_periodic(void)
+{ /* Steps: check that LONG_X/Y/Z are integral, set All.Asmth[0] and All.Rcut[0], create the six 1D FFTW plans stored in
+   * myplan, initialise the slab- or column-based decomposition, and set maxfftsize. */
+#ifdef LONG_X
+  if(LONG_X != (int)(LONG_X))
+    terminate("LONG_X must be an integer if used with PMGRID");
+#endif /* #ifdef LONG_X */
+
+#ifdef LONG_Y
+  if(LONG_Y != (int)(LONG_Y))
+    terminate("LONG_Y must be an integer if used with PMGRID");
+#endif /* #ifdef LONG_Y */
+
+#ifdef LONG_Z
+  if(LONG_Z != (int)(LONG_Z))
+    terminate("LONG_Z must be an integer if used with PMGRID");
+#endif /* #ifdef LONG_Z */
+
+  All.Asmth[0] = ASMTH * All.BoxSize / PMGRID; /* ASMTH mesh cells, expressed in units of BoxSize */
+  All.Rcut[0]  = RCUT * All.Asmth[0];
+
+  /* Set up the FFTW-3 plan files. */
+  int ndimx[1] = {GRIDX}; /* dimension of the 1D transforms */
+  int ndimy[1] = {GRIDY}; /* dimension of the 1D transforms */
+  int ndimz[1] = {GRIDZ}; /* dimension of the 1D transforms */
+
+  int max_GRID2 = 2 * (imax(imax(GRIDX, GRIDY), GRIDZ) / 2 + 1); /* 2 * (N / 2 + 1) reals for the longest of the three axes */
+
+  /* temporarily allocate some arrays to make sure that out-of-place plans are created */
+  rhogrid   = (fft_real *)mymalloc("rhogrid", max_GRID2 * sizeof(fft_real));
+  forcegrid = (fft_real *)mymalloc("forcegrid", max_GRID2 * sizeof(fft_real));
+
+#ifdef DOUBLEPRECISION_FFTW
+  int alignflag = 0;
+#else  /* #ifdef DOUBLEPRECISION_FFTW */
+  /* for single precision, the start of our FFT columns is presently only guaranteed to be 8-byte aligned */
+  int alignflag = FFTW_UNALIGNED;
+#endif /* #ifdef DOUBLEPRECISION_FFTW #else */
+
+  /* real-to-complex transform along z: GRID2 reals in, GRIDz complex values out */
+  myplan.forward_plan_zdir = FFTW(plan_many_dft_r2c)(1, ndimz, 1, rhogrid, 0, 1, GRID2, (fft_complex *)forcegrid, 0, 1, GRIDz,
+                                                     FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+#ifndef FFT_COLUMN_BASED
+  int stride = GRIDz; /* slab-based: element stride of the complex x/y transforms is GRIDz */
+#else  /* #ifndef FFT_COLUMN_BASED */
+  int stride = 1; /* column-based: the x/y transforms act on contiguous data */
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  myplan.forward_plan_ydir =
+      FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDY, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRIDY, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.forward_plan_xdir =
+      FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDX, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRIDX, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.backward_plan_xdir =
+      FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDX, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRIDX, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.backward_plan_ydir =
+      FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDY, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRIDY, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  /* complex-to-real transform along z: GRIDz complex values in, GRID2 reals out */
+  myplan.backward_plan_zdir = FFTW(plan_many_dft_c2r)(1, ndimz, 1, (fft_complex *)rhogrid, 0, 1, GRIDz, forcegrid, 0, 1, GRID2,
+                                                      FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myfree(forcegrid); /* the two arrays were only needed for plan creation */
+  myfree(rhogrid);
+
+#ifndef FFT_COLUMN_BASED
+
+  my_slab_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
+
+  /* the larger of the biggest x-slab and the biggest y-slab, times GRID2 reals per (x, y) pair */
+  maxfftsize = imax(myplan.largest_x_slab * GRIDY, myplan.largest_y_slab * GRIDX) * ((size_t)GRID2);
+
+#else /* #ifndef FFT_COLUMN_BASED */
+
+  my_column_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
+
+  maxfftsize = myplan.max_datasize;
+
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+}
+
+/* Below, the two functions
+ *
+ *   pmforce_ ...... _prepare_density()
+ * and
+ *   pmforce_ ...... _readout_forces_or_potential(int dim)
+ *
+ * are defined in two different versions, one that works better for uniform
+ * simulations, the other for zoom-in runs. Only one of the two sets is used,
+ * depending on the setting of PM_ZOOM_OPTIMIZED.
+ */
+#ifdef PM_ZOOM_OPTIMIZED
+static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *));
+static int pm_periodic_compare_sortindex(const void *a, const void *b);
+
+/*! \brief This structure links the particles to the mesh cells, to which they
+ *         contribute their mass.
+ *
+ * Each particle will have eight items of this structure in the #part array.
+ * For each of the eight mesh cells the CIC assignment will contribute,
+ * one item of this struct exists.
+ */
+static struct part_slab_data
+{
+  large_array_offset globalindex; /*!< index in the global density mesh */
+  large_numpart_type partindex; /*!< contains the local particle index shifted by 2^3, the first three bits encode to which part of the
+                                     CIC assignment this item belongs to */
+  large_array_offset localindex; /*!< index to a local copy of the corresponding mesh cell of the global density array (used during
+                                      local mass and force assignment) */
+} * part; /*!< array of part_slab_data linking the local particles to their mesh cells */
+
+/* per-task bookkeeping of the unique local field points: number of points destined for each task (sendcount), index of the
+ * first such point (first), exclusive prefix sum of sendcount (offset), and number of points received from each task
+ * (recvcount) */
+static size_t *localfield_sendcount, *localfield_first, *localfield_offset, *localfield_recvcount;
+static large_array_offset *localfield_globalindex, *import_globalindex; /* global mesh index of each local / imported point */
+static fft_real *localfield_data, *import_data; /* field value of each local / imported point */
+
+/*! \brief Prepares density field for PM calculation in zoom-optimized
+ *         algorithm.
+ *
+ * \param[in] mode Selects force calculation (0) or power spectrum
+ *            calculation (non-zero); for modes 2 and 3 the positions are
+ *            additionally scaled by POWERSPEC_FOLDFAC and
+ *            POWERSPEC_FOLDFAC^2, respectively, and wrapped onto the mesh.
+ * \param[in] typelist Which particle types to include; only read when mode
+ *            is non-zero (power spectrum).
+ *
+ * \return void
+ */
+void pmforce_zoom_optimized_prepare_density(int mode, int *typelist)
+{ /* Overview: (1) create eight part[] entries per particle, one per CIC cell; (2) sort an index array by global cell index
+   * and collapse duplicates into a list of unique local field points; (3) accumulate the CIC-weighted masses on that list;
+   * (4) allocate and clear rhogrid and add the contributions exchanged between tasks. */
+  large_numpart_type i;
+  int level, recvTask;
+  MPI_Status status;
+
+  double to_slab_fac =
+      PMGRID / All.BoxSize; /* note: This is the same as GRIDX / (All.BoxSize * LONG_X), and similarly for each dimension */
+
+  if(mode == 2)
+    to_slab_fac *= POWERSPEC_FOLDFAC;
+  if(mode == 3)
+    to_slab_fac *= POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC;
+
+  part = (struct part_slab_data *)mymalloc("part", 8 * (NumPart * sizeof(struct part_slab_data)));
+  large_numpart_type *part_sortindex = (large_numpart_type *)mymalloc("part_sortindex", 8 * (NumPart * sizeof(large_numpart_type)));
+
+  /* determine the cells each particle accesses */
+  for(i = 0; i < NumPart; i++)
+    {
+      MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      MyDouble posw[3], xtmp, ytmp, ztmp;
+      if(P[i].Type == 0)
+        {
+          posw[0] = WRAP_X(SphP[i].Center[0]);
+          posw[1] = WRAP_Y(SphP[i].Center[1]);
+          posw[2] = WRAP_Z(SphP[i].Center[2]);
+
+          pos = posw;
+        }
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+      int slab_x = (int)(to_slab_fac * pos[0]);
+      int slab_y = (int)(to_slab_fac * pos[1]);
+      int slab_z = (int)(to_slab_fac * pos[2]);
+
+      if(mode >= 2) /* folded positions can exceed the mesh by more than one period, hence the modulo */
+        {
+          slab_x %= GRIDX;
+          slab_y %= GRIDY;
+          slab_z %= GRIDZ;
+        }
+      else
+        {
+          if(slab_x >= GRIDX)
+            slab_x -= GRIDX;
+          if(slab_y >= GRIDY)
+            slab_y -= GRIDY;
+          if(slab_z >= GRIDZ)
+            slab_z -= GRIDZ;
+        }
+
+      large_numpart_type index_on_grid = ((large_numpart_type)i) << 3; /* first of the eight part[] entries of particle i */
+
+      for(int xx = 0; xx < 2; xx++)
+        for(int yy = 0; yy < 2; yy++)
+          for(int zz = 0; zz < 2; zz++)
+            {
+              int slab_xx = slab_x + xx;
+              int slab_yy = slab_y + yy;
+              int slab_zz = slab_z + zz;
+
+              if(slab_xx >= GRIDX)
+                slab_xx -= GRIDX;
+              if(slab_yy >= GRIDY)
+                slab_yy -= GRIDY;
+              if(slab_zz >= GRIDZ)
+                slab_zz -= GRIDZ;
+
+              large_array_offset offset = FI(slab_xx, slab_yy, slab_zz);
+
+              part[index_on_grid].partindex   = (i << 3) + (xx << 2) + (yy << 1) + zz; /* low three bits: xx, yy, zz */
+              part[index_on_grid].globalindex = offset;
+              part_sortindex[index_on_grid]   = index_on_grid;
+              index_on_grid++;
+            }
+    }
+
+  /* note: num_on_grid will be 8 times larger than the particle number, but num_field_points will generally be much smaller */
+
+  large_array_offset num_field_points;
+  large_numpart_type num_on_grid = ((large_numpart_type)NumPart) << 3;
+
+  /* bring the part-field into the order of the accessed cells. This allows the removal of duplicates */
+  mysort_pmperiodic(part_sortindex, num_on_grid, sizeof(large_numpart_type), pm_periodic_compare_sortindex);
+
+  if(num_on_grid > 0)
+    num_field_points = 1;
+  else
+    num_field_points = 0;
+
+  /* determine the number of unique field points */
+  for(i = 1; i < num_on_grid; i++)
+    {
+      if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
+        num_field_points++;
+    }
+
+  /* allocate the local field */
+  localfield_globalindex = (large_array_offset *)mymalloc_movable(&localfield_globalindex, "localfield_globalindex",
+                                                                  num_field_points * sizeof(large_array_offset));
+  localfield_data        = (fft_real *)mymalloc_movable(&localfield_data, "localfield_data", num_field_points * sizeof(fft_real));
+  localfield_first       = (size_t *)mymalloc_movable(&localfield_first, "localfield_first", NTask * sizeof(size_t));
+  localfield_sendcount   = (size_t *)mymalloc_movable(&localfield_sendcount, "localfield_sendcount", NTask * sizeof(size_t));
+  localfield_offset      = (size_t *)mymalloc_movable(&localfield_offset, "localfield_offset", NTask * sizeof(size_t));
+  localfield_recvcount   = (size_t *)mymalloc_movable(&localfield_recvcount, "localfield_recvcount", NTask * sizeof(size_t));
+
+  for(i = 0; i < NTask; i++)
+    {
+      localfield_first[i]     = 0;
+      localfield_sendcount[i] = 0;
+    }
+
+  /* establish the cross link between the part[ ]-array and the local list of
+   * mesh points. Also, count on which CPU the needed field points are stored.
+   * In this loop num_field_points is the index of the current unique point,
+   * not a count.
+   */
+  for(i = 0, num_field_points = 0; i < num_on_grid; i++)
+    {
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
+          num_field_points++;
+
+      part[part_sortindex[i]].localindex = num_field_points;
+
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
+          continue; /* duplicate of the previous cell: already registered */
+
+      localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex;
+
+#ifndef FFT_COLUMN_BASED
+      int slab = part[part_sortindex[i]].globalindex / (GRIDY * GRID2); /* x-index of the cell, cf. the FI() macro */
+      int task = myplan.slab_to_task[slab];
+#else  /* #ifndef FFT_COLUMN_BASED */
+      int task, column = part[part_sortindex[i]].globalindex / (GRID2);
+
+      if(column < myplan.pivotcol)
+        task = column / myplan.avg;
+      else
+        task = (column - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+      if(localfield_sendcount[task] == 0)
+        localfield_first[task] = num_field_points;
+
+      localfield_sendcount[task]++;
+    }
+  num_field_points++; /* turn the last index into a count. NOTE(review): with num_on_grid == 0 this yields 1 although the
+                         arrays above were requested with zero elements, so the clearing loop below touches
+                         localfield_data[0] - TODO confirm that mymalloc_movable() tolerates this */
+
+  for(i = 1, localfield_offset[0] = 0; i < NTask; i++)
+    localfield_offset[i] = localfield_offset[i - 1] + localfield_sendcount[i - 1];
+
+  myfree_movable(part_sortindex);
+  part_sortindex = NULL;
+
+  /* now bin the local particle data onto the mesh list */
+  for(i = 0; i < num_field_points; i++)
+    localfield_data[i] = 0;
+
+  for(i = 0; i < num_on_grid; i += 8) /* part[] itself was filled in particle order above, eight consecutive entries each */
+    {
+      int pindex = (part[i].partindex >> 3); /* strip the three CIC bits to recover the particle index */
+
+      MyDouble *pos;
+#ifdef CELL_CENTER_GRAVITY
+      MyDouble posw[3], xtmp, ytmp, ztmp;
+      if(P[pindex].Type == 0)
+        {
+          posw[0] = WRAP_X(SphP[pindex].Center[0]);
+          posw[1] = WRAP_Y(SphP[pindex].Center[1]);
+          posw[2] = WRAP_Z(SphP[pindex].Center[2]);
+
+          pos = posw;
+        }
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[pindex].Pos;
+
+      int slab_x = (int)(to_slab_fac * pos[0]);
+      int slab_y = (int)(to_slab_fac * pos[1]);
+      int slab_z = (int)(to_slab_fac * pos[2]);
+
+      double dx = to_slab_fac * pos[0] - slab_x; /* fractional offsets inside the cell, used as CIC weights */
+      double dy = to_slab_fac * pos[1] - slab_y;
+      double dz = to_slab_fac * pos[2] - slab_z;
+
+      double weight = P[pindex].Mass;
+
+      if(mode) /* only for power spectrum calculation */
+        if(typelist[P[pindex].Type] == 0)
+          continue;
+
+      /* entry i + 4 * xx + 2 * yy + zz belongs to the cell shifted by (xx, yy, zz) */
+      localfield_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
+      localfield_data[part[i + 1].localindex] += weight * (1.0 - dx) * (1.0 - dy) * dz;
+      localfield_data[part[i + 2].localindex] += weight * (1.0 - dx) * dy * (1.0 - dz);
+      localfield_data[part[i + 3].localindex] += weight * (1.0 - dx) * dy * dz;
+      localfield_data[part[i + 4].localindex] += weight * (dx) * (1.0 - dy) * (1.0 - dz);
+      localfield_data[part[i + 5].localindex] += weight * (dx) * (1.0 - dy) * dz;
+      localfield_data[part[i + 6].localindex] += weight * (dx)*dy * (1.0 - dz);
+      localfield_data[part[i + 7].localindex] += weight * (dx)*dy * dz;
+    }
+
+  rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real));
+
+  /* clear local FFT-mesh density field */
+  large_array_offset ii;
+  for(ii = 0; ii < maxfftsize; ii++)
+    rhogrid[ii] = 0;
+
+  /* exchange data and add contributions to the local mesh-patch */
+  MPI_Alltoall(localfield_sendcount, sizeof(size_t), MPI_BYTE, localfield_recvcount, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+    {
+      recvTask = ThisTask ^ level; /* XOR pairing: in each level two tasks are each other's partner */
+
+      if(recvTask < NTask)
+        {
+          if(level > 0)
+            {
+              import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real));
+              import_globalindex =
+                  (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset));
+
+              if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0)
+                {
+                  myMPI_Sendrecv(localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real),
+                                 MPI_BYTE, recvTask, TAG_NONPERIOD_A, import_data, localfield_recvcount[recvTask] * sizeof(fft_real),
+                                 MPI_BYTE, recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
+
+                  myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                 localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_B,
+                                 import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                 TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
+                }
+            }
+          else
+            {
+              /* level 0: the partner is this task itself, so its own section of the local field is used directly */
+              import_data        = localfield_data + localfield_offset[ThisTask];
+              import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+            }
+
+          /* note: here every element in rhogrid is only accessed once, so there should be no race condition */
+          for(i = 0; i < localfield_recvcount[recvTask]; i++)
+            {
+              /* determine offset in local FFT slab */
+#ifndef FFT_COLUMN_BASED
+              large_array_offset offset =
+                  import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRIDY * ((large_array_offset)GRID2);
+#else  /* #ifndef FFT_COLUMN_BASED */
+              large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+              rhogrid[offset] += import_data[i];
+            }
+
+          if(level > 0)
+            {
+              myfree(import_globalindex);
+              myfree(import_data);
+            }
+        }
+    }
+}
+
+/* \brief Function to read out the force component corresponding to spatial
+ * dimension 'dim'.
+ *
+ * \param[in] dim Dimension to be read out; If dim is negative, potential
+ * values are read out and assigned to particles.
+ * + * \return void + */ +void pmforce_zoom_optimized_readout_forces_or_potential(int dim) +{ +#ifdef EVALPOTENTIAL + double fac = 4 * M_PI * All.G / (pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ); /* to get potential */ +#endif /* #ifdef EVALPOTENTIAL */ + + large_numpart_type i; + int level, recvTask; + MPI_Status status; + + fft_real *grid; + + if(dim < 0) + grid = rhogrid; + else + grid = forcegrid; + + double to_slab_fac = PMGRID / All.BoxSize; + + for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ + { + recvTask = ThisTask ^ level; + + if(recvTask < NTask) + { + if(level > 0) + { + import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real)); + import_globalindex = + (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset)); + + if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0) + { + myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask], + localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_C, + import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, + TAG_NONPERIOD_C, MPI_COMM_WORLD, &status); + } + } + else + { + import_data = localfield_data + localfield_offset[ThisTask]; + import_globalindex = localfield_globalindex + localfield_offset[ThisTask]; + } + + for(i = 0; i < localfield_recvcount[recvTask]; i++) + { +#ifndef FFT_COLUMN_BASED + large_array_offset offset = + import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRIDY * ((large_array_offset)GRID2); +#else /* #ifndef FFT_COLUMN_BASED */ + large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + import_data[i] = grid[offset]; + } + + if(level > 0) + { + myMPI_Sendrecv(import_data, localfield_recvcount[recvTask] * 
sizeof(fft_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A, + localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real), + MPI_BYTE, recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status); + + myfree(import_globalindex); + myfree(import_data); + } + } + } + + /* read out the froce/potential values, which all have been assembled in localfield_data */ + for(i = 0; i < NumPart; i++) + { + large_numpart_type j = (i << 3); + + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + MyDouble posw[3], xtmp, ytmp, ztmp; + if(P[i].Type == 0) + { + posw[0] = WRAP_X(SphP[i].Center[0]); + posw[1] = WRAP_Y(SphP[i].Center[1]); + posw[2] = WRAP_Z(SphP[i].Center[2]); + + pos = posw; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + + int slab_x = (int)(to_slab_fac * pos[0]); + double dx = to_slab_fac * pos[0] - slab_x; + + int slab_y = (int)(to_slab_fac * pos[1]); + double dy = to_slab_fac * pos[1] - slab_y; + + int slab_z = (int)(to_slab_fac * pos[2]); + double dz = to_slab_fac * pos[2] - slab_z; + + double value = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) + + localfield_data[part[j + 1].localindex] * (1.0 - dx) * (1.0 - dy) * dz + + localfield_data[part[j + 2].localindex] * (1.0 - dx) * dy * (1.0 - dz) + + localfield_data[part[j + 3].localindex] * (1.0 - dx) * dy * dz + + localfield_data[part[j + 4].localindex] * (dx) * (1.0 - dy) * (1.0 - dz) + + localfield_data[part[j + 5].localindex] * (dx) * (1.0 - dy) * dz + + localfield_data[part[j + 6].localindex] * (dx)*dy * (1.0 - dz) + + localfield_data[part[j + 7].localindex] * (dx)*dy * dz; + + if(dim < 0) + { +#ifdef EVALPOTENTIAL + P[i].PM_Potential += value * fac; +#endif /* #ifdef EVALPOTENTIAL */ + } + else + P[i].GravPM[dim] += value; + } +} + +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + +/* + * Here come the routines for a different communication algorithm that is + * better suited for a homogenuously loaded boxes. + */ + +/*! 
\brief Structure for particle buffer. + */ +static struct partbuf +{ + MyFloat Mass; + MyFloat Pos[3]; +} * partin, *partout; + +static size_t nimport, nexport; + +static size_t *Sndpm_count, *Sndpm_offset; +static size_t *Rcvpm_count, *Rcvpm_offset; + +/*! \brief Prepares density field for PM calculation in uniform box optimized + * algorithm. + * + * \param[in] mode Modes force calculation. + * + * \return void + */ +static void pmforce_uniform_optimized_prepare_density(int mode) +{ + int i, j; + + double to_slab_fac = PMGRID / All.BoxSize; + + if(mode == 2) + to_slab_fac *= POWERSPEC_FOLDFAC; + if(mode == 3) + to_slab_fac *= POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC; + + /* We here enlarge NTask such that each thread gets his own cache line for send_count/send_offset. + * This should hopefully prevent a performance penalty from 'false sharing' for these variables + */ + int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t); + + Sndpm_count = (size_t *)mymalloc("Sndpm_count", MaxThreads * multiNtask * sizeof(size_t)); + Sndpm_offset = (size_t *)mymalloc("Sndpm_offset", MaxThreads * multiNtask * sizeof(size_t)); + Rcvpm_count = (size_t *)mymalloc("Rcvpm_count", NTask * sizeof(size_t)); + Rcvpm_offset = (size_t *)mymalloc("Rcvpm_offset", NTask * sizeof(size_t)); + + /* determine the slabs/columns each particles accesses */ + { + size_t *send_count = Sndpm_count + get_thread_num() * multiNtask; + + /* each threads needs to do theloop to clear its send_count[] array */ + for(j = 0; j < NTask; j++) + send_count[j] = 0; + + for(i = 0; i < NumPart; i++) + { + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + MyDouble posw[3], xtmp, ytmp, ztmp; + if(P[i].Type == 0) + { + posw[0] = WRAP_X(SphP[i].Center[0]); + posw[1] = WRAP_Y(SphP[i].Center[1]); + posw[2] = WRAP_Z(SphP[i].Center[2]); + + pos = posw; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + + int slab_x = (int)(to_slab_fac * pos[0]); + int slab_xx = slab_x 
+ 1; + + if(mode >= 2) + { + slab_x %= GRIDX; + slab_xx %= GRIDX; + } + else + { + if(slab_x >= GRIDX) + slab_x -= GRIDX; + + if(slab_xx >= GRIDX) + slab_xx -= GRIDX; + } + +#ifndef FFT_COLUMN_BASED + int task0 = myplan.slab_to_task[slab_x]; + int task1 = myplan.slab_to_task[slab_xx]; + + send_count[task0]++; + if(task0 != task1) + send_count[task1]++; +#else /* #ifndef FFT_COLUMN_BASED */ + int slab_y = (int)(to_slab_fac * pos[1]); + int slab_yy = slab_y + 1; + + if(mode >= 2) + { + slab_y %= GRIDY; + slab_yy %= GRIDY; + } + else + { + if(slab_y >= GRIDY) + slab_y -= GRIDY; + + if(slab_yy >= GRIDY) + slab_yy -= GRIDY; + } + + int column0 = slab_x * GRIDY + slab_y; + int column1 = slab_x * GRIDY + slab_yy; + int column2 = slab_xx * GRIDY + slab_y; + int column3 = slab_xx * GRIDY + slab_yy; + + int task0, task1, task2, task3; + + if(column0 < myplan.pivotcol) + task0 = column0 / myplan.avg; + else + task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column1 < myplan.pivotcol) + task1 = column1 / myplan.avg; + else + task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column2 < myplan.pivotcol) + task2 = column2 / myplan.avg; + else + task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column3 < myplan.pivotcol) + task3 = column3 / myplan.avg; + else + task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + send_count[task0]++; + if(task1 != task0) + send_count[task1]++; + if(task2 != task1 && task2 != task0) + send_count[task2]++; + if(task3 != task0 && task3 != task1 && task3 != task2) + send_count[task3]++; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + } + } + + /* collect thread-specific offset table and collect the results from the other threads */ + for(i = 0, Sndpm_offset[0] = 0; i < NTask; i++) + for(j = 0; j < MaxThreads; j++) + { + int ind_prev, ind = j * multiNtask + i; + if(ind > 0) + { + if(j == 0) + ind_prev = 
(MaxThreads - 1) * multiNtask + i - 1; + else + ind_prev = ind - multiNtask; + + Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev]; + } + } + + for(j = 1; j < MaxThreads; j++) + for(i = 0; i < NTask; i++) + Sndpm_count[i] += Sndpm_count[i + j * multiNtask]; + + MPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0; j < NTask; j++) + { + nexport += Sndpm_count[j]; + nimport += Rcvpm_count[j]; + + if(j > 0) + { + Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1]; + Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1]; + } + } + + /* allocate import and export buffer */ + partin = (struct partbuf *)mymalloc("partin", nimport * sizeof(struct partbuf)); + partout = (struct partbuf *)mymalloc("partout", nexport * sizeof(struct partbuf)); + + { + size_t *send_count = Sndpm_count + get_thread_num() * multiNtask; + size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask; + + for(j = 0; j < NTask; j++) + send_count[j] = 0; + + /* fill export buffer */ + for(i = 0; i < NumPart; i++) + { + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + MyDouble posw[3], xtmp, ytmp, ztmp; + if(P[i].Type == 0) + { + posw[0] = WRAP_X(SphP[i].Center[0]); + posw[1] = WRAP_Y(SphP[i].Center[1]); + posw[2] = WRAP_Z(SphP[i].Center[2]); + + pos = posw; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + + int slab_x = (int)(to_slab_fac * pos[0]); + int slab_xx = slab_x + 1; + + if(mode >= 2) + { + slab_x %= GRIDX; + slab_xx %= GRIDX; + } + else + { + if(slab_x >= GRIDX) + slab_x -= GRIDX; + + if(slab_xx >= GRIDX) + slab_xx -= GRIDX; + } + +#ifndef FFT_COLUMN_BASED + int task0 = myplan.slab_to_task[slab_x]; + int task1 = myplan.slab_to_task[slab_xx]; + + size_t ind0 = send_offset[task0] + send_count[task0]++; + partout[ind0].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind0].Pos[j] = pos[j]; + + 
if(task0 != task1) + { + size_t ind1 = send_offset[task1] + send_count[task1]++; + partout[ind1].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind1].Pos[j] = pos[j]; + } +#else /* #ifndef FFT_COLUMN_BASED */ + int slab_y = (int)(to_slab_fac * pos[1]); + int slab_yy = slab_y + 1; + + if(mode >= 2) + { + slab_y %= GRIDY; + slab_yy %= GRIDY; + } + else + { + if(slab_y >= GRIDY) + slab_y -= GRIDY; + + if(slab_yy >= GRIDY) + slab_yy -= GRIDY; + } + + int column0 = slab_x * GRIDY + slab_y; + int column1 = slab_x * GRIDY + slab_yy; + int column2 = slab_xx * GRIDY + slab_y; + int column3 = slab_xx * GRIDY + slab_yy; + + int task0, task1, task2, task3; + + if(column0 < myplan.pivotcol) + task0 = column0 / myplan.avg; + else + task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column1 < myplan.pivotcol) + task1 = column1 / myplan.avg; + else + task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column2 < myplan.pivotcol) + task2 = column2 / myplan.avg; + else + task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column3 < myplan.pivotcol) + task3 = column3 / myplan.avg; + else + task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + size_t ind0 = send_offset[task0] + send_count[task0]++; + partout[ind0].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind0].Pos[j] = pos[j]; + + if(task1 != task0) + { + size_t ind1 = send_offset[task1] + send_count[task1]++; + partout[ind1].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind1].Pos[j] = pos[j]; + } + if(task2 != task1 && task2 != task0) + { + size_t ind2 = send_offset[task2] + send_count[task2]++; + partout[ind2].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + partout[ind2].Pos[j] = pos[j]; + } + if(task3 != task0 && task3 != task1 && task3 != task2) + { + size_t ind3 = send_offset[task3] + send_count[task3]++; + partout[ind3].Mass = P[i].Mass; + for(j = 0; j < 3; j++) + 
partout[ind3].Pos[j] = pos[j]; + } +#endif /* #ifndef FFT_COLUMN_BASED #else */ + } + } + + /* collect the send_count[] results from the other threads */ + for(j = 1; j < MaxThreads; j++) + for(i = 0; i < NTask; i++) + Sndpm_count[i] += Sndpm_count[i + j * multiNtask]; + + int flag_big = 0, flag_big_all; + for(i = 0; i < NTask; i++) + if(Sndpm_count[i] * sizeof(struct partbuf) > MPI_MESSAGE_SIZELIMIT_IN_BYTES) + flag_big = 1; + + /* produce a flag if any of the send sizes is above our transfer limit, in this case we will + * transfer the data in chunks. + */ + MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + /* exchange particle data */ + myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset, sizeof(struct partbuf), flag_big_all, + MPI_COMM_WORLD); + + myfree(partout); + + /* allocate density field */ + rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real)); + + /* clear local FFT-mesh density field */ + large_array_offset ii; + for(ii = 0; ii < maxfftsize; ii++) + rhogrid[ii] = 0; + +#ifndef FFT_COLUMN_BASED + /* bin particle data onto mesh, in multi-threaded fashion */ + { + int tid = get_thread_num(); + + int first_y, count_y; + subdivide_evenly(GRIDY, MaxThreads, tid, &first_y, &count_y); + int last_y = first_y + count_y - 1; + + for(i = 0; i < nimport; i++) + { + int slab_y = (int)(to_slab_fac * partin[i].Pos[1]); + int slab_yy = slab_y + 1; + double dy = to_slab_fac * partin[i].Pos[1] - slab_y; + + if(mode >= 2) + { + slab_y %= GRIDY; + slab_yy %= GRIDY; + } + else + { + if(slab_y >= GRIDY) + slab_y -= GRIDY; + + if(slab_yy >= GRIDY) + slab_yy -= GRIDY; + } + + int flag_slab_y, flag_slab_yy; + + if(slab_y >= first_y && slab_y <= last_y) + flag_slab_y = 1; + else + flag_slab_y = 0; + + if(slab_yy >= first_y && slab_yy <= last_y) + flag_slab_yy = 1; + else + flag_slab_yy = 0; + + if(flag_slab_y || flag_slab_yy) + { + double mass = partin[i].Mass; + + int slab_x = 
(int)(to_slab_fac * partin[i].Pos[0]); + int slab_z = (int)(to_slab_fac * partin[i].Pos[2]); + int slab_xx = slab_x + 1; + int slab_zz = slab_z + 1; + + double dx = to_slab_fac * partin[i].Pos[0] - slab_x; + double dz = to_slab_fac * partin[i].Pos[2] - slab_z; + + if(mode >= 2) + { + slab_x %= GRIDX; + slab_z %= GRIDZ; + slab_xx %= GRIDX; + slab_zz %= GRIDZ; + } + else + { + if(slab_x >= GRIDX) + slab_x -= GRIDX; + if(slab_z >= GRIDZ) + slab_z -= GRIDZ; + + if(slab_xx >= GRIDX) + slab_xx -= GRIDX; + if(slab_zz >= GRIDZ) + slab_zz -= GRIDZ; + } + + int flag_slab_x, flag_slab_xx; + + if(myplan.slab_to_task[slab_x] == ThisTask) + { + slab_x -= myplan.first_slab_x_of_task[ThisTask]; + flag_slab_x = 1; + } + else + flag_slab_x = 0; + + if(myplan.slab_to_task[slab_xx] == ThisTask) + { + slab_xx -= myplan.first_slab_x_of_task[ThisTask]; + flag_slab_xx = 1; + } + else + flag_slab_xx = 0; + + if(flag_slab_x) + { + if(flag_slab_y) + { + rhogrid[FI(slab_x, slab_y, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz)); + rhogrid[FI(slab_x, slab_y, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz)); + } + + if(flag_slab_yy) + { + rhogrid[FI(slab_x, slab_yy, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz)); + rhogrid[FI(slab_x, slab_yy, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz)); + } + } + + if(flag_slab_xx) + { + if(flag_slab_y) + { + rhogrid[FI(slab_xx, slab_y, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz)); + rhogrid[FI(slab_xx, slab_y, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz)); + } + + if(flag_slab_yy) + { + rhogrid[FI(slab_xx, slab_yy, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz)); + rhogrid[FI(slab_xx, slab_yy, slab_zz)] += (mass * (dx) * (dy) * (dz)); + } + } + } + } + } + +#else /* #ifndef FFT_COLUMN_BASED */ + + struct data_cols + { + int col0, col1, col2, col3; + double dx, dy; + } * aux; + + aux = mymalloc("aux", nimport * sizeof(struct data_cols)); + + for(i = 0; i < nimport; i++) + { + int slab_x = (int)(to_slab_fac * partin[i].Pos[0]); + int 
slab_xx = slab_x + 1; + + int slab_y = (int)(to_slab_fac * partin[i].Pos[1]); + int slab_yy = slab_y + 1; + + aux[i].dx = to_slab_fac * partin[i].Pos[0] - slab_x; + aux[i].dy = to_slab_fac * partin[i].Pos[1] - slab_y; + + if(mode >= 2) + { + slab_x %= GRIDX; + slab_xx %= GRIDX; + slab_y %= GRIDY; + slab_yy %= GRIDY; + } + else + { + if(slab_x >= GRIDX) + slab_x -= GRIDX; + if(slab_xx >= GRIDX) + slab_xx -= GRIDX; + + if(slab_y >= GRIDY) + slab_y -= GRIDY; + if(slab_yy >= GRIDY) + slab_yy -= GRIDY; + } + + aux[i].col0 = slab_x * GRIDY + slab_y; + aux[i].col1 = slab_x * GRIDY + slab_yy; + aux[i].col2 = slab_xx * GRIDY + slab_y; + aux[i].col3 = slab_xx * GRIDY + slab_yy; + } + + { + int tid = get_thread_num(); + + int first_col, last_col, count_col; + subdivide_evenly(myplan.base_ncol, MaxThreads, tid, &first_col, &count_col); + last_col = first_col + count_col - 1; + first_col += myplan.base_firstcol; + last_col += myplan.base_firstcol; + + for(i = 0; i < nimport; i++) + { + int flag0, flag1, flag2, flag3; + int col0 = aux[i].col0; + int col1 = aux[i].col1; + int col2 = aux[i].col2; + int col3 = aux[i].col3; + + if(col0 >= first_col && col0 <= last_col) + flag0 = 1; + else + flag0 = 0; + + if(col1 >= first_col && col1 <= last_col) + flag1 = 1; + else + flag1 = 0; + + if(col2 >= first_col && col2 <= last_col) + flag2 = 1; + else + flag2 = 0; + + if(col3 >= first_col && col3 <= last_col) + flag3 = 1; + else + flag3 = 0; + + if(flag0 || flag1 || flag2 || flag3) + { + double mass = partin[i].Mass; + + double dx = aux[i].dx; + double dy = aux[i].dy; + + int slab_z = (int)(to_slab_fac * partin[i].Pos[2]); + int slab_zz = slab_z + 1; + + double dz = to_slab_fac * partin[i].Pos[2] - slab_z; + + if(mode >= 2) + { + slab_z %= GRIDZ; + slab_zz %= GRIDZ; + } + else + { + if(slab_z >= GRIDZ) + slab_z -= GRIDZ; + + if(slab_zz >= GRIDZ) + slab_zz -= GRIDZ; + } + + if(flag0) + { + rhogrid[FC(col0, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz)); + rhogrid[FC(col0, 
slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz)); + } + + if(flag1) + { + rhogrid[FC(col1, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz)); + rhogrid[FC(col1, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz)); + } + + if(flag2) + { + rhogrid[FC(col2, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz)); + rhogrid[FC(col2, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz)); + } + + if(flag3) + { + rhogrid[FC(col3, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz)); + rhogrid[FC(col3, slab_zz)] += (mass * (dx) * (dy) * (dz)); + } + } + } + } + + myfree(aux); + +#endif /* #ifndef FFT_COLUMN_BASED #else */ +} + +/* \brief Function to read out the force component corresponding to spatial + * dimension 'dim'. + * + * \param[in] dim Dimension to be read out; If dim is negative, potential values + * are read out and assigned to particles. + * + * \return void + */ +static void pmforce_uniform_optimized_readout_forces_or_potential(int dim) +{ +#ifdef EVALPOTENTIAL + double fac = 4 * M_PI * All.G / (pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ); /* to get potential */ +#endif /* #ifdef EVALPOTENTIAL */ + + double to_slab_fac = PMGRID / All.BoxSize; + + double *flistin = (double *)mymalloc("flistin", nimport * sizeof(double)); + double *flistout = (double *)mymalloc("flistout", nexport * sizeof(double)); + + fft_real *grid; + + if(dim < 0) + grid = rhogrid; + else + grid = forcegrid; + + size_t i; + for(i = 0; i < nimport; i++) + { + flistin[i] = 0; + + int slab_x = (int)(to_slab_fac * partin[i].Pos[0]); + int slab_y = (int)(to_slab_fac * partin[i].Pos[1]); + int slab_z = (int)(to_slab_fac * partin[i].Pos[2]); + + double dx = to_slab_fac * partin[i].Pos[0] - slab_x; + double dy = to_slab_fac * partin[i].Pos[1] - slab_y; + double dz = to_slab_fac * partin[i].Pos[2] - slab_z; + + if(slab_x >= GRIDX) + slab_x -= GRIDX; + if(slab_y >= GRIDY) + slab_y -= GRIDY; + if(slab_z >= GRIDZ) + slab_z -= GRIDZ; + + int slab_xx = slab_x + 1; + int slab_yy = slab_y + 1; + int slab_zz = 
slab_z + 1; + + if(slab_xx >= GRIDX) + slab_xx -= GRIDX; + if(slab_yy >= GRIDY) + slab_yy -= GRIDY; + if(slab_zz >= GRIDZ) + slab_zz -= GRIDZ; + +#ifndef FFT_COLUMN_BASED + if(myplan.slab_to_task[slab_x] == ThisTask) + { + slab_x -= myplan.first_slab_x_of_task[ThisTask]; + + flistin[i] += grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) + + grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) + + grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + + grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz); + } + + if(myplan.slab_to_task[slab_xx] == ThisTask) + { + slab_xx -= myplan.first_slab_x_of_task[ThisTask]; + + flistin[i] += grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + + grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) + + grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) + + grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz); + } +#else /* #ifndef FFT_COLUMN_BASED */ + int column0 = slab_x * GRIDY + slab_y; + int column1 = slab_x * GRIDY + slab_yy; + int column2 = slab_xx * GRIDY + slab_y; + int column3 = slab_xx * GRIDY + slab_yy; + + if(column0 >= myplan.base_firstcol && column0 <= myplan.base_lastcol) + { + flistin[i] += grid[FC(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) + + grid[FC(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz); + } + if(column1 >= myplan.base_firstcol && column1 <= myplan.base_lastcol) + { + flistin[i] += + grid[FC(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FC(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz); + } + + if(column2 >= myplan.base_firstcol && column2 <= myplan.base_lastcol) + { + flistin[i] += + grid[FC(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FC(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz); + } + + if(column3 >= myplan.base_firstcol && column3 <= myplan.base_lastcol) + { + flistin[i] += grid[FC(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FC(column3, 
slab_zz)] * (dx) * (dy) * (dz); + } +#endif /* #ifndef FFT_COLUMN_BASED #else */ + } + + /* exchange the potential component data */ + int flag_big = 0, flag_big_all; + for(i = 0; i < NTask; i++) + if(Sndpm_count[i] * sizeof(double) > MPI_MESSAGE_SIZELIMIT_IN_BYTES) + flag_big = 1; + + /* produce a flag if any of the send sizes is above our transfer limit, in this case we will + * transfer the data in chunks. + */ + MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + /* exchange data */ + myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset, sizeof(double), flag_big_all, + MPI_COMM_WORLD); + + /* now assign them to the correct particles */ + int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t); + + { + size_t *send_count = Sndpm_count + get_thread_num() * multiNtask; + size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask; + + int j; + for(j = 0; j < NTask; j++) + send_count[j] = 0; + + int i; + for(i = 0; i < NumPart; i++) + { + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + MyDouble posw[3], xtmp, ytmp, ztmp; + if(P[i].Type == 0) + { + posw[0] = WRAP_X(SphP[i].Center[0]); + posw[1] = WRAP_Y(SphP[i].Center[1]); + posw[2] = WRAP_Z(SphP[i].Center[2]); + + pos = posw; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + + int slab_x = (int)(to_slab_fac * pos[0]); + int slab_xx = slab_x + 1; + + if(slab_x >= GRIDX) + slab_x -= GRIDX; + + if(slab_xx >= GRIDX) + slab_xx -= GRIDX; + +#ifndef FFT_COLUMN_BASED + int task0 = myplan.slab_to_task[slab_x]; + int task1 = myplan.slab_to_task[slab_xx]; + + double value = flistout[send_offset[task0] + send_count[task0]++]; + + if(task0 != task1) + value += flistout[send_offset[task1] + send_count[task1]++]; +#else /* #ifndef FFT_COLUMN_BASED */ + int slab_y = (int)(to_slab_fac * pos[1]); + int slab_yy = slab_y + 1; + + if(slab_y >= GRIDY) + slab_y -= GRIDY; + + if(slab_yy >= GRIDY) + slab_yy 
-= GRIDY; + + int column0 = slab_x * GRIDY + slab_y; + int column1 = slab_x * GRIDY + slab_yy; + int column2 = slab_xx * GRIDY + slab_y; + int column3 = slab_xx * GRIDY + slab_yy; + + int task0, task1, task2, task3; + + if(column0 < myplan.pivotcol) + task0 = column0 / myplan.avg; + else + task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column1 < myplan.pivotcol) + task1 = column1 / myplan.avg; + else + task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column2 < myplan.pivotcol) + task2 = column2 / myplan.avg; + else + task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + if(column3 < myplan.pivotcol) + task3 = column3 / myplan.avg; + else + task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; + + double value = flistout[send_offset[task0] + send_count[task0]++]; + + if(task1 != task0) + value += flistout[send_offset[task1] + send_count[task1]++]; + + if(task2 != task1 && task2 != task0) + value += flistout[send_offset[task2] + send_count[task2]++]; + + if(task3 != task0 && task3 != task1 && task3 != task2) + value += flistout[send_offset[task3] + send_count[task3]++]; +#endif /* #ifndef FFT_COLUMN_BASED */ + if(dim < 0) + { +#ifdef EVALPOTENTIAL + P[i].PM_Potential += value * fac; +#endif /* #ifdef EVALPOTENTIAL */ + } + else + P[i].GravPM[dim] += value; + } + } + + int j; + /* restore total Sndpm_count */ + for(j = 1; j < MaxThreads; j++) + for(i = 0; i < NTask; i++) + Sndpm_count[i] += Sndpm_count[i + j * multiNtask]; + + myfree(flistout); + myfree(flistin); +} +#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */ + +/*! \brief Calculates the long-range periodic force given the particle + * positions using the PM method. + * + * The force is Gaussian filtered with Asmth, given in + * mesh-cell units. We carry out a CIC charge assignment, and compute the + * potential by fast Fourier transform methods. 
The potential is + * finite-differenced using a 4-point finite differencing formula, and the + * forces are interpolated tri-linearly to the particle positions. The CIC + * kernel is deconvolved. + * + * \param[in] mode For mode=0, normal force calculation, mode=1, only density + * field construction for a power spectrum calculation. In the + * later case, typelist flags the particle types that should be + * included in the density field. + * \param[in] typelist Flags of particle types included in power spectrum + * calculation. + * + * \return void + */ +void pmforce_periodic(int mode, int *typelist) +{ + int x, y, z, xx, yy, zz; + + double tstart = second(); + + if(mode == 0) + mpi_printf("PM-PERIODIC: Starting periodic PM calculation. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + +#ifndef NUMPART_PER_TASK_LARGE + if((((long long)NumPart) << 3) >= (((long long)1) << 31)) + terminate("We are dealing with a too large particle number per MPI rank - enabling NUMPART_PER_TASK_LARGE might help."); +#endif /* #ifndef NUMPART_PER_TASK_LARGE */ + + double asmth2 = All.Asmth[0] * All.Asmth[0]; + double d = All.BoxSize / PMGRID; + double dhalf = 0.5 * d; + + double fac = 4 * M_PI * All.G / (pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ); /* to get potential */ + + fac *= 1 / (2 * d); /* for finite differencing */ + +#ifdef PM_ZOOM_OPTIMIZED + pmforce_zoom_optimized_prepare_density(mode, typelist); +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + pmforce_uniform_optimized_prepare_density(mode); +#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */ + + /* allocate the memory to hold the FFT fields */ + + forcegrid = (fft_real *)mymalloc("forcegrid", maxfftsize * sizeof(fft_real)); + + workspace = forcegrid; + +#ifndef FFT_COLUMN_BASED + fft_of_rhogrid = (fft_complex *)&rhogrid[0]; +#else /* #ifndef FFT_COLUMN_BASED */ + fft_of_rhogrid = (fft_complex *)&workspace[0]; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + /* Do the FFT of the density field */ +#ifndef 
FFT_COLUMN_BASED + my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], 1); +#else /* #ifndef FFT_COLUMN_BASED */ + my_column_based_fft(&myplan, rhogrid, workspace, 1); /* result is in workspace, not in rhogrid ! */ +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + if(mode != 0) + { + /* used to measure powerspectrum */ + } + else + { + /* multiply with Green's function in order to obtain the potential (or forces for spectral diffencing) */ + + double kfacx = 2.0 * M_PI / (GRIDX * d); + double kfacy = 2.0 * M_PI / (GRIDY * d); + double kfacz = 2.0 * M_PI / (GRIDZ * d); + +#ifdef FFT_COLUMN_BASED + for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip++) + { + large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX; + y = ipcell / (GRIDX * GRIDz); + int yr = ipcell % (GRIDX * GRIDz); + z = yr / GRIDX; + x = yr % GRIDX; +#else /* #ifdef FFT_COLUMN_BASED */ + for(x = 0; x < GRIDX; x++) + for(y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++) + for(z = 0; z < GRIDz; z++) + { +#endif /* #ifdef FFT_COLUMN_BASED #else */ + if(x >= (GRIDX / 2)) + xx = x - GRIDX; + else + xx = x; + if(y >= (GRIDY / 2)) + yy = y - GRIDY; + else + yy = y; + if(z >= (GRIDZ / 2)) + zz = z - GRIDZ; + else + zz = z; + + double kx = kfacx * xx; + double ky = kfacy * yy; + double kz = kfacz * zz; + + double k2 = kx * kx + ky * ky + kz * kz; + + if(k2 > 0) + { + double smth = -exp(-k2 * asmth2) / k2; + + /* do deconvolution */ + + double fx = 1, fy = 1, fz = 1; + + if(xx != 0) + { + fx = kx * dhalf; + fx = sin(fx) / fx; + } + if(yy != 0) + { + fy = ky * dhalf; + fy = sin(fy) / fy; + } + if(zz != 0) + { + fz = kz * dhalf; + fz = sin(fz) / fz; + } + + double ff = 1 / (fx * fy * fz); + double deconv = ff * ff * ff * ff; + + smth *= deconv; /* deconvolution */ + +#ifndef FFT_COLUMN_BASED + large_array_offset ip = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z; +#endif /* #ifndef FFT_COLUMN_BASED */ + 
+ fft_of_rhogrid[ip][0] *= smth; + fft_of_rhogrid[ip][1] *= smth; + } + } + +#ifdef FFT_COLUMN_BASED + if(myplan.second_transposed_firstcol == 0) + fft_of_rhogrid[0][0] = fft_of_rhogrid[0][1] = 0.0; +#else /* #ifdef FFT_COLUMN_BASED */ + if(myplan.slabstart_y == 0) + fft_of_rhogrid[0][0] = fft_of_rhogrid[0][1] = 0.0; +#endif /* #ifdef FFT_COLUMN_BASED #else */ + + /* Do the inverse FFT to get the potential/forces */ + +#ifndef FFT_COLUMN_BASED + my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], -1); +#else /* #ifndef FFT_COLUMN_BASED */ + my_column_based_fft(&myplan, workspace, rhogrid, -1); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + /* Now rhogrid holds the potential/forces */ + +#ifdef EVALPOTENTIAL +#ifdef PM_ZOOM_OPTIMIZED + pmforce_zoom_optimized_readout_forces_or_potential(-1); +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + pmforce_uniform_optimized_readout_forces_or_potential(-1); +#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */ +#endif /* #ifdef EVALPOTENTIAL */ + + /* get the force components by finite differencing of the potential for each dimension, + * and send the results back to the right CPUs + */ + for(int dim = 2; dim >= 0; dim--) /* Calculate each component of the force. 
*/ + { + /* we do the x component last, because for differencing the potential in the x-direction, we need to construct the transpose + */ + +#ifndef FFT_COLUMN_BASED + if(dim == 0) + { + my_slab_transposeA(&myplan, rhogrid, + forcegrid); /* compute the transpose of the potential field for finite differencing */ + /* note: for the x-direction, we difference the transposed field */ + + for(x = 0; x < GRIDX; x++) + for(y = 0; y < myplan.nslab_y; y++) + for(z = 0; z < GRIDZ; z++) + { + int xrr = x + 2, xll = x - 2, xr = x + 1, xl = x - 1; + if(xr >= GRIDX) + xr -= GRIDX; + if(xrr >= GRIDX) + xrr -= GRIDX; + if(xl < 0) + xl += GRIDX; + if(xll < 0) + xll += GRIDX; + + forcegrid[NI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[NI(xl, y, z)] - rhogrid[NI(xr, y, z)]) - + (1.0 / 6) * (rhogrid[NI(xll, y, z)] - rhogrid[NI(xrr, y, z)])); + } + + my_slab_transposeB(&myplan, forcegrid, rhogrid); /* reverse the transpose from above */ + } + else + { + for(y = 0; y < GRIDY; y++) + for(x = 0; x < myplan.nslab_x; x++) + for(z = 0; z < GRIDZ; z++) + { + if(dim == 1) + { + int yr = y + 1, yl = y - 1, yrr = y + 2, yll = y - 2; + if(yr >= GRIDY) + yr -= GRIDY; + if(yrr >= GRIDY) + yrr -= GRIDY; + if(yl < 0) + yl += GRIDY; + if(yll < 0) + yll += GRIDY; + + forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, yl, z)] - rhogrid[FI(x, yr, z)]) - + (1.0 / 6) * (rhogrid[FI(x, yll, z)] - rhogrid[FI(x, yrr, z)])); + } + else if(dim == 2) + { + int zr = z + 1, zl = z - 1, zrr = z + 2, zll = z - 2; + if(zr >= GRIDZ) + zr -= GRIDZ; + if(zrr >= GRIDZ) + zrr -= GRIDZ; + if(zl < 0) + zl += GRIDZ; + if(zll < 0) + zll += GRIDZ; + + forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, y, zl)] - rhogrid[FI(x, y, zr)]) - + (1.0 / 6) * (rhogrid[FI(x, y, zll)] - rhogrid[FI(x, y, zrr)])); + } + } + } + +#else /* #ifndef FFT_COLUMN_BASED */ + + if(dim == 2) + { + for(large_array_offset i = 0; i < myplan.base_ncol; i++) + { + fft_real *forcep = &forcegrid[GRID2 * i]; + fft_real *potp = &rhogrid[GRID2 * 
i]; + + for(int z = 0; z < GRIDZ; z++) + { + int zr = z + 1; + int zl = z - 1; + int zrr = z + 2; + int zll = z - 2; + + if(zr >= GRIDZ) + zr -= GRIDZ; + if(zrr >= GRIDZ) + zrr -= GRIDZ; + if(zl < 0) + zl += GRIDZ; + if(zll < 0) + zll += GRIDZ; + + forcep[z] = fac * ((4.0 / 3) * (potp[zl] - potp[zr]) - (1.0 / 6) * (potp[zll] - potp[zrr])); + } + } + } + else if(dim == 1) + { + fft_real *scratch = mymalloc("scratch", myplan.fftsize * sizeof(fft_real)); /* need a third field as scratch space */ + memcpy(scratch, rhogrid, myplan.fftsize * sizeof(fft_real)); + + my_fft_swap23(&myplan, scratch, forcegrid); + + for(large_array_offset i = 0; i < myplan.ncol_XZ; i++) + { + fft_real *forcep = &scratch[GRIDY * i]; + fft_real *potp = &forcegrid[GRIDY * i]; + + for(int y = 0; y < GRIDY; y++) + { + int yr = y + 1; + int yl = y - 1; + int yrr = y + 2; + int yll = y - 2; + + if(yr >= GRIDY) + yr -= GRIDY; + if(yrr >= GRIDY) + yrr -= GRIDY; + if(yl < 0) + yl += GRIDY; + if(yll < 0) + yll += GRIDY; + + forcep[y] = fac * ((4.0 / 3) * (potp[yl] - potp[yr]) - (1.0 / 6) * (potp[yll] - potp[yrr])); + } + } + + my_fft_swap23back(&myplan, scratch, forcegrid); + myfree(scratch); + } + else if(dim == 0) + { + fft_real *scratch = mymalloc("scratch", myplan.fftsize * sizeof(fft_real)); /* need a third field as scratch space */ + memcpy(scratch, rhogrid, myplan.fftsize * sizeof(fft_real)); + + my_fft_swap13(&myplan, scratch, forcegrid); + + for(large_array_offset i = 0; i < myplan.ncol_YZ; i++) + { + fft_real *forcep = &scratch[GRIDX * i]; + fft_real *potp = &forcegrid[GRIDX * i]; + + for(int x = 0; x < GRIDX; x++) + { + int xr = x + 1; + int xl = x - 1; + int xrr = x + 2; + int xll = x - 2; + + if(xr >= GRIDX) + xr -= GRIDX; + if(xrr >= GRIDX) + xrr -= GRIDX; + if(xl < 0) + xl += GRIDX; + if(xll < 0) + xll += GRIDX; + + forcep[x] = fac * ((4.0 / 3) * (potp[xl] - potp[xr]) - (1.0 / 6) * (potp[xll] - potp[xrr])); + } + } + + my_fft_swap13back(&myplan, scratch, forcegrid); + myfree(scratch); + } 
+#endif /* #ifndef FFT_COLUMN_BASED #else */ + +#ifdef PM_ZOOM_OPTIMIZED + pmforce_zoom_optimized_readout_forces_or_potential(dim); +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + pmforce_uniform_optimized_readout_forces_or_potential(dim); +#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */ + } + } + + /* free stuff */ + + myfree(forcegrid); + myfree(rhogrid); + +#ifdef PM_ZOOM_OPTIMIZED + myfree(localfield_recvcount); + myfree(localfield_offset); + myfree(localfield_sendcount); + myfree(localfield_first); + myfree(localfield_data); + myfree(localfield_globalindex); + myfree(part); +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + myfree(partin); + myfree(Rcvpm_offset); + myfree(Rcvpm_count); + myfree(Sndpm_offset); + myfree(Sndpm_count); +#endif /* #ifdef PM_ZOOM_OPTIMIZED */ + + double tend = second(); + + if(mode == 0) + mpi_printf("PM-PERIODIC: done. (took %g seconds)\n", timediff(tstart, tend)); +} + +#ifdef PM_ZOOM_OPTIMIZED + +/*! \brief Sort function for 'part' array indices. + * + * Sorts the indices into the 'part' array by the global index of the + * corresponding 'part_slab_data' struct. + * + * \param[in] a Index to be compared. + * \param[in] b Index to be compared. + * + * \return sort result + */ +static int pm_periodic_compare_sortindex(const void *a, const void *b) +{ + if(part[*(int *)a].globalindex < part[*(int *)b].globalindex) + return -1; + + if(part[*(int *)a].globalindex > part[*(int *)b].globalindex) + return +1; + + return 0; +} + +/*! \brief Implements the sorting function for mysort_pmperiodic(). + * + * The index array is sorted using a merge sort algorithm. + * + * \param[in, out] b Index array to sort. + * \param[in] n Number of elements to sort. + * \param[out] t Temporary buffer array. 
+ * + * \return void + */ +static void msort_pmperiodic_with_tmp(large_numpart_type *b, size_t n, large_numpart_type *t) +{ + large_numpart_type *tmp; + large_numpart_type *b1, *b2; + size_t n1, n2; + + if(n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = b + n1; + + msort_pmperiodic_with_tmp(b1, n1, t); + msort_pmperiodic_with_tmp(b2, n2, t); + + tmp = t; + + while(n1 > 0 && n2 > 0) + { + if(part[*b1].globalindex <= part[*b2].globalindex) + { + --n1; + *tmp++ = *b1++; + } + else + { + --n2; + *tmp++ = *b2++; + } + } + + if(n1 > 0) + memcpy(tmp, b1, n1 * sizeof(large_numpart_type)); + + memcpy(b, t, (n - n2) * sizeof(large_numpart_type)); +} + +/*! \brief Sort the index array b of n entries using the sort kernel + * cmp. + * + * The parameter s is set to sizeof(int). The index array b is sorted + * according to the globalindex field of the referenced item in the 'part' + * array. + * + * \param[in, out] b The index array to sort. + * \param[in] n Number of entries in array b. + * \param[in] s Size of each entry (must be sizeof(int)). + * \param[in] cmp Comparison function. + */ +static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)) +{ + const size_t size = n * s; + + large_numpart_type *tmp = (large_numpart_type *)mymalloc("tmp", size); + + msort_pmperiodic_with_tmp((large_numpart_type *)b, n, tmp); + + myfree(tmp); +} +#endif /* #ifdef PM_ZOOM_OPTIMIZED */ + +#endif /* #if defined(PMGRID) */ diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c b/src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c new file mode 100644 index 0000000000..6ace982b68 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c @@ -0,0 +1,905 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/pm/pm_periodic2d.c + * \date 05/2018 + * \brief Routines for periodic PM-force computation in 2d. + * \details contains functions: + * void pm2d_init_periodic(void) + * void pm2d_init_periodic_allocate(void) + * void pm2d_init_periodic_free(void) + * void pm2d_force_periodic(int mode) + * int pm2d_periodic_compare_sortindex(const void *a, const + * void *b) + * static void pm2d_msort_pmperiodic_with_tmp(int *b, size_t n, + * int *t) + * void pm2d_mysort_pmperiodic(void *b, size_t n, size_t s, + * int (*cmp) (const void *, const void *)) + * void pm2d_periodic_transposeA(fftw_real * field, + * fftw_real * scratch) + * void pm2d_periodic_transposeB(fftw_real * field, + * fftw_real * scratch) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#ifdef PMGRID +#ifndef GRAVITY_NOT_PERIODIC +#ifdef TWODIMS + +#ifdef NOTYPEPREFIX_FFTW +#include +#else /* #ifdef NOTYPEPREFIX_FFTW */ +#ifdef DOUBLEPRECISION_FFTW +#include /* double precision FFTW */ +#else /* #ifdef DOUBLEPRECISION_FFTW */ 
+#include
+#endif /* #ifdef DOUBLEPRECISION_FFTW #else */
+#endif /* #ifdef NOTYPEPREFIX_FFTW #else */
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#define PMGRID2 (2 * (PMGRID / 2 + 1)) /* row stride of the real-space fields, cf. rhogrid[PMGRID2 * x + y] */
+
+#if(PMGRID > 1024)
+typedef long long large_array_offset;
+#else /* #if (PMGRID > 1024) */
+typedef unsigned int large_array_offset;
+#endif /* #if (PMGRID > 1024) #else */
+
+#define d_fftw_real fftw_real
+
+static rfftwnd_mpi_plan fft_forward_plan, fft_inverse_plan; /* created in pm2d_init_periodic() */
+
+static int slab_to_task[PMGRID];  /* task that holds each x-slab; filled in pm2d_init_periodic() */
+static int *slabs_x_per_task;     /* nslab_x of every task (MPI_Allgather) */
+static int *first_slab_x_of_task; /* slabstart_x of every task (MPI_Allgather) */
+
+static int slabstart_x, nslab_x, slabstart_y, nslab_y, smallest_slab; /* local slab ranges; smallest_slab = min of nslab_x over tasks */
+
+static int fftsize, maxfftsize; /* local size from rfftwnd_mpi_local_sizes() and its maximum over all tasks */
+
+static fftw_real *rhogrid, *forcegrid, *workspace; /* workspace is set to forcegrid in pm2d_init_periodic_allocate() */
+static d_fftw_real *d_rhogrid, *d_forcegrid, *d_workspace; /* the same buffers, cast to d_fftw_real */
+
+static fftw_complex *fft_of_rhogrid; /* complex view of rhogrid */
+
+static MyFloat to_slab_fac; /* PMGRID / All.BoxSize */
+
+void pm2d_periodic_transposeA(fftw_real *field, fftw_real *scratch);
+void pm2d_periodic_transposeB(fftw_real *field, fftw_real *scratch);
+int pm2d_periodic_compare_sortindex(const void *a, const void *b);
+
+/*! \brief Data for fft slab.
+ *
+ *  One entry per (particle, mesh corner) pair; four entries are created
+ *  for every particle in pm2d_force_periodic().
+ */
+static struct part_slab_data
+{
+  large_array_offset globalindex; /* PMGRID2 * slab_xx + slab_yy of the mesh cell */
+  int partindex;                  /* (particle index << 2) + (xx << 1) + yy */
+  int localindex;                 /* position of the cell in the localfield_* lists */
+} * part;
+
+static int *part_sortindex; /* indices into part[], sorted by globalindex in pm2d_force_periodic() */
+
+/*! \brief This routine generates the FFTW plans to carry out the parallel
+ *  FFTs later on. Some auxiliary variables are also initialized.
+ *
+ *  Sets All.Asmth[0] and All.Rcut[0], creates the forward and inverse
+ *  plans, queries the local slab ranges, and fills slab_to_task,
+ *  slabs_x_per_task, first_slab_x_of_task, smallest_slab, to_slab_fac and
+ *  maxfftsize.
+ *
+ * \return void
+ */
+void pm2d_init_periodic(void)
+{
+  int i;
+  int slab_to_task_local[PMGRID]; /* this task's contribution to slab_to_task */
+
+  All.Asmth[0] = ASMTH * All.BoxSize / PMGRID;
+  All.Rcut[0]  = RCUT * All.Asmth[0];
+
+  /* Set up the FFTW plan files. */
+
+  fft_forward_plan = rfftw2d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
+  fft_inverse_plan = rfftw2d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
+
+  /* Work out the slab ranges on each processor. */
+
+  rfftwnd_mpi_local_sizes(fft_forward_plan, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y, &fftsize);
+
+  for(i = 0; i < PMGRID; i++)
+    slab_to_task_local[i] = 0;
+
+  for(i = 0; i < nslab_x; i++)
+    slab_to_task_local[slabstart_x + i] = ThisTask;
+
+  MPI_Allreduce(slab_to_task_local, slab_to_task, PMGRID, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* the sum is the owner's rank provided each slab is held by exactly one task -- TODO confirm */
+
+  MPI_Allreduce(&nslab_x, &smallest_slab, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+
+  slabs_x_per_task = (int *)mymalloc("slabs_per_task", NTask * sizeof(int)); /* NOTE(review): no matching myfree in the visible part of this file */
+  MPI_Allgather(&nslab_x, 1, MPI_INT, slabs_x_per_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  first_slab_x_of_task = (int *)mymalloc("first_slab_of_task", NTask * sizeof(int)); /* NOTE(review): no matching myfree in the visible part of this file */
+  MPI_Allgather(&slabstart_x, 1, MPI_INT, first_slab_x_of_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  to_slab_fac = PMGRID / All.BoxSize;
+
+  MPI_Allreduce(&fftsize, &maxfftsize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  printf("maxfftsize=%d PMGRID=%d\n", maxfftsize, PMGRID); /* not guarded by ThisTask == 0, so every task prints this line */
+}
+
+/*! \brief Allocates memory for 2d PM algorithm.
+ *
+ * This function allocates the memory needed to compute the long-range PM
+ * force. Two fields are allocated: one to hold the density (and its FFT, and
+ * then the real-space potential) and one to hold the force field obtained by
+ * finite differencing. The workspace pointer, which is used both as
+ * workspace for the parallel FFT and as buffer for the communication
+ * algorithm used in the force computation, is an alias of the force field.
+ *
+ * \return void
+ */
+void pm2d_init_periodic_allocate(void)
+{
+  double bytes_tot = 0; /* sum of the sizes requested below; only used for the log message */
+  size_t bytes;
+
+  /* allocate the memory to hold the FFT fields; pm2d_init_periodic_free()
+     releases these four buffers in the reverse order */
+
+  rhogrid = (fftw_real *)mymalloc("rhogrid", bytes = maxfftsize * sizeof(d_fftw_real));
+  bytes_tot += bytes;
+
+  forcegrid = (fftw_real *)mymalloc("forcegrid", bytes = maxfftsize * sizeof(d_fftw_real));
+  bytes_tot += bytes;
+
+  part = (struct part_slab_data *)mymalloc("part", bytes = 4 * NumPart * sizeof(struct part_slab_data)); /* four mesh corners per particle */
+  bytes_tot += bytes;
+
+  part_sortindex = (int *)mymalloc("part_sortindex", bytes = 4 * NumPart * sizeof(int));
+  bytes_tot += bytes;
+
+  if(ThisTask == 0)
+    printf("Using %g MByte for periodic FFT computation. (presently allocated=%g MB)\n", bytes_tot / (1024.0 * 1024.0),
+           AllocatedBytes / (1024.0 * 1024.0));
+
+  workspace = forcegrid; /* no separate workspace buffer: it shares the memory of forcegrid */
+
+  fft_of_rhogrid = (fftw_complex *)&rhogrid[0]; /* complex view of the same memory as rhogrid */
+
+  d_rhogrid   = (d_fftw_real *)rhogrid;
+  d_forcegrid = (d_fftw_real *)forcegrid;
+  d_workspace = (d_fftw_real *)workspace;
+}
+
+/*! \brief This routine frees the space allocated for the parallel FFT
+ *  algorithm.
+ *
+ *  Releases the buffers obtained in pm2d_init_periodic_allocate().
+ *
+ * \return void
+ */
+void pm2d_init_periodic_free(void)
+{
+  /* free the memory that holds the FFT fields, in reverse order of allocation */
+  myfree(part_sortindex);
+  myfree(part);
+  myfree(forcegrid);
+  myfree(rhogrid);
+}
+
+/*! \brief Long range periodic 2d gravity.
+ *
+ * Calculates the long-range periodic force given the particle positions
+ * using the PM method. The force is Gaussian filtered with Asmth, given in
+ * mesh-cell units. We carry out a CIC charge assignment, and compute the
+ * potential by Fourier transform methods. The potential is finite differenced
+ * using a 4-point finite differencing formula, and the forces are
+ * interpolated bi-linearly to the particle positions. The CIC kernel is
+ * deconvolved. Note that the particle distribution is not in the slab
+ * decomposition that is used for the FFT.
Instead, overlapping patches
+ * between local domains and FFT slabs are communicated as needed.
+ *
+ * \param[in] mode 0: normal PM force; 1: calculate mesh correction vector.
+ *
+ * \return void
+ */
+void pm2d_force_periodic(int mode)
+{
+  double k2, kx, ky, smth;
+  double dx, dy, weight;
+  double fx, fy, ff;
+  double asmth2, fac, acc_dim;
+  int i, j, N, slab, level, sendTask, recvTask, task;
+  int x, y, yl, yr, yll, yrr, ip, dim;
+  int slab_x, slab_y;
+  int slab_xx, slab_yy;
+  int num_on_grid, num_field_points, pindex, xx, yy;
+  MPI_Status status;
+  int *localfield_count, *localfield_first, *localfield_offset, *localfield_togo;
+  large_array_offset offset, *localfield_globalindex, *import_globalindex;
+  d_fftw_real *localfield_d_data, *import_d_data;
+  fftw_real *localfield_data, *import_data;
+
+  if(ThisTask == 0)
+    {
+      printf("Starting periodic PM-2d calculation. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+      myflush(stdout);
+    }
+
+  asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize;
+  asmth2 *= asmth2; /* squared; enters the exp(-k2 * asmth2) filter below */
+
+  fac = All.G / (M_PI * All.BoxSize);    /* to get potential */
+  fac *= 1 / (2 * All.BoxSize / PMGRID); /* for finite differencing */
+
+  if(mode == 1)
+    {
+      fac *= 1.0 / (All.G) * All.BoxSize; /* mode 1: All.G is divided out again */
+    }
+  else
+    {
+      fac *= All.BoxSize;
+    }
+
+  pm2d_init_periodic_allocate();
+
+  if(mode == 0)
+    N = NumPart;
+  else
+    N = NumGas; /* mode != 0: only particles 0..NumGas-1 are put on the mesh */
+
+  /* determine the cells each particle accesses */
+  for(i = 0, num_on_grid = 0; i < N; i++)
+    {
+      slab_x = (int)(to_slab_fac * P[i].Pos[0]);
+      slab_y = (int)(to_slab_fac * P[i].Pos[1]);
+
+      if(slab_x >= PMGRID)
+        slab_x = PMGRID - 1;
+      if(slab_y >= PMGRID)
+        slab_y = PMGRID - 1;
+
+      for(xx = 0; xx < 2; xx++)
+        for(yy = 0; yy < 2; yy++)
+          {
+            slab_xx = slab_x + xx;
+            slab_yy = slab_y + yy;
+
+            if(slab_xx >= PMGRID) /* periodic wrap */
+              slab_xx -= PMGRID;
+            if(slab_yy >= PMGRID)
+              slab_yy -= PMGRID;
+
+            offset = (PMGRID2 * slab_xx + slab_yy);
+
+            part[num_on_grid].partindex   = (i << 2) + (xx << 1) + yy; /* particle index in the upper bits, corner (xx, yy) in the lowest two */
+            part[num_on_grid].globalindex = offset;
+            part_sortindex[num_on_grid]   = num_on_grid;
+            num_on_grid++;
+          }
+    }
+
+  /* note: num_on_grid will be 4 times larger than the particle number,
+     but num_field_points will generally be much smaller */
+
+  /* bring the part-field into the order of the accessed cells. This allows the removal of duplicates.
+     Only part_sortindex is reordered; part[] itself stays in creation order. */
+  pm2d_mysort_pmperiodic(part_sortindex, num_on_grid, sizeof(int), pm2d_periodic_compare_sortindex);
+
+  /* determine the number of unique field points */
+  for(i = 0, num_field_points = 0; i < num_on_grid; i++)
+    {
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
+          continue;
+
+      num_field_points++;
+    }
+
+  /* allocate the local field; the buffers are released in reverse order at the end of this function */
+  localfield_globalindex = (large_array_offset *)mymalloc("first_slab_of_task", num_field_points * sizeof(large_array_offset)); /* NOTE(review): label does not match the variable name */
+  localfield_d_data      = (d_fftw_real *)mymalloc("localfield_d_data", num_field_points * sizeof(d_fftw_real));
+  localfield_data        = (fftw_real *)localfield_d_data; /* same buffer, viewed as fftw_real */
+  localfield_first       = (int *)mymalloc("localfield_d_data", NTask * sizeof(int)); /* NOTE(review): label does not match the variable name */
+  localfield_count       = (int *)mymalloc("localfield_count", NTask * sizeof(int));
+  localfield_offset      = (int *)mymalloc("localfield_count", NTask * sizeof(int)); /* NOTE(review): label does not match the variable name */
+  localfield_togo        = (int *)mymalloc("localfield_togo", NTask * NTask * sizeof(int));
+
+  for(i = 0; i < NTask; i++)
+    {
+      localfield_first[i] = 0;
+      localfield_count[i] = 0;
+    }
+
+  /* establish the cross link between the part[] array and the local list of
+     mesh points. Also, count on which CPU how many of the needed field points are stored */
+  for(i = 0, num_field_points = 0; i < num_on_grid; i++)
+    {
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
+          num_field_points++;
+
+      part[part_sortindex[i]].localindex = num_field_points; /* here num_field_points is the index of the current unique cell */
+
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
+          continue;
+
+      localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex;
+
+      slab = part[part_sortindex[i]].globalindex / PMGRID2; /* x-slab of the cell, see offset = PMGRID2 * slab_xx + slab_yy above */
+      task = slab_to_task[slab];
+      if(localfield_count[task] == 0)
+        localfield_first[task] = num_field_points;
+      localfield_count[task]++;
+    }
+  num_field_points++; /* turn the last used index into a count */
+
+  for(i = 1, localfield_offset[0] = 0; i < NTask; i++)
+    localfield_offset[i] = localfield_offset[i - 1] + localfield_count[i - 1];
+
+  /* now bin the local particle data onto the mesh list */
+
+  for(i = 0; i < num_field_points; i++)
+    localfield_d_data[i] = 0;
+
+  for(i = 0; i < num_on_grid; i += 4) /* part[] is in creation order, so entries i..i+3 are the four corners of one particle */
+    {
+      pindex = (part[i].partindex >> 2);
+
+      slab_x = (int)(to_slab_fac * P[pindex].Pos[0]);
+      slab_y = (int)(to_slab_fac * P[pindex].Pos[1]);
+
+      dx = to_slab_fac * P[pindex].Pos[0] - slab_x;
+      dy = to_slab_fac * P[pindex].Pos[1] - slab_y;
+
+      weight = P[pindex].Mass;
+
+      localfield_d_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy);
+      localfield_d_data[part[i + 1].localindex] += weight * (1.0 - dx) * dy;
+      localfield_d_data[part[i + 2].localindex] += weight * (dx) * (1.0 - dy);
+      localfield_d_data[part[i + 3].localindex] += weight * (dx)*dy;
+    }
+
+  /* clear local FFT-mesh density field */
+  for(i = 0; i < fftsize; i++)
+    d_rhogrid[i] = 0;
+
+  /* exchange data and add contributions to the local mesh-patch */
+
+  MPI_Allgather(localfield_count, NTask, MPI_INT, localfield_togo, NTask, MPI_INT, MPI_COMM_WORLD); /* localfield_togo[a * NTask + b] = localfield_count[b] of task a */
+
+  for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+    {
+      sendTask = ThisTask;
+      recvTask = ThisTask ^ level; /* exchange partner for this level */
+
+      if(recvTask < NTask)
+        {
+          if(level > 0)
+            {
+              import_d_data =
+                  (d_fftw_real *)mymalloc("import_d_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(d_fftw_real));
+              import_globalindex = (large_array_offset *)mymalloc(
+                  "import_d_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(large_array_offset));
+
+              if(localfield_togo[sendTask * NTask + recvTask] > 0 || localfield_togo[recvTask * NTask + sendTask] > 0)
+                {
+                  MPI_Sendrecv(localfield_d_data + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(d_fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                               import_d_data, localfield_togo[recvTask * NTask + sendTask] * sizeof(d_fftw_real), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
+
+                  MPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_B, import_globalindex,
+                               localfield_togo[recvTask * NTask + sendTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
+                }
+            }
+          else
+            {
+              import_d_data      = localfield_d_data + localfield_offset[ThisTask]; /* level 0: use the local lists directly, no copy */
+              import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+            }
+
+          for(i = 0; i < localfield_togo[recvTask * NTask + sendTask]; i++)
+            {
+              /* determine offset in local FFT slab */
+              offset = import_globalindex[i] - first_slab_x_of_task[ThisTask] * PMGRID2;
+
+              d_rhogrid[offset] += import_d_data[i];
+            }
+
+          if(level > 0)
+            {
+              myfree(import_globalindex);
+              myfree(import_d_data);
+            }
+        }
+    }
+
+  /* Do the FFT of the density field */
+
+  rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
+
+  /* multiply with Green's function for the potential */
+
+  for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
+    for(x = 0; x < PMGRID; x++)
+      {
+        if(x > PMGRID / 2)
+          kx = x - PMGRID;
+        else
+          kx = x;
+        if(y > PMGRID / 2)
+          ky = y - PMGRID;
+        else
+          ky = y;
+
+        k2 = kx * kx + ky * ky;
+
+        if(k2 > 0)
+          {
+            smth = -exp(-k2 * asmth2) / k2;
+
+            /* do deconvolution */
+
+            fx = fy = 1;
+            if(kx != 0)
+              {
+                fx = (M_PI * kx) / PMGRID;
+                fx = sin(fx) / fx;
+              }
+            if(ky != 0)
+              {
+                fy = (M_PI * ky) / PMGRID;
+                fy = sin(fy) / fy;
+              }
+            ff = 1 / (fx * fy);
+            smth *= ff * ff * ff * ff;
+
+            /* end deconvolution */
+
+            ip = PMGRID * (y - slabstart_y) + x;
+            fft_of_rhogrid[ip].re *= smth;
+            fft_of_rhogrid[ip].im *= smth;
+          }
+      }
+
+  if(slabstart_y == 0)
+    fft_of_rhogrid[0].re = fft_of_rhogrid[0].im = 0.0; /* zero the k = 0 entry, which the k2 > 0 test above skips */
+
+  /* Do the inverse FFT to get the potential */
+
+  rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
+
+#ifdef EVALPOTENTIAL /* now read out the potential */
+  if(mode == 0)
+    {
+      for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+        {
+          sendTask = ThisTask;
+          recvTask = ThisTask ^ level;
+
+          if(recvTask < NTask)
+            {
+              if(level > 0)
+                {
+                  import_data        = (fftw_real *)mymalloc("import_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(fftw_real));
+                  import_globalindex = (large_array_offset *)mymalloc(
+                      "import_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(large_array_offset));
+
+                  if(localfield_togo[sendTask * NTask + recvTask] > 0 || localfield_togo[recvTask * NTask + sendTask] > 0)
+                    {
+                      MPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                   localfield_togo[sendTask * NTask + recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, import_globalindex,
+                                   localfield_togo[recvTask * NTask + sendTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
+                    }
+                }
+              else
+                {
+                  import_data        = localfield_data + localfield_offset[ThisTask];
+                  import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+                }
+
+              for(i = 0; i < localfield_togo[recvTask * NTask + sendTask]; i++)
+                {
+                  offset         = import_globalindex[i] - first_slab_x_of_task[ThisTask] * ((large_array_offset)PMGRID2);
+                  import_data[i] = rhogrid[offset]; /* potential value requested by the partner (or by this task for level 0) */
+                }
+
+              if(level > 0)
+                {
+                  MPI_Sendrecv(import_data, localfield_togo[recvTask * NTask + sendTask] * sizeof(fftw_real), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_A, localfield_data + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                               MPI_COMM_WORLD, &status);
+
+                  myfree(import_globalindex);
+                  myfree(import_data);
+                }
+            }
+        }
+
+      /* read out the potential values, which all have been assembled in localfield_data */
+
+      double pot;
+
+      for(i = 0, j = 0; i < N; i++)
+        {
+          while(j < num_on_grid && (part[j].partindex >> 2) != i) /* advance j to the first of the four entries of particle i */
+            j++;
+
+          slab_x = (int)(to_slab_fac * P[i].Pos[0]);
+          dx     = to_slab_fac * P[i].Pos[0] - slab_x;
+
+          slab_y = (int)(to_slab_fac * P[i].Pos[1]);
+          dy     = to_slab_fac * P[i].Pos[1] - slab_y;
+
+          pot = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) +
+                localfield_data[part[j + 1].localindex] * (1.0 - dx) * dy + localfield_data[part[j + 2].localindex] * dx * (1.0 - dy) +
+                localfield_data[part[j + 3].localindex] * dx * dy;
+
+          P[i].PM_Potential += pot * fac * (2 * All.BoxSize / PMGRID);
+          /* compensate the finite differencing factor */;
+        }
+    }
+#endif /* #ifdef EVALPOTENTIAL */
+
+  /* get the force components by finite differencing the potential for each dimension,
+     and send back the results to the right CPUs */
+
+  for(dim = 1; dim >= 0; dim--) /* Calculate each component of the force. */
+    { /* we do the x component last, because for differencing the potential in the x-direction, we need to construct the transpose */
+      if(dim == 0)
+        pm2d_periodic_transposeA(rhogrid, forcegrid); /* compute the transpose of the potential field; forcegrid is the scratch buffer */
+
+      for(xx = slabstart_x; xx < (slabstart_x + nslab_x); xx++)
+        for(y = 0; y < PMGRID; y++)
+          {
+            x = xx - slabstart_x;
+
+            yrr = yll = yr = yl = y; /* all four are overwritten right below */
+
+            yr  = y + 1;
+            yl  = y - 1;
+            yrr = y + 2;
+            yll = y - 2;
+            if(yr >= PMGRID) /* periodic wrap of the stencil points */
+              yr -= PMGRID;
+            if(yrr >= PMGRID)
+              yrr -= PMGRID;
+            if(yl < 0)
+              yl += PMGRID;
+            if(yll < 0)
+              yll += PMGRID;
+
+            if(dim == 0)
+              {
+                forcegrid[x + y * nslab_x] = fac * ((4.0 / 3) * (rhogrid[(x + yl * nslab_x)] - rhogrid[(x + yr * nslab_x)]) -
+                                                    (1.0 / 6) * (rhogrid[(x + yll * nslab_x)] - rhogrid[(x + yrr * nslab_x)]));
+              }
+            else
+              {
+                forcegrid[PMGRID2 * x + y] = fac * ((4.0 / 3) * (rhogrid[PMGRID2 * x + yl] - rhogrid[PMGRID2 * x + yr]) -
+                                                    (1.0 / 6) * (rhogrid[PMGRID2 * x + yll] - rhogrid[PMGRID2 * x + yrr]));
+              }
+          }
+
+      if(dim == 0)
+        pm2d_periodic_transposeB(forcegrid, rhogrid); /* transpose the force field back; rhogrid is the scratch buffer here and is overwritten */
+
+      /* send the force components to the right processors */
+
+      for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+        {
+          sendTask = ThisTask;
+          recvTask = ThisTask ^ level;
+
+          if(recvTask < NTask)
+            {
+              if(level > 0)
+                {
+                  import_data        = (fftw_real *)mymalloc("import_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(fftw_real));
+                  import_globalindex = (large_array_offset *)mymalloc(
+                      "import_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(large_array_offset));
+
+                  if(localfield_togo[sendTask * NTask + recvTask] > 0 || localfield_togo[recvTask * NTask + sendTask] > 0)
+                    {
+                      MPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                   localfield_togo[sendTask * NTask + recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, import_globalindex,
+                                   localfield_togo[recvTask * NTask + sendTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
+                    }
+                }
+              else
+                {
+                  import_data        = localfield_data + localfield_offset[ThisTask];
+                  import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+                }
+
+              for(i = 0; i < localfield_togo[recvTask * NTask + sendTask]; i++)
+                {
+                  /* determine offset in local FFT slab */
+                  offset         = import_globalindex[i] - first_slab_x_of_task[ThisTask] * PMGRID2;
+                  import_data[i] = forcegrid[offset];
+                }
+
+              if(level > 0)
+                {
+                  MPI_Sendrecv(import_data, localfield_togo[recvTask * NTask + sendTask] * sizeof(fftw_real), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_A, localfield_data + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                               MPI_COMM_WORLD, &status);
+
+                  myfree(import_globalindex);
+                  myfree(import_data);
+                }
+            }
+        }
+
+      /* read out the forces, which all have been assembled in localfield_data */
+
+      for(i = 0, j = 0; i < N; i++)
+        {
+          while(j < num_on_grid && (part[j].partindex >> 2) != i) /* advance j to the first of the four entries of particle i */
+            j++;
+
+          slab_x = (int)(to_slab_fac * P[i].Pos[0]);
+          dx     = to_slab_fac * P[i].Pos[0] - slab_x;
+
+          slab_y = (int)(to_slab_fac * P[i].Pos[1]);
+          dy     = to_slab_fac * P[i].Pos[1] - slab_y;
+
+          acc_dim = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) +
+                    localfield_data[part[j + 1].localindex] * (1.0 - dx) * dy +
+                    localfield_data[part[j + 2].localindex] * (dx) * (1.0 - dy) + localfield_data[part[j + 3].localindex] * (dx)*dy;
+
+          P[i].GravPM[dim] += acc_dim; /* accumulated onto the existing value, not assigned */
+        }
+    }
+
+  /* free locallist, in reverse order of allocation */
+  myfree(localfield_togo);
+  myfree(localfield_offset);
+  myfree(localfield_count);
+  myfree(localfield_first);
+  myfree(localfield_d_data);
+  myfree(localfield_globalindex);
+
+  pm2d_init_periodic_free();
+
+  mpi_printf("done PM-2d.\n");
+}
+
+/*! \brief Compares two objects of type part_slab_data.
+ *
+ * According to element globalindex.
+ * + * \param[in] a Index of first object in part array. + * \param[in] b Index of second object in part array. + * + * \return (-1,0,1); -1 if part[a].globalindex < part[b].globalindex + */ +int pm2d_periodic_compare_sortindex(const void *a, const void *b) +{ + if(part[*(int *)a].globalindex < part[*(int *)b].globalindex) + return -1; + + if(part[*(int *)a].globalindex > part[*(int *)b].globalindex) + return +1; + + return 0; +} + +/*! \brief Merge sort algorithm for 2d periodic particle mesh algorithm. + * + * \param[in, out] b Array to be sorted. + * \param[in] n Size of array b. + * \param[in, out] t Temporary array. + * + * \return void + */ +static void pm2d_msort_pmperiodic_with_tmp(int *b, size_t n, int *t) +{ + int *tmp; + int *b1, *b2; + size_t n1, n2; + + if(n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = b + n1; + + pm2d_msort_pmperiodic_with_tmp(b1, n1, t); + pm2d_msort_pmperiodic_with_tmp(b2, n2, t); + + tmp = t; + + while(n1 > 0 && n2 > 0) + { + if(part[*b1].globalindex <= part[*b2].globalindex) + { + --n1; + *tmp++ = *b1++; + } + else + { + --n2; + *tmp++ = *b2++; + } + } + + if(n1 > 0) + memcpy(tmp, b1, n1 * sizeof(int)); + + memcpy(b, t, (n - n2) * sizeof(int)); +} + +/*! \brief Wrapper for sorting algorithm in 2d periodic PM algorithm. + * + * Uses pm2d_msort_pmperiodic_with_tmp. + * + * \param[in, out] b Array to be sorted. + * \param[in] n Number of elements in array b. + * \param[in] s Size of individual element of b (for memory allocation). + * \param[in] cmp Compare function (unused). + * + * \return void + */ +void pm2d_mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)) +{ + const size_t size = n * s; + + int *tmp = (int *)mymalloc("tmp", size); + + pm2d_msort_pmperiodic_with_tmp((int *)b, n, tmp); + + myfree(tmp); +} + +/*! \brief Transpose operation for 2d fft. + * + * Used for transposing rhogrid. + * + * \param[in, out] field Field that needs to be transposed. 
+ * \param[in, out] scratch Temporary data. + * + * \return void + */ +void pm2d_periodic_transposeA(fftw_real *field, fftw_real *scratch) +{ + int x, y, task; + + for(task = 0; task < NTask; task++) + for(x = 0; x < nslab_x; x++) + for(y = first_slab_x_of_task[task]; y < first_slab_x_of_task[task] + slabs_x_per_task[task]; y++) + { + scratch[(first_slab_x_of_task[task] * nslab_x + x * slabs_x_per_task[task] + (y - first_slab_x_of_task[task]))] = + field[PMGRID2 * x + y]; + } + +#ifndef NO_ISEND_IRECV_IN_DOMAIN + MPI_Request *requests; + int nrequests = 0; + + requests = (MPI_Request *)mymalloc(2 * NTask * sizeof(MPI_Request)); + + for(task = 0; task < NTask; task++) + { + MPI_Isend(scratch + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, + TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]); + + MPI_Irecv(field + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, + TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]); + } + + MPI_Waitall(nrequests, requests, MPI_STATUSES_IGNORE); + myfree(requests); +#else /* #ifndef NO_ISEND_IRECV_IN_DOMAIN */ + int ngrp; + + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + task = ThisTask ^ ngrp; + + if(task < NTask) + { + MPI_Sendrecv(scratch + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, + task, TAG_KEY, field + first_slab_x_of_task[task] * nslab_x, + nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, TAG_KEY, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } +#endif /* #ifndef NO_ISEND_IRECV_IN_DOMAIN #else */ +} + +/*! \brief Transpose operation for 2d fft. + * + * Used for forcegrid transpose. + * + * \param[in, out] field Field that needs to be transposed. + * \param[in, out] scratch Temporary data. 
+ * + * \return void + */ +void pm2d_periodic_transposeB(fftw_real *field, fftw_real *scratch) +{ + int x, y, task; + +#ifndef NO_ISEND_IRECV_IN_DOMAIN + MPI_Request *requests; + int nrequests = 0; + + requests = (MPI_Request *)mymalloc(2 * NTask * sizeof(MPI_Request)); + + for(task = 0; task < NTask; task++) + { + MPI_Isend(field + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, + TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]); + + MPI_Irecv(scratch + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, + TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]); + } + + MPI_Waitall(nrequests, requests, MPI_STATUSES_IGNORE); + myfree(requests); + +#else /* #ifndef NO_ISEND_IRECV_IN_DOMAIN */ + int ngrp; + + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + task = ThisTask ^ ngrp; + + if(task < NTask) + { + MPI_Sendrecv(field + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, + task, TAG_KEY, scratch + first_slab_x_of_task[task] * nslab_x, + nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, TAG_KEY, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } +#endif /* #ifndef NO_ISEND_IRECV_IN_DOMAIN #else */ + + for(task = 0; task < NTask; task++) + for(x = 0; x < nslab_x; x++) + for(y = first_slab_x_of_task[task]; y < first_slab_x_of_task[task] + slabs_x_per_task[task]; y++) + { + field[PMGRID2 * x + y] = + scratch[(first_slab_x_of_task[task] * nslab_x + x * slabs_x_per_task[task] + (y - first_slab_x_of_task[task]))]; + } +} + +#endif /* #ifdef TWODIMS */ +#endif /* #ifndef GRAVITY_NOT_PERIODIC */ +#endif /* #ifdef PMGRID */ diff --git a/src/amuse/community/arepo/src/hydro/finite_volume_solver.c b/src/amuse/community/arepo/src/hydro/finite_volume_solver.c new file mode 100644 index 0000000000..287fe14bb4 --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/finite_volume_solver.c @@ -0,0 +1,1895 
@@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/finite_volume_solver.c + * \date 05/2018 + * \brief Core algorithms of the finite-volume solver. 
+ * \details contains functions: + * void compute_interface_fluxes(tessellation * T) + * void backup_face_areas(tessellation * T) + * void restore_face_areas(tessellation * T) + * int face_get_state(tessellation * T, int p, int i, struct + * state *st) + * void face_boundary_check_vertex(tessellation * T, int p, + * MyFloat * velx, MyFloat * vely, MyFloat * velz) + * void face_boundary_check(point * p, double *velx, double + * *vely, double *velz) + * int face_check_responsibility_of_this_task(tessellation * T, + * int p1, int p2, struct state *st_L, struct state *st_R) + * double face_timestep(struct state *state_L, struct state + * *state_R, double *hubble_a, double *atime) + * void state_convert_to_local_frame(struct state *st, double + * *vel_face, double hubble_a, double atime) + * void face_do_time_extrapolation(struct state *delta, + * struct state *st, double atime) + * void face_do_spatial_extrapolation(struct state *delta, + * struct state *st, struct state *st_other) + * void face_do_spatial_extrapolation_single_quantity(double + * *delta, double st, double st_other, MySingle * grad, + * double *dx, double *r) + * void face_add_extrapolations(struct state *st_face, struct + * state *delta_time, struct state *delta_space, struct + * fvs_stat *stat) + * void face_add_extrapolation(struct state *st_face, struct + * state *delta, struct fvs_stat *stat) + * void face_add_extrapolation_with_check(struct state *st_face, + * struct state *delta, struct fvs_stat *stat) + * void face_turn_velocities(struct state *st, struct geometry + * *geom) + * void solve_advection(struct state *st_L, struct state *st_R, + * struct state_face *st_face, struct geometry *geom, + * double *vel_face) + * void face_turnback_velocities(struct state_face *st_face, + * struct geometry *geom) + * void face_set_scalar_states_and_fluxes(struct state *st_L, + * struct state *st_R, struct state_face *st_face, struct + * fluxes *flux) + * void flux_convert_to_lab_frame(struct state *st_L, 
struct
+ *                  state *st_R, double *vel_face, struct fluxes *flux)
+ *                void face_turn_momentum_flux(struct fluxes *flux, struct
+ *                  geometry *geom)
+ *                void face_get_fluxes(struct state *st_L, struct state *st_R,
+ *                  struct state_face *st_face, struct fluxes *flux, struct
+ *                  geometry *geom, double *vel_face)
+ *                void face_limit_fluxes(struct state *st_L, struct state
+ *                  *st_R, struct state *st_center_L, struct state
+ *                  *st_center_R, struct fluxes *flux, double dt, double
+ *                  *count, double *count_reduced)
+ *                void face_clear_fluxes(struct fluxes *flux)
+ *                void face_add_fluxes_advection(struct state_face *st_face,
+ *                  struct fluxes *flux, struct geometry *geom, double
+ *                  *vel_face)
+ *                int flux_list_data_compare(const void *a, const void *b)
+ *                void apply_flux_list(void)
+ *                void fvs_initialize_statistics(struct fvs_stat *stat)
+ *                void fvs_evaluate_statistics(struct fvs_stat *stat)
+ *                void apply_spherical_source_terms()
+ *                void add_spin_source_term_from_grid_movement()
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 17.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include /* NOTE(review): the header names of these four system includes are missing in this copy of the file; */
+#include /* gsl_finite() and sqrt() are used below, so <gsl/gsl_math.h> and <math.h> are presumably among them */
+#include /* -- restore the names from the upstream source before building */
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+/*! \brief Data needed for flux calculation.
+ *
+ *  One entry describes the change of the conserved quantities of a cell
+ *  that is stored on another task. Entries are filled in
+ *  compute_interface_fluxes() and processed by apply_flux_list().
+ *
+ *  task, index:        destination task and index of the cell on that task
+ *  dM, dP:             change of mass and of the three momentum components
+ *  dB:                 change of the conserved magnetic field (MHD only)
+ *  dEnergy:            change of energy (absent for ISOTHERM_EQS)
+ *  dConservedScalars:  change of the conserved scalars (MAXSCALARS only)
+ */
+static struct flux_list_data
+{
+  int task, index;
+  double dM, dP[3];
+#ifdef MHD
+  double dB[3];
+#endif /* #ifdef MHD */
+
+#ifndef ISOTHERM_EQS
+  double dEnergy;
+#endif /* #ifndef ISOTHERM_EQS */
+#ifdef MAXSCALARS
+  double dConservedScalars[MAXSCALARS];
+#endif /* #ifdef MAXSCALARS */
+} * FluxList;
+
+static int Nflux, MaxNflux; /* number of used and of allocated entries in FluxList */
+
+struct primexch *PrimExch; /* primitive variables of cells stored on other tasks */
+struct grad_data *GradExch; /* gradients of cells stored on other tasks */
+
+/*! state on a face determined by Riemann solver */
+struct state_face state_face;
+
+/*!
flux through a face */ +struct fluxes fluxes; + +struct geometry geom; + +#ifdef ONEDIMS_SPHERICAL +void apply_spherical_source_terms(); +#endif /* #ifdef ONEDIMS_SPHERICAL */ + +static void face_add_extrapolation_with_check(struct state *st_face, struct state *delta, struct fvs_stat *stat); +static void fvs_initialize_statistics(struct fvs_stat *stat); +static void fvs_evaluate_statistics(struct fvs_stat *stat); + +#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS +void backup_face_areas(tessellation *T); +void restore_face_areas(tessellation *T); +#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */ + +/*! \brief Main routine to compute fluxes across interfaces given am mesh T. + * + * Adds these fluxes to conserved variables. + * + * \param[in] T Pointer to tessellation. + * + * \return void + */ +void compute_interface_fluxes(tessellation *T) +{ +#ifdef NOHYDRO + return; +#endif /* #ifdef NOHYDRO */ + TIMER_START(CPU_FLUXES); + + int i, j; + double count = 0, count_reduced = 0, tot_count, tot_count_reduced; + double face_dt, hubble_a, atime; + struct fvs_stat stat; +#ifdef MHD + double sqrtatime; +#endif /* #ifdef MHD */ + +#ifdef GODUNOV_STATS + FILE *fdstats; + char buf[1000]; + + sprintf(buf, "%s/godunov_stats_%d.txt", All.OutputDir, ThisTask); + if(!(fdstats = fopen(buf, "w"))) + terminate("error in opening file '%s'", buf); +#endif /* #ifdef GODUNOV_STATS */ + +#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS + backup_face_areas(T); +#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */ + + fvs_initialize_statistics(&stat); + + MaxNflux = T->Indi.AllocFacNflux; + Nflux = 0; + FluxList = mymalloc_movable(&FluxList, "FluxList", MaxNflux * sizeof(struct flux_list_data)); + + face *VF = T->VF; + point *DP = T->DP; + + for(i = 0; i < T->Nvf; i++) + { + struct state state_L, state_center_L, delta_time_L, delta_space_L; + struct state state_R, state_center_R, delta_time_R, delta_space_R; + + face_dt = 0; /* the default is that this face is not active */ + + /* calculate normal 
vectors */ + if(face_get_normals(T, i, &geom)) + continue; + + /* get the values of the states at the center of the cells */ + if(face_get_state(T, VF[i].p1, i, &state_center_L)) + continue; + + if(face_get_state(T, VF[i].p2, i, &state_center_R)) + continue; + + /* only treat faces where one of the two sides is active */ + if(!TimeBinSynchronized[state_center_L.timeBin] && !TimeBinSynchronized[state_center_R.timeBin]) + continue; + + /* clarify whether the face should be done by this task (it may be present also on another task) */ + if(face_check_responsibility_of_this_task(T, VF[i].p1, VF[i].p2, &state_center_L, &state_center_R)) + continue; + + /* calculate timestep of the face */ + face_dt = face_timestep(&state_center_L, &state_center_R, &hubble_a, &atime); +#ifdef MHD + sqrtatime = sqrt(atime); +#endif /* #ifdef MHD */ + + if(!(face_dt > 0)) + continue; + + /* now estimate the velocity of the midpoint of the face based on the velocities of the generators of the mesh. */ + double vel_face[3]; + + if(All.ComovingIntegrationOn) + for(j = 0; j < 3; j++) + { + state_center_L.velVertex[j] /= atime; /* convert vertex motion to peculiar velocity */ + state_center_R.velVertex[j] /= atime; + } + + /* rough motion of mid-point of edge */ + vel_face[0] = 0.5 * (state_center_L.velVertex[0] + state_center_R.velVertex[0]); + vel_face[1] = 0.5 * (state_center_L.velVertex[1] + state_center_R.velVertex[1]); + vel_face[2] = 0.5 * (state_center_L.velVertex[2] + state_center_R.velVertex[2]); + + double cx, cy, cz, facv; + + cx = VF[i].cx - 0.5 * (DP[VF[i].p2].x + DP[VF[i].p1].x); + cy = VF[i].cy - 0.5 * (DP[VF[i].p2].y + DP[VF[i].p1].y); + cz = VF[i].cz - 0.5 * (DP[VF[i].p2].z + DP[VF[i].p1].z); + + facv = (cx * (state_center_L.velVertex[0] - state_center_R.velVertex[0]) + + cy * (state_center_L.velVertex[1] - state_center_R.velVertex[1]) + + cz * (state_center_L.velVertex[2] - state_center_R.velVertex[2])) / + geom.nn; + + /* put in a limiter for highly distorted cells */ + 
double cc = sqrt(cx * cx + cy * cy + cz * cz); + if(cc > 0.9 * geom.nn) + facv *= (0.9 * geom.nn) / cc; + + vel_face[0] += facv * geom.nx; + vel_face[1] += facv * geom.ny; + vel_face[2] += facv * geom.nz; + +#if defined(VORONOI_STATIC_MESH) + vel_face[0] = 0; + vel_face[1] = 0; + vel_face[2] = 0; +#endif /* #if defined(VORONOI_STATIC_MESH) */ + +#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) + double vel_face_turned[3]; + /* for these riemann solvers, the riemann problem is not solved in the + * restframe of the face, instead the mesh motion is accounted for via + * an advection step. + */ + + /* turn the face velocity */ + vel_face_turned[0] = vel_face[0] * geom.nx + vel_face[1] * geom.ny + vel_face[2] * geom.nz; + vel_face_turned[1] = vel_face[0] * geom.mx + vel_face[1] * geom.my + vel_face[2] * geom.mz; + vel_face_turned[2] = vel_face[0] * geom.px + vel_face[1] * geom.py + vel_face[2] * geom.pz; +#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */ + + state_convert_to_local_frame(&state_center_L, vel_face, hubble_a, atime); + state_convert_to_local_frame(&state_center_R, vel_face, hubble_a, atime); + + /* copy center state to state at interface, then add extrapolation terms */ + state_L = state_center_L; + state_R = state_center_R; + + face_do_time_extrapolation(&delta_time_L, &state_center_L, atime); + face_do_time_extrapolation(&delta_time_R, &state_center_R, atime); + + face_do_spatial_extrapolation(&delta_space_L, &state_center_L, &state_center_R); + face_do_spatial_extrapolation(&delta_space_R, &state_center_R, &state_center_L); + + face_add_extrapolations(&state_L, &delta_time_L, &delta_space_L, &stat); + face_add_extrapolations(&state_R, &delta_time_R, &delta_space_R, &stat); + +#ifdef MHD + if(All.ComovingIntegrationOn) + { + state_L.Bx /= sqrtatime; + state_L.By /= sqrtatime; + state_L.Bz /= sqrtatime; + + state_R.Bx /= sqrtatime; + state_R.By /= sqrtatime; + state_R.Bz /= sqrtatime; + } +#endif /* #ifdef MHD */ + +#ifndef MESHRELAX 
+#ifndef ISOTHERM_EQS + /* check for crazy values */ + if(state_L.press < 0 || state_R.press < 0 || state_L.rho < 0 || state_R.rho < 0) + { + printf("i=%d press_L=%g press_R=%g rho_L=%g rho_R=%g\n", i, state_L.press, state_R.press, state_L.rho, state_R.rho); + printf("area=%g lx=%g ly=%g rx=%g ry=%g\n", VF[i].area, state_L.dx, state_L.dy, state_R.dx, state_R.dy); + terminate("found crazy values"); + } +#else /* #ifndef ISOTHERM_EQS */ + if(state_L.press < 0 || state_R.press < 0 || state_L.rho < 0 || state_R.rho < 0) + { + printf("i=%d rho_L=%g rho_R=%g\n", i, state_L.rho, state_R.rho); + printf("area=%g lx=%g ly=%g rx=%g ry=%g\n", VF[i].area, state_L.dx, state_L.dy, state_R.dx, state_R.dy); + terminate("found crazy values"); + } +#endif /* #ifndef ISOTHERM_EQS #else */ +#endif /* #ifndef MESHRELAX */ + + /* mirror velocity in case of reflecting boundaries */ + face_boundary_check(&T->DP[VF[i].p1], &state_L.velx, &state_L.vely, &state_L.velz); + face_boundary_check(&T->DP[VF[i].p2], &state_R.velx, &state_R.vely, &state_R.velz); + +#ifdef MHD + /* mirror magnetic field in case of reflecting boundaries */ + face_boundary_check(&T->DP[VF[i].p1], &state_L.Bx, &state_L.By, &state_L.Bz); + face_boundary_check(&T->DP[VF[i].p2], &state_R.Bx, &state_R.By, &state_R.Bz); +#endif /* #ifdef MHD */ + + /* turn the velocities to get velx perpendicular and vely and velz in the plane of the face */ + face_turn_velocities(&state_L, &geom); + face_turn_velocities(&state_R, &geom); + +#ifndef MESHRELAX + + /* call Riemann solver */ + + double press; +#ifdef RIEMANN_HLLC + press = godunov_flux_3d_hllc(&state_L, &state_R, &state_face, &fluxes); +#else /* #ifdef RIEMANN_HLLC */ +#ifdef RIEMANN_HLLD + press = godunov_flux_3d_hlld(&state_L, &state_R, vel_face_turned, &state_face, &fluxes); +#else /* #ifdef RIEMANN_HLLD */ + press = godunov_flux_3d(&state_L, &state_R, &state_face); /* exact ideal gas solver */ +#endif /* #ifdef RIEMANN_HLLD #else */ +#endif /* #ifdef RIEMANN_HLLC #else */ + 
+ if(press < 0) + terminate("press < 0: ID_L: %d, ID_R: %d", VF[i].p1, VF[i].p2); + +#ifdef GODUNOV_STATS + get_mach_numbers(&state_L, &state_R, press); + if(st_L.rho > 1.0e-6 && st_R.rho > 1.0e-6) + fprintf(fdstats, "%g %g %g %g %g %g %g %g %g %g %g %g\n", state_L.rho, state_L.velx, state_L.press, state_L.rho, + state_L.velx, state_L.press, state_face.rho, state_face.velx, state_face.press, state_L.mach, state_R.mach, + VF[i].area); +#endif /* GODUNOV_STATS */ + +#endif /* #ifndef MESHRELAX */ + + /* turn the velocity field back */ + face_turnback_velocities(&state_face, &geom); + + /* add the face velocity again */ + state_face.velx += vel_face[0]; + state_face.vely += vel_face[1]; + state_face.velz += vel_face[2]; + +#ifndef MESHRELAX + +#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) + /* for non-exact Riemann solver, fluxes are already computed in the local frame, so convert to lab frame and turn momentum fluxes + * to the lab orientation */ + flux_convert_to_lab_frame(&state_L, &state_R, vel_face_turned, &fluxes); + face_turn_momentum_flux(&fluxes, &geom); + +#else /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */ + + /* calculate fluxes for exact Riemann problem */ + /* compute net flux with dot-product of outward normal and area of face */ + /* multiplication with area and time-step comes later */ + + face_get_fluxes(&state_L, &state_R, &state_face, &fluxes, &geom, vel_face); + +#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) #else */ + + /* set the face states and fluxes of those quantities that are passively advected */ + face_set_scalar_states_and_fluxes(&state_L, &state_R, &state_face, &fluxes); + + face_limit_fluxes(&state_L, &state_R, &state_center_L, &state_center_R, &fluxes, face_dt, &count, &count_reduced); + + /* put in cosmological factors */ + if(All.ComovingIntegrationOn) + { + fluxes.momentum[0] *= atime; + fluxes.momentum[1] *= atime; + fluxes.momentum[2] *= atime; + fluxes.energy *= atime * atime; +#ifdef MHD + 
fluxes.B[0] *= sqrtatime; + fluxes.B[1] *= sqrtatime; + fluxes.B[2] *= sqrtatime; +#ifdef MHD_POWELL + state_face.Bx *= sqrtatime; +#endif /* #ifdef MHD_POWELL */ +#endif /* #ifdef MHD */ + } + +#else /* #ifndef MESHRELAX */ + + /* just solve the advection equation instead of Riemann problem */ + + solve_advection(&state_L, &state_R, &state_face, &geom, vel_face); + face_clear_fluxes(&fluxes); + face_add_fluxes_advection(&state_face, &fluxes, &geom, vel_face); + face_set_scalar_states_and_fluxes(&state_L, &state_R, &state_face, &fluxes); + +#endif /* #ifndef MESHRELAX #else */ + +#ifndef ISOTHERM_EQS + if(!gsl_finite(fluxes.energy)) + { + printf("i=%d eFlux-Bummer: %g %g %g\n", i, fluxes.energy, state_face.press, state_face.rho); + printf("rho_L=%g velx_L=%g vely_L=%g velz_L=%g press_L=%g\n", state_L.rho, state_L.velx, state_L.vely, state_L.velz, + state_L.press); + printf("rho_R=%g velx_R=%g vely_R=%g velz_R=%g press_R=%g\n", state_R.rho, state_R.velx, state_R.vely, state_R.velz, + state_R.press); + print_particle_info(i); + terminate("infinity encountered"); + } +#endif /* #ifndef ISOTHERM_EQS */ + + /* now apply the flux to update the conserved states of the cells */ + + if(face_dt > 0) /* selects active faces */ + { + int k, p, q; + double dir; + double fac = face_dt * VF[i].area; +#if defined(MAXSCALARS) + int m; +#endif /* #if defined(MAXSCALARS) */ + + fac *= 0.5; + +#if defined(MHD_POWELL) + struct state *state_center, *delta_time; +#endif /* #if defined(MHD_POWELL) */ + for(k = 0; k < 2; k++) + { +#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) + int qother; +#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */ + if(k == 0) + { + q = VF[i].p1; + p = DP[q].index; + dir = -fac; +#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) + qother = VF[i].p2; +#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */ +#if defined(MHD_POWELL) + 
state_center = &state_center_L; + delta_time = &delta_time_L; +#endif /* #if defined(MHD_POWELL) */ + } + else + { + q = VF[i].p2; + p = DP[q].index; + dir = +fac; +#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) + qother = VF[i].p1; +#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */ +#if defined(MHD_POWELL) + state_center = &state_center_R; + delta_time = &delta_time_R; +#endif /* #if defined(MHD_POWELL) */ + } + + if(DP[q].task == ThisTask) + { + if(DP[q].index >= NumGas) /* this is a local ghost point */ + { + if(DP[VF[i].p1].ID == DP[VF[i].p2].ID) /* this may happen for reflective points */ + continue; + p -= NumGas; + } + + /* note: this will be executed if P[p] is a local point, independent of active or not */ + P[p].Mass += dir * fluxes.mass; + SphP[p].Momentum[0] += dir * fluxes.momentum[0]; + SphP[p].Momentum[1] += dir * fluxes.momentum[1]; + SphP[p].Momentum[2] += dir * fluxes.momentum[2]; + +#ifdef MHD + SphP[p].BConserved[0] += dir * fluxes.B[0]; + SphP[p].BConserved[1] += dir * fluxes.B[1]; + SphP[p].BConserved[2] += dir * fluxes.B[2]; +#if defined(MHD_POWELL) + double Velx = state_center->velx + delta_time->velx + vel_face[0]; + double Vely = state_center->vely + delta_time->vely + vel_face[1]; + double Velz = state_center->velz + delta_time->velz + vel_face[2]; + + if(All.ComovingIntegrationOn) + { + Velx += atime * hubble_a * state_center->dx; + Vely += atime * hubble_a * state_center->dy; + Velz += atime * hubble_a * state_center->dz; + } + + double Bx = state_center->Bx + delta_time->Bx; + double By = state_center->By + delta_time->By; + double Bz = state_center->Bz + delta_time->Bz; + + SphP[p].BConserved[0] += dir * Velx * state_face.Bx; + SphP[p].BConserved[1] += dir * Vely * state_face.Bx; + SphP[p].BConserved[2] += dir * Velz * state_face.Bx; + + SphP[p].Momentum[0] += dir * Bx * state_face.Bx; + SphP[p].Momentum[1] += dir * By * state_face.Bx; + SphP[p].Momentum[2] += dir * 
Bz * state_face.Bx; + + SphP[p].Energy += dir * (Bx * Velx + By * Vely + Bz * Velz) * state_face.Bx * atime; + + { + double dMomX = dir * Bx * state_face.Bx; + double dMomY = dir * By * state_face.Bx; + double dMomZ = dir * Bz * state_face.Bx; + + All.Powell_Momentum[0] += dMomX; + All.Powell_Momentum[1] += dMomY; + All.Powell_Momentum[2] += dMomZ; + + double dx = SphP[p].Center[0] - 0.5 * All.BoxSize; + double dy = SphP[p].Center[1] - 0.5 * All.BoxSize; + double dz = SphP[p].Center[2] - 0.5 * All.BoxSize; + + All.Powell_Angular_Momentum[0] += dy * dMomZ - dz * dMomY; + All.Powell_Angular_Momentum[1] += dz * dMomX - dx * dMomZ; + All.Powell_Angular_Momentum[2] += dx * dMomY - dy * dMomX; + All.Powell_Energy += dir * (Bx * Velx + By * Vely + Bz * Velz) * state_face.Bx * atime; + } +#endif /* #if defined(MHD_POWELL) */ +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + for(m = 0; m < N_Scalar; m++) + { + *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[m].offset_mass) += dir * fluxes.scalars[m]; + } +#endif /* #ifdef MAXSCALARS */ + +#if !defined(ISOTHERM_EQS) + SphP[p].Energy += dir * fluxes.energy; +#endif /* #if !defined(ISOTHERM_EQS) */ + } + else + { + /* here we have a foreign ghost point */ + if(DP[q].originalindex < 0) + terminate("should not happen"); + + if(Nflux >= MaxNflux) + { + T->Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR; + MaxNflux = T->Indi.AllocFacNflux; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNflux=%d Indi.AllocFacNflux=%g\n", ThisTask, MaxNflux, + T->Indi.AllocFacNflux); +#endif /* #ifdef VERBOSE */ + FluxList = myrealloc_movable(FluxList, MaxNflux * sizeof(struct flux_list_data)); + + if(Nflux >= MaxNflux) + terminate("Nflux >= MaxNflux"); + } + + FluxList[Nflux].task = DP[q].task; + FluxList[Nflux].index = DP[q].originalindex; + + FluxList[Nflux].dM = dir * fluxes.mass; + + FluxList[Nflux].dP[0] = dir * fluxes.momentum[0]; + FluxList[Nflux].dP[1] = dir * fluxes.momentum[1]; + FluxList[Nflux].dP[2] = dir * 
fluxes.momentum[2]; + +#if !defined(ISOTHERM_EQS) + FluxList[Nflux].dEnergy = dir * fluxes.energy; +#endif /* #if !defined(ISOTHERM_EQS) */ + +#ifdef MHD + FluxList[Nflux].dB[0] = dir * fluxes.B[0]; + FluxList[Nflux].dB[1] = dir * fluxes.B[1]; + FluxList[Nflux].dB[2] = dir * fluxes.B[2]; +#if defined(MHD_POWELL) + double Velx = state_center->velx + delta_time->velx + vel_face[0]; + double Vely = state_center->vely + delta_time->vely + vel_face[1]; + double Velz = state_center->velz + delta_time->velz + vel_face[2]; + + if(All.ComovingIntegrationOn) + { + Velx += atime * hubble_a * state_center->dx; + Vely += atime * hubble_a * state_center->dy; + Velz += atime * hubble_a * state_center->dz; + } + + double Bx = state_center->Bx + delta_time->Bx; + double By = state_center->By + delta_time->By; + double Bz = state_center->Bz + delta_time->Bz; + + FluxList[Nflux].dB[0] += dir * Velx * state_face.Bx; + FluxList[Nflux].dB[1] += dir * Vely * state_face.Bx; + FluxList[Nflux].dB[2] += dir * Velz * state_face.Bx; + + FluxList[Nflux].dP[0] += dir * Bx * state_face.Bx; + FluxList[Nflux].dP[1] += dir * By * state_face.Bx; + FluxList[Nflux].dP[2] += dir * Bz * state_face.Bx; +#ifndef ISOTHERM_EQS + FluxList[Nflux].dEnergy += dir * (Bx * Velx + By * Vely + Bz * Velz) * state_face.Bx * atime; +#endif /* #ifndef ISOTHERM_EQS */ + + { + double dMomX = dir * Bx * state_face.Bx; + double dMomY = dir * By * state_face.Bx; + double dMomZ = dir * Bz * state_face.Bx; + + All.Powell_Momentum[0] += dMomX; + All.Powell_Momentum[1] += dMomY; + All.Powell_Momentum[2] += dMomZ; + + double dx = PrimExch[p].Center[0] - 0.5 * All.BoxSize; + double dy = PrimExch[p].Center[1] - 0.5 * All.BoxSize; + double dz = PrimExch[p].Center[2] - 0.5 * All.BoxSize; + + All.Powell_Angular_Momentum[0] += dy * dMomZ - dz * dMomY; + All.Powell_Angular_Momentum[1] += dz * dMomX - dx * dMomZ; + All.Powell_Angular_Momentum[2] += dx * dMomY - dy * dMomX; + All.Powell_Energy += dir * (Bx * Velx + By * Vely + Bz * Velz) 
* state_face.Bx * atime; + } +#endif /* #if defined(MHD_POWELL) */ +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + for(m = 0; m < N_Scalar; m++) + FluxList[Nflux].dConservedScalars[m] = dir * fluxes.scalars[m]; +#endif /* #ifdef MAXSCALARS */ + + Nflux++; + } + } + } + } + /* end of big loop over all faces */ + + TIMER_STOPSTART(CPU_FLUXES, CPU_FLUXES_COMM); + + /* now exchange the flux-list and apply it when needed */ + apply_flux_list(); + + TIMER_STOPSTART(CPU_FLUXES_COMM, CPU_FLUXES); + + myfree(FluxList); + + double in[2] = {count, count_reduced}, out[2]; + MPI_Reduce(in, out, 2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + if(ThisTask == 0) + { + tot_count = out[0]; + tot_count_reduced = out[1]; + + printf("FLUX: exchanged fluxes over %g faces, with %g reduced (fraction %g), cumulative fraction %g\n", tot_count, + tot_count_reduced, tot_count_reduced / (tot_count + 1.0e-30), All.TotCountReducedFluxes / (All.TotCountFluxes + 1.0e-30)); + All.TotCountReducedFluxes += tot_count_reduced; + All.TotCountFluxes += tot_count; + } + + fvs_evaluate_statistics(&stat); + +#ifdef MESHRELAX + for(i = 0; i < NumGas; i++) + { + if(P[i].Mass < 0) + { + terminate("negative mass reached for cell=%d mass=%g", P[i].ID, P[i].Mass); + + P[i].Mass = 0; + SphP[i].Energy = 0; + SphP[i].Momentum[0] = 0; + SphP[i].Momentum[1] = 0; + SphP[i].Momentum[2] = 0; + } + } +#endif /* #ifdef MESHRELAX */ + +#ifdef GODUNOV_STATS + endrun(); +#endif /* #ifdef GODUNOV_STATS */ + +#ifdef ONEDIMS_SPHERICAL + apply_spherical_source_terms(); +#endif /* #ifdef ONEDIMS_SPHERICAL */ + +#if defined(MHD_POWELL) && defined(VERBOSE) + double Powell_Momentum[3]; + double Powell_Angular_Momentum[3]; + double Powell_Energy; + + MPI_Reduce(All.Powell_Momentum, Powell_Momentum, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(All.Powell_Angular_Momentum, Powell_Angular_Momentum, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&All.Powell_Energy, &Powell_Energy, 1, MPI_DOUBLE, MPI_SUM, 0, 
MPI_COMM_WORLD); + + if(ThisTask == 0) + printf("MHD_POWELL: Total ST contribution: Mom=%g,%g,%g AngMom=%g,%g,%g Energy=%g\n", Powell_Momentum[0], Powell_Momentum[1], + Powell_Momentum[2], Powell_Angular_Momentum[0], Powell_Angular_Momentum[1], Powell_Angular_Momentum[2], Powell_Energy); +#endif /* #if defined(MHD_POWELL) && defined(VERBOSE) */ + +#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS + restore_face_areas(T); +#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */ + + TIMER_STOP(CPU_FLUXES); +} + +#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS +/*! \brief Writes face areas to a backup variable. + * + * \param[in, out] T Pointer to tessellation. + * + * \return void + */ +void backup_face_areas(tessellation *T) +{ + for(int i = 0; i < T->Nvf; i++) + T->VF[i].area_backup = T->VF[i].area; +} + +/*! \brief Restores face areas from a backup variable. + * + * \param[in, out] T Pointer to tessellation. + * + * \return void + */ +void restore_face_areas(tessellation *T) +{ + for(int i = 0; i < T->Nvf; i++) + T->VF[i].area = T->VF[i].area_backup; +} +#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */ + +/*! \brief Gets value of hydrodynamial quantities at face. + * + * \param[in] T Pointer to tessellation. + * \param[in] p Index in DP array. + * \param[in] i Index in VF array. + * \param[out] st State at face. 
+ *
+ *  The state is read from the local arrays P and SphP if the point belongs
+ *  to this task, and from the exchange buffers PrimExch and GradExch
+ *  otherwise. In addition the offset of the face center from the center of
+ *  the cell, the time since the last update of the primitive variables and
+ *  the (boundary-corrected) velocity of the mesh-generating point are stored.
+ *
+ * \return 0 on success, -1 if the index stored for point p is negative
+ */
+int face_get_state(tessellation *T, int p, int i, struct state *st)
+{
+  int particle;
+#if defined(MAXSCALARS)
+  int j;
+#endif /* #if defined(MAXSCALARS) */
+  double aBegin;
+
+  point *DP = T->DP;
+  face *VF  = T->VF;
+
+  particle = DP[p].index;
+
+  if(particle < 0)
+    return -1;
+
+  if(particle >= NumGas && DP[p].task == ThisTask) /* local ghost point: map to the index of the original cell */
+    particle -= NumGas;
+
+  /* offset of the face center from the center of this cell */
+  if(DP[p].task == ThisTask)
+    {
+      st->dx = VF[i].cx - SphP[particle].Center[0];
+      st->dy = VF[i].cy - SphP[particle].Center[1];
+      st->dz = VF[i].cz - SphP[particle].Center[2];
+    }
+  else
+    {
+      st->dx = VF[i].cx - PrimExch[particle].Center[0];
+      st->dy = VF[i].cy - PrimExch[particle].Center[1];
+      st->dz = VF[i].cz - PrimExch[particle].Center[2];
+    }
+
+    /* correct for periodicity */
+#if !defined(REFLECTIVE_X) && !defined(ONEDIMS_SPHERICAL)
+  if(st->dx < -boxHalf_X)
+    st->dx += boxSize_X;
+  if(st->dx > boxHalf_X)
+    st->dx -= boxSize_X;
+#endif /* #if !defined(REFLECTIVE_X) && !defined(ONEDIMS_SPHERICAL) */
+#if !defined(REFLECTIVE_Y)
+  if(st->dy < -boxHalf_Y)
+    st->dy += boxSize_Y;
+  if(st->dy > boxHalf_Y)
+    st->dy -= boxSize_Y;
+#endif /* #if !defined(REFLECTIVE_Y) */
+#if !defined(REFLECTIVE_Z)
+  if(st->dz < -boxHalf_Z)
+    st->dz += boxSize_Z;
+  if(st->dz > boxHalf_Z)
+    st->dz -= boxSize_Z;
+#endif /* #if !defined(REFLECTIVE_Z) */
+
+#ifdef ONEDIMS_SPHERICAL
+  if(DP[p].task == ThisTask)
+    st->radius = SphP[particle].Center[0];
+  else
+    st->radius = PrimExch[particle].Center[0];
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+  if(DP[p].task == ThisTask)
+    {
+      /* cell is stored on this task: read from P and SphP */
+      st->velGas[0] = P[particle].Vel[0];
+      st->velGas[1] = P[particle].Vel[1];
+      st->velGas[2] = P[particle].Vel[2];
+
+      st->velVertex[0] = SphP[particle].VelVertex[0];
+      st->velVertex[1] = SphP[particle].VelVertex[1];
+      st->velVertex[2] = SphP[particle].VelVertex[2];
+
+      st->rho = SphP[particle].Density;
+
+      st->press = SphP[particle].Pressure;
+
+      st->grad = &SphP[particle].Grad;
+
+      st->timeBin = P[particle].TimeBinHydro;
+
+      st->volume = SphP[particle].Volume;
+
+#ifdef MHD
+      st->Bx = SphP[particle].B[0];
+      st->By = SphP[particle].B[1];
+      st->Bz = SphP[particle].B[2];
+#ifdef MHD_POWELL
+      st->divB = SphP[particle].DivB;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+      for(j = 0; j < N_Scalar; j++)
+        st->scalars[j] = *(MyFloat *)(((char *)(&SphP[particle])) + scalar_elements[j].offset); /* scalar is addressed via its byte offset within SphP */
+#endif /* #ifdef MAXSCALARS */
+
+      aBegin = SphP[particle].TimeLastPrimUpdate;
+
+      st->oldmass     = SphP[particle].OldMass;
+      st->surfacearea = SphP[particle].SurfaceArea;
+      st->activearea  = SphP[particle].ActiveArea;
+      st->csnd        = get_sound_speed(particle);
+      st->ID          = P[particle].ID;
+    }
+  else
+    {
+      /* cell is stored on another task: read from the exchange buffers */
+      st->velGas[0] = PrimExch[particle].VelGas[0];
+      st->velGas[1] = PrimExch[particle].VelGas[1];
+      st->velGas[2] = PrimExch[particle].VelGas[2];
+
+      st->velVertex[0] = PrimExch[particle].VelVertex[0];
+      st->velVertex[1] = PrimExch[particle].VelVertex[1];
+      st->velVertex[2] = PrimExch[particle].VelVertex[2];
+
+      st->rho = PrimExch[particle].Density;
+
+      st->press = PrimExch[particle].Pressure;
+
+      st->grad = &GradExch[particle];
+
+      st->timeBin = PrimExch[particle].TimeBinHydro; /* This is the hydro timestep */
+
+      st->volume = PrimExch[particle].Volume;
+
+#ifdef MHD
+      st->Bx = PrimExch[particle].B[0];
+      st->By = PrimExch[particle].B[1];
+      st->Bz = PrimExch[particle].B[2];
+#ifdef MHD_POWELL
+      st->divB = PrimExch[particle].DivB;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+      for(j = 0; j < N_Scalar; j++)
+        st->scalars[j] = PrimExch[particle].Scalars[j];
+#endif /* #ifdef MAXSCALARS */
+
+      aBegin = PrimExch[particle].TimeLastPrimUpdate;
+
+      st->oldmass     = PrimExch[particle].OldMass;
+      st->surfacearea = PrimExch[particle].SurfaceArea;
+      st->activearea  = PrimExch[particle].ActiveArea;
+      st->csnd        = PrimExch[particle].Csnd;
+      st->ID          = DP[p].ID;
+    }
+
+  st->dtExtrapolation = All.Time - aBegin; /* time elapsed since the last update of the primitive variables */
+
+  /* check for reflecting or outflowing boundaries */
+  face_boundary_check_vertex(T, p, &st->velVertex[0], &st->velVertex[1], &st->velVertex[2]);
+
+  return 0;
+}
+
+/*! \brief Checks for boundary cells with non-periodic boundary conditions.
+ *
+ *  Adjusts the velocities accordingly: a velocity component is reversed if
+ *  the image flags of the point contain the reflection flag of the
+ *  corresponding axis (only for axes compiled with REFLECTIVE_X/Y/Z).
+ *
+ * \param[in] T Pointer to tessellation.
+ * \param[in] p Index in DP array.
+ * \param[in, out] velx Velocity in x coordinate.
+ * \param[in, out] vely Velocity in y coordinate.
+ * \param[in, out] velz Velocity in z coordinate.
+ *
+ * \return void
+ */
+void face_boundary_check_vertex(tessellation *T, int p, MyFloat *velx, MyFloat *vely, MyFloat *velz)
+{
+  /* check for reflecting or outflowing boundaries */
+#if defined(REFLECTIVE_X)
+  if((T->DP[p].image_flags & REFL_X_FLAGS))
+    *velx *= -1;
+#endif /* #if defined(REFLECTIVE_X) */
+#if defined(REFLECTIVE_Y)
+  if((T->DP[p].image_flags & REFL_Y_FLAGS))
+    *vely *= -1;
+#endif /* #if defined(REFLECTIVE_Y) */
+#if defined(REFLECTIVE_Z)
+  if((T->DP[p].image_flags & REFL_Z_FLAGS))
+    *velz *= -1;
+#endif /* #if defined(REFLECTIVE_Z) */
+
+#ifdef ONEDIMS_SPHERICAL
+  if(p == -1) /* NOTE(review): index -1 presumably denotes the inner boundary point -- confirm */
+    *velx *= -1;
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+}
+
+/*! \brief Checks for boundary cells with non-periodic boundary conditions.
+ *
+ * \param[in] p Pointer to point.
+ * \param[in, out] velx Velocity in x direction.
+ * \param[in, out] vely Velocity in y direction.
+ * \param[in, out] velz Velocity in z direction.
+ * + * \return void + */ +void face_boundary_check(point *p, double *velx, double *vely, double *velz) +{ + /* check for reflecting or outflowing boundaries */ +#if defined(REFLECTIVE_X) + if((p->image_flags & REFL_X_FLAGS) && !(p->image_flags & OUTFLOW_X)) + *velx *= -1; +#endif /* #if defined(REFLECTIVE_X) */ +#if defined(REFLECTIVE_Y) + if((p->image_flags & REFL_Y_FLAGS) && !(p->image_flags & OUTFLOW_Y)) + *vely *= -1; +#endif /* #if defined(REFLECTIVE_Y) */ +#if defined(REFLECTIVE_Z) + if((p->image_flags & REFL_Z_FLAGS) && !(p->image_flags & OUTFLOW_Z)) + *velz *= -1; +#endif /* #if defined(REFLECTIVE_Z) */ + +#ifdef ONEDIMS_SPHERICAL + if(p == &Mesh.DP[-1]) + *velx *= -1; +#endif /* #ifdef ONEDIMS_SPHERICAL */ +} + +/*! \brief Checks whether local task is responsible for a face. + * + * \param[in] T Pointer to tessellation. + * \param[in] p1 Index in DP array of point1 making up the face. + * \param[in] p2 Index in DP array of point2 making up the face. + * \param[in] st_L Left hand side state of the face. + * \param[in] st_R Right hand side state of the face. + * + * \return -1 if not local responsibility, 0 if it is. 
+ */ +int face_check_responsibility_of_this_task(tessellation *T, int p1, int p2, struct state *st_L, struct state *st_R) +{ + int low_p, high_p; + struct state *low_state, *high_state; + + point *DP = T->DP; + + if(DP[p1].ID < DP[p2].ID) + { + low_p = p1; + high_p = p2; + low_state = st_L; + high_state = st_R; + } + else if(DP[p1].ID > DP[p2].ID) + { + low_p = p2; + high_p = p1; + low_state = st_R; + high_state = st_L; + } + else + { + /* equality of the IDs should only occur for reflective boundaries */ + if(DP[p1].task == ThisTask && DP[p1].index < NumGas) + { + low_p = p1; + high_p = p2; + low_state = st_L; + high_state = st_R; + } + else + { + low_p = p2; + high_p = p1; + low_state = st_R; + high_state = st_L; + } + } + + if(TimeBinSynchronized[low_state->timeBin]) /* the one with the lower ID is active */ + { + /* we need to check whether the one with the lower ID is a local particle */ + if(DP[low_p].task == ThisTask && DP[low_p].index < NumGas) + return 0; + } + else if(TimeBinSynchronized[high_state->timeBin]) /* only the side with the higher ID is active */ + { + /* we need to check whether we hold the one with the higher ID, if yes, we'll do it */ + if(DP[high_p].task == ThisTask && DP[high_p].index < NumGas) + return 0; + } + + return -1; /* we can skip this face on the local task */ +} + +/*! \brief Determines timestep of face. + * + * \param[in, out] state_L Left hand side state of face (dt_half and, if comoving, dtExtrapolation are updated). + * \param[in, out] state_R Right hand side state of face (dt_half and, if comoving, dtExtrapolation are updated). + * \param[out] hubble_a Value of Hubble function at scalefactor + * a(cosmological). + * \param[out] atime Scalefactor (cosmological). + * + * \return Face timestep. 
+ */ +double face_timestep(struct state *state_L, struct state *state_R, double *hubble_a, double *atime) +{ + integertime ti_begin_L, ti_begin_R; + short int timeBin; + double face_dt; + + /* determine most recent start of the time bins */ + ti_begin_L = (All.Ti_Current >> state_L->timeBin) << state_L->timeBin; + ti_begin_R = (All.Ti_Current >> state_R->timeBin) << state_R->timeBin; + + /* take the minimum of the two */ + timeBin = state_L->timeBin; + if(timeBin > state_R->timeBin) + timeBin = state_R->timeBin; + + /* compute the half-step prediction times */ + state_L->dt_half = (All.Ti_Current + (((integertime)1) << (timeBin - 1)) - ti_begin_L) * All.Timebase_interval; + state_R->dt_half = (All.Ti_Current + (((integertime)1) << (timeBin - 1)) - ti_begin_R) * All.Timebase_interval; + + if(All.ComovingIntegrationOn) + { + /* calculate scale factor at middle of timestep */ + *atime = All.TimeBegin * exp((All.Ti_Current + (((integertime)1) << (timeBin - 1))) * All.Timebase_interval); + *hubble_a = hubble_function(*atime); + } + else + *atime = *hubble_a = 1.0; + + /* set the actual time-step for the face */ + face_dt = (((integertime)1) << timeBin) * All.Timebase_interval; + + if(All.ComovingIntegrationOn) + { + /* converts to delta_t */ + state_L->dt_half /= *hubble_a; + state_R->dt_half /= *hubble_a; + face_dt /= *hubble_a; + + face_dt /= *atime; /* we need dt/a, the (1/a) takes care of the gradient in the cosmological euler equations */ + + state_L->dtExtrapolation /= *hubble_a; + state_L->dtExtrapolation /= *atime; + state_R->dtExtrapolation /= *hubble_a; + state_R->dtExtrapolation /= *atime; + } + + return face_dt; +} + +/*! \brief Converts the velocities to local frame, compensating for the + * movement of the face. + * + * \param[in, out] st State to be converted to local frame. + * \param[in] vel_face Face velocity. + * \param[in] hubble_a Value of Hubble function at scalefactor + * a (cosmological). + * \param[in] atime Scalefactor (cosmological). 
+ * + * \return void + */ +void state_convert_to_local_frame(struct state *st, double *vel_face, double hubble_a, double atime) +{ + if(All.ComovingIntegrationOn) + { + st->velGas[0] /= atime; /* convert to peculiar velocity */ + st->velGas[1] /= atime; + st->velGas[2] /= atime; + } + + st->velx = st->velGas[0] - vel_face[0]; + st->vely = st->velGas[1] - vel_face[1]; + st->velz = st->velGas[2] - vel_face[2]; + + if(All.ComovingIntegrationOn) + { + st->velx -= atime * hubble_a * st->dx; /* need to get the physical velocity relative to the face */ + st->vely -= atime * hubble_a * st->dy; + st->velz -= atime * hubble_a * st->dz; + } +} + +/*! \brief Extrapolates the state in time. + * + * \param[out] delta Change due to time extrapolation. + * \param[in] st State to be extrapolated. + * \param[in] atime Scalefactor at this time (cosmological). + * + * \return void + */ +void face_do_time_extrapolation(struct state *delta, struct state *st, double atime) +{ + /* st is the state at the center of the cell */ + + /* the code still allows for empty cells but we are going to divide + * by rho, so ... 
+ */ + if(st->rho <= 0) + return; + +#if defined(MESHRELAX) || defined(DISABLE_TIME_EXTRAPOLATION) + /* do not time extrapolation */ + (void)st; + (void)atime; + memset(delta, 0, sizeof(struct state)); + return; +#endif /* #if defined (MESHRELAX) || defined (DISABLE_TIME_EXTRAPOLATION) */ + + struct grad_data *grad = st->grad; + + double dt_half = st->dtExtrapolation; + + if(All.ComovingIntegrationOn) + dt_half /= atime; + + delta->rho = -dt_half * (st->velx * grad->drho[0] + st->rho * grad->dvel[0][0] + st->vely * grad->drho[1] + + st->rho * grad->dvel[1][1] + st->velz * grad->drho[2] + st->rho * grad->dvel[2][2]); + + delta->velx = -dt_half * (1.0 / st->rho * grad->dpress[0] + st->velx * grad->dvel[0][0] + st->vely * grad->dvel[0][1] + + st->velz * grad->dvel[0][2]); + + delta->vely = -dt_half * (1.0 / st->rho * grad->dpress[1] + st->velx * grad->dvel[1][0] + st->vely * grad->dvel[1][1] + + st->velz * grad->dvel[1][2]); + + delta->velz = -dt_half * (1.0 / st->rho * grad->dpress[2] + st->velx * grad->dvel[2][0] + st->vely * grad->dvel[2][1] + + st->velz * grad->dvel[2][2]); + + delta->press = -dt_half * (GAMMA * st->press * (grad->dvel[0][0] + grad->dvel[1][1] + grad->dvel[2][2]) + + st->velx * grad->dpress[0] + st->vely * grad->dpress[1] + st->velz * grad->dpress[2]); + +#ifdef ONEDIMS_SPHERICAL + delta->velx += dt_half * 2. 
* st->press / (st->rho * st->radius); +#endif /* #ifdef ONEDIMS_SPHERICAL */ + +#ifdef MHD + delta->velx += + -dt_half * (1.0 / st->rho * + (st->By * grad->dB[1][0] + st->Bz * grad->dB[2][0] - st->By * grad->dB[0][1] - st->Bz * grad->dB[0][2]) / atime); + + delta->vely += + -dt_half * (1.0 / st->rho * + (st->Bx * grad->dB[0][1] + st->Bz * grad->dB[2][1] - st->Bx * grad->dB[1][0] - st->Bz * grad->dB[1][2]) / atime); + + delta->velz += + -dt_half * (1.0 / st->rho * + (st->Bx * grad->dB[0][2] + st->By * grad->dB[1][2] - st->Bx * grad->dB[2][0] - st->By * grad->dB[2][1]) / atime); + + delta->Bx = + -dt_half * (-st->velx * grad->dB[1][1] - grad->dvel[0][1] * st->By + st->vely * grad->dB[0][1] + grad->dvel[1][1] * st->Bx + + st->velz * grad->dB[0][2] + grad->dvel[2][2] * st->Bx - st->velx * grad->dB[2][2] - grad->dvel[0][2] * st->Bz); + + delta->By = + -dt_half * (+st->velx * grad->dB[1][0] + grad->dvel[0][0] * st->By - st->vely * grad->dB[0][0] - grad->dvel[1][0] * st->Bx - + st->vely * grad->dB[2][2] - grad->dvel[1][2] * st->Bz + st->velz * grad->dB[1][2] + grad->dvel[2][2] * st->By); + + delta->Bz = + -dt_half * (-st->velz * grad->dB[0][0] - grad->dvel[2][0] * st->Bx + st->velx * grad->dB[2][0] + grad->dvel[0][0] * st->Bz + + st->vely * grad->dB[2][1] + grad->dvel[1][1] * st->Bz - st->velz * grad->dB[1][1] - grad->dvel[2][1] * st->By); +#endif /* #ifdef MHD */ + +#if defined(MAXSCALARS) + int k; + for(k = 0; k < N_Scalar; k++) + { + delta->scalars[k] = + -dt_half * (st->velx * grad->dscalars[k][0] + st->vely * grad->dscalars[k][1] + st->velz * grad->dscalars[k][2]); + } +#endif /* #if defined(MAXSCALARS) */ +} + +/*! \brief Extrapolates the state in space. + * + * Linear extrapolation with neighbor cell to their common face. + * + * \param[out] delta Change due to spatial extrapolation. + * \param[in] st State to be extrapolated. + * \param[in] st_other state of other cell. 
+ * + * \return void + */ +void face_do_spatial_extrapolation(struct state *delta, struct state *st, struct state *st_other) +{ +#ifdef DISABLE_SPATIAL_RECONSTRUCTION + memset(delta, 0, sizeof(struct state)); + return; +#endif /* #ifdef DISABLE_SPATIAL_RECONSTRUCTION */ + +#ifdef NO_RECONSTRUCTION_AT_STRONG_SHOCKS + if(dmax(st->press, st_other->press) > 100. * dmin(st->press, st_other->press)) + { + memset(delta, 0, sizeof(struct state)); + return; + } +#endif /* #ifdef NO_RECONSTRUCTION_AT_STRONG_SHOCKS */ + + struct grad_data *grad = st->grad; + + double dx[3]; + dx[0] = st->dx; + dx[1] = st->dy; + dx[2] = st->dz; + + double r[3]; + r[0] = -st_other->dx + st->dx; + r[1] = -st_other->dy + st->dy; + r[2] = -st_other->dz + st->dz; + + face_do_spatial_extrapolation_single_quantity(&delta->rho, st->rho, st_other->rho, grad->drho, dx, r); + + face_do_spatial_extrapolation_single_quantity(&delta->velx, st->velx, st_other->velx, grad->dvel[0], dx, r); + face_do_spatial_extrapolation_single_quantity(&delta->vely, st->vely, st_other->vely, grad->dvel[1], dx, r); + face_do_spatial_extrapolation_single_quantity(&delta->velz, st->velz, st_other->velz, grad->dvel[2], dx, r); + + face_do_spatial_extrapolation_single_quantity(&delta->press, st->press, st_other->press, grad->dpress, dx, r); + +#ifdef MHD + face_do_spatial_extrapolation_single_quantity(&delta->Bx, st->Bx, st_other->Bx, grad->dB[0], dx, r); + face_do_spatial_extrapolation_single_quantity(&delta->By, st->By, st_other->By, grad->dB[1], dx, r); + face_do_spatial_extrapolation_single_quantity(&delta->Bz, st->Bz, st_other->Bz, grad->dB[2], dx, r); +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + int k; + for(k = 0; k < N_Scalar; k++) + { + face_do_spatial_extrapolation_single_quantity(&delta->scalars[k], st->scalars[k], st_other->scalars[k], grad->dscalars[k], dx, + r); + } +#endif /* #ifdef MAXSCALARS */ +} + +/*! \brief Extrapolates a single quantity in space. 
+ * + * Linear extrapolation using the cell gradient: delta = grad . dx. + * + * \param[out] delta Change due to spatial extrapolation. + * \param[in] st State to be extrapolated (unused). + * \param[in] st_other state of other cell (unused). + * \param[in] grad Gradient used for extrapolation. + * \param[in] dx Displacement vector the gradient is dotted with. + * \param[in] r (unused). + * + * \return void + */ +void face_do_spatial_extrapolation_single_quantity(double *delta, double st, double st_other, MySingle *grad, double *dx, double *r) +{ + (void)st; + (void)st_other; + (void)r; + *delta = grad[0] * dx[0] + grad[1] * dx[1] + grad[2] * dx[2]; +} + +/*! \brief Adds space and time extrapolation to state. + * + * \param[in, out] st_face State that is modified. + * \param[in] delta_time Change of state due to time extrapolation. + * \param[in] delta_space Change of state due to space extrapolation. + * \param[in, out] stat Structure that counts face value statistics. + * + * \return void + */ +void face_add_extrapolations(struct state *st_face, struct state *delta_time, struct state *delta_space, struct fvs_stat *stat) +{ + stat->count_disable_extrapolation += 1; + + if(st_face->rho <= 0) + return; + + if(st_face->rho + delta_time->rho + delta_space->rho < 0 || st_face->press + delta_time->press + delta_space->press < 0) + return; + + stat->count_disable_extrapolation -= 1; + +#if !defined(MESHRELAX) && !defined(DISABLE_TIME_EXTRAPOLATION) + face_add_extrapolation(st_face, delta_time, stat); +#endif /* #if !defined(MESHRELAX) && !defined(DISABLE_TIME_EXTRAPOLATION) */ + +#if !defined(DISABLE_SPATIAL_EXTRAPOLATION) + face_add_extrapolation(st_face, delta_space, stat); +#endif /* #if !defined(DISABLE_SPATIAL_EXTRAPOLATION) */ +} + +/*! \brief Adds an extrapolation to state. + * + * Called in face_add_extrapolations(..). + * + * \param[in, out] st_face State that is modified. + * \param[in] delta Change of state due to extrapolation. 
+ * \param[in] stat (unused) + * + * \return void + */ +void face_add_extrapolation(struct state *st_face, struct state *delta, struct fvs_stat *stat) +{ + st_face->rho += delta->rho; + st_face->velx += delta->velx; + st_face->vely += delta->vely; + st_face->velz += delta->velz; + st_face->press += delta->press; + +#ifdef MHD +#ifndef ONEDIMS + /* in one dimension, Bx has to be constant! */ + st_face->Bx += delta->Bx; +#endif /* #ifndef ONEDIMS */ + st_face->By += delta->By; + st_face->Bz += delta->Bz; +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + int k; + for(k = 0; k < N_Scalar; k++) + st_face->scalars[k] += delta->scalars[k]; +#endif /* #ifdef MAXSCALARS */ +} + +/*! \brief Adds an extrapolation to state. + * + * But checks for positivity of density and pressure. + * + * \param[in, out] st_face State that is modified. + * \param[in] delta Change of state due to extrapolation. + * \param[in, out] stat Structure that counts face value statistics. + * + * \return void + */ +void face_add_extrapolation_with_check(struct state *st_face, struct state *delta, struct fvs_stat *stat) +{ + stat->count_disable_extrapolation += 1; + + if(st_face->rho <= 0) + return; + + if(st_face->rho + delta->rho < 0 || st_face->press + delta->press < 0) + return; + + stat->count_disable_extrapolation -= 1; + + face_add_extrapolation(st_face, delta, stat); +} + +/*! \brief Rotates velocities and magnetic field. + * + * \param[in, out] st State that contains velocities to be rotated. + * \param[in] geom Geometry with a rotation matrix. 
+ * + * \return void + */ +void face_turn_velocities(struct state *st, struct geometry *geom) +{ + double velx, vely, velz; + + velx = st->velx; + vely = st->vely; + velz = st->velz; + + st->velx = velx * geom->nx + vely * geom->ny + velz * geom->nz; + st->vely = velx * geom->mx + vely * geom->my + velz * geom->mz; + st->velz = velx * geom->px + vely * geom->py + velz * geom->pz; + +#ifdef MHD + double Bx, By, Bz; + + Bx = st->Bx; + By = st->By; + Bz = st->Bz; + + st->Bx = Bx * geom->nx + By * geom->ny + Bz * geom->nz; + st->By = Bx * geom->mx + By * geom->my + Bz * geom->mz; + st->Bz = Bx * geom->px + By * geom->py + Bz * geom->pz; +#endif /* #ifdef MHD */ +} + +/*! \brief Sets the state at the face to its upwind value. + * + * \param[in] st_L Left hand side hydrodynamical state. + * \param[in] st_R Right hand side hydrodynamical state. + * \param[out] st_face State at face. + * \param[in] geom Geometry structure that includes normal vector of face. + * \param[in] vel_face Velocity vector of face. + * + * \return void + */ +void solve_advection(struct state *st_L, struct state *st_R, struct state_face *st_face, struct geometry *geom, double *vel_face) +{ + double ev = vel_face[0] * geom->nx + vel_face[1] * geom->ny + vel_face[2] * geom->nz; + + if(ev < 0) + { + st_face->rho = st_L->rho; + st_face->velx = st_L->velx; + st_face->vely = st_L->vely; + st_face->velz = st_L->velz; + st_face->press = st_L->press; + } + else + { + st_face->rho = st_R->rho; + st_face->velx = st_R->velx; + st_face->vely = st_R->vely; + st_face->velz = st_R->velz; + st_face->press = st_R->press; + } +} + +/*! \brief Rotates velocities backwards. + * + * Inverse operation to face_turn_velocities(...). + * + * \param[in, out] st_face State that contains velocities to be rotated. + * \param[in] geom Geometry with a rotation matrix. 
+ * + * \return void + */ +void face_turnback_velocities(struct state_face *st_face, struct geometry *geom) +{ + double velx, vely, velz; + + velx = st_face->velx; + vely = st_face->vely; + velz = st_face->velz; + + st_face->velx = velx * geom->nx + vely * geom->mx + velz * geom->px; + st_face->vely = velx * geom->ny + vely * geom->my + velz * geom->py; + st_face->velz = velx * geom->nz + vely * geom->mz + velz * geom->pz; +} + +/*! \brief Sets the scalar states and computes the scalar fluxes from the mass flux. + * + * \param[in] st_L Left hand side state. + * \param[in] st_R Right hand side state. + * \param[out] st_face Face state. + * \param[in, out] flux Flux over face (mass flux is read, scalar fluxes are set). + * + * \return void + */ +void face_set_scalar_states_and_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux) +{ +#if defined(MAXSCALARS) + int i; + + double normfac, normifac; + + if(flux->mass > 0) + st_face->scalars = st_L->scalars; + else + st_face->scalars = st_R->scalars; + + /* Normalize species here */ + normfac = 0; + + for(i = 0; i < N_Scalar; i++) + { + flux->scalars[i] = st_face->scalars[i] * flux->mass; + + if(scalar_elements[i].type == SCALAR_TYPE_SPECIES) + normfac += st_face->scalars[i]; + } + + if(normfac != 0) + { + normifac = 1.0 / normfac; + + for(i = 0; i < N_Scalar; i++) + if(scalar_elements[i].type == SCALAR_TYPE_SPECIES || scalar_elements[i].type == SCALAR_TYPE_NORMALIZE) + flux->scalars[i] *= normifac; + } + +#endif /* #if defined(MAXSCALARS) */ +} + +#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) +/*! \brief Converts flux from face frame to simulation box frame. + * + * \param[in] st_L Left hand side state. + * \param[in] st_R Right hand side state. + * \param[in] vel_face Velocity vector of face. + * \param[in, out] flux Flux vector across face. 
+ * + * \return void + */ +void flux_convert_to_lab_frame(struct state *st_L, struct state *st_R, double *vel_face, struct fluxes *flux) +{ + double momx = flux->momentum[0]; + double momy = flux->momentum[1]; + double momz = flux->momentum[2]; + + flux->momentum[0] += vel_face[0] * flux->mass; + flux->momentum[1] += vel_face[1] * flux->mass; + flux->momentum[2] += vel_face[2] * flux->mass; + + flux->energy += momx * vel_face[0] + momy * vel_face[1] + momz * vel_face[2] + + 0.5 * flux->mass * (vel_face[0] * vel_face[0] + vel_face[1] * vel_face[1] + vel_face[2] * vel_face[2]); + +#ifdef MHD + double Bx; + Bx = 0.5 * (st_L->Bx + st_R->Bx); + + flux->B[0] -= vel_face[0] * Bx; + flux->B[1] -= vel_face[1] * Bx; + flux->B[2] -= vel_face[2] * Bx; +#endif /* #ifdef MHD */ +} +#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */ + +/*! \brief Rotates momentum flux and magnetic flux vector. + * + * flux->momentum vector needs to be turned in case the HLLC or Rusanov + * Riemann solvers are used. + * + * \param[in, out] flux Flux vector which is rotated. + * \param[in] geom Geometry structure that holds rotation matrix. + * + * \return void + */ +void face_turn_momentum_flux(struct fluxes *flux, struct geometry *geom) +{ + double momx = flux->momentum[0]; + double momy = flux->momentum[1]; + double momz = flux->momentum[2]; + + flux->momentum[0] = momx * geom->nx + momy * geom->mx + momz * geom->px; + flux->momentum[1] = momx * geom->ny + momy * geom->my + momz * geom->py; + flux->momentum[2] = momx * geom->nz + momy * geom->mz + momz * geom->pz; + +#ifdef MHD + double Bx = flux->B[0]; + double By = flux->B[1]; + double Bz = flux->B[2]; + + flux->B[0] = Bx * geom->nx + By * geom->mx + Bz * geom->px; + flux->B[1] = Bx * geom->ny + By * geom->my + Bz * geom->py; + flux->B[2] = Bx * geom->nz + By * geom->mz + Bz * geom->pz; +#endif /* #ifdef MHD */ +} + +/*! \brief Calculates the flux from face states. 
+ * + * \param[in] st_L (unused) + * \param[in] st_R (unused) + * \param[in] st_face State at face. + * \param[out] flux Flux at face. + * \param[in] geom Geometry structure containing normal vector of face. + * \param[in] vel_face Velocity vector of face. + * + * \return void + */ +void face_get_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux, struct geometry *geom, + double *vel_face) +{ + double fac; + + /* calculate fluxes for ordinary Riemann solver */ + + fac = (st_face->velx - vel_face[0]) * geom->nx + (st_face->vely - vel_face[1]) * geom->ny + (st_face->velz - vel_face[2]) * geom->nz; + + flux->mass = st_face->rho * fac; + + flux->momentum[0] = (st_face->rho * st_face->velx * fac + st_face->press * geom->nx); + flux->momentum[1] = (st_face->rho * st_face->vely * fac + st_face->press * geom->ny); + flux->momentum[2] = (st_face->rho * st_face->velz * fac + st_face->press * geom->nz); + +#ifndef ISOTHERM_EQS + flux->energy = + (0.5 * st_face->rho * (st_face->velx * st_face->velx + st_face->vely * st_face->vely + st_face->velz * st_face->velz) + + st_face->press / GAMMA_MINUS1) * + fac + + st_face->press * (st_face->velx * geom->nx + st_face->vely * geom->ny + st_face->velz * geom->nz); +#endif /* #ifndef ISOTHERM_EQS */ +} + +/*! \brief Flux limiter. + * + * Make sure cell cannot lose more mass than it contains... + * + * \param[in] st_L Left hand side hydrodynamical state. + * \param[in] st_R Right hand side hydrodynamical state. + * \param[in] st_center_L (unused) + * \param[in] st_center_R (unused) + * \param[in, out] flux Flux vector. + * \param[in] dt Timestep. + * \param[in, out] count Number of calls of this function. + * \param[in, out] count_reduced Number of flux reductions caused by this + * function. 
+ * + * \return void + */ +void face_limit_fluxes(struct state *st_L, struct state *st_R, struct state *st_center_L, struct state *st_center_R, + struct fluxes *flux, double dt, double *count, double *count_reduced) +{ + *count = *count + 1.0; + + /* choose upwind mass to determine a stability bound on the maximum allowed mass exchange, + (we do this to prevent negative masses under all circumstances) */ + double upwind_mass, upwind_activearea, reduc_fac; + integertime upwind_timebin, downstream_timebin; + + if(flux->mass > 0) + { + upwind_mass = st_L->oldmass; + upwind_activearea = st_L->activearea; + upwind_timebin = st_L->timeBin; + downstream_timebin = st_R->timeBin; + } + else + { + upwind_mass = st_R->oldmass; + upwind_activearea = st_R->activearea; + upwind_timebin = st_R->timeBin; + downstream_timebin = st_L->timeBin; + } + + if(upwind_timebin > downstream_timebin) + dt *= pow(2, upwind_timebin - downstream_timebin); + + if(fabs(flux->mass * dt * upwind_activearea) > 0.9 * upwind_mass) + { + reduc_fac = 0.9 * upwind_mass / fabs(flux->mass * dt * upwind_activearea); + + *count_reduced = *count_reduced + 1.0; + + flux->mass *= reduc_fac; + flux->energy *= reduc_fac; + flux->momentum[0] *= reduc_fac; + flux->momentum[1] *= reduc_fac; + flux->momentum[2] *= reduc_fac; + + /* remark: do not reduce the magnetic field flux, as it is not coupled to the mass flux */ +#ifdef MAXSCALARS + for(int i = 0; i < N_Scalar; i++) + flux->scalars[i] *= reduc_fac; +#endif /* #ifdef MAXSCALARS */ + } +} + +/*! \brief Set flux vector entries to zero. + * + * \param[out] flux Flux vector. + * + * \return void + */ +void face_clear_fluxes(struct fluxes *flux) +{ + flux->mass = 0; + flux->momentum[0] = 0; + flux->momentum[1] = 0; + flux->momentum[2] = 0; + flux->energy = 0; +#ifdef MHD + flux->B[0] = 0; + flux->B[1] = 0; + flux->B[2] = 0; +#endif /* #ifdef MHD */ +} + +/*! \brief Adds flux due to advection to flux vector. + * + * \param[in] st_face State at face. 
+ * \param[in, out] flux Flux vector. + * \param[in] geom Geometry structure containing the face normal vector. + * \param[in] vel_face Velocity vector of the face. + * + * \return void + */ +void face_add_fluxes_advection(struct state_face *st_face, struct fluxes *flux, struct geometry *geom, double *vel_face) +{ + double fac = -vel_face[0] * geom->nx - vel_face[1] * geom->ny - vel_face[2] * geom->nz; + + flux->mass += st_face->rho * fac; + + flux->momentum[0] += st_face->rho * st_face->velx * fac; + flux->momentum[1] += st_face->rho * st_face->vely * fac; + flux->momentum[2] += st_face->rho * st_face->velz * fac; + + flux->energy += + 0.5 * st_face->rho * fac * (st_face->velx * st_face->velx + st_face->vely * st_face->vely + st_face->velz * st_face->velz) + + st_face->press / GAMMA_MINUS1 * fac; +} + +/*! \brief Compares tasks of flux list data. + * + * Sort kernel for flux list data. + * + * \param[in] a First flux list data object. + * \param[in] b Second flux list data object. + * + * \return (-1,0,1) -1 if a->task < b->task. + */ +int flux_list_data_compare(const void *a, const void *b) +{ + if(((struct flux_list_data *)a)->task < (((struct flux_list_data *)b)->task)) + return -1; + + if(((struct flux_list_data *)a)->task > (((struct flux_list_data *)b)->task)) + return +1; + + return 0; +} + +/*! \brief Communicates flux list and applies fluxes to conserved hydro + * variables. 
+ * + * \return void + */ +void apply_flux_list(void) +{ + int i, j, p, nimport, ngrp, recvTask; +#if defined(MAXSCALARS) + int k; +#endif /* #if defined(MAXSCALARS) */ + + /* now exchange the flux-list and apply it when needed */ + + mysort(FluxList, Nflux, sizeof(struct flux_list_data), flux_list_data_compare); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(i = 0; i < Nflux; i++) + Send_count[FluxList[i].task]++; + + if(Send_count[ThisTask] > 0) + terminate("Send_count[ThisTask]"); + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + struct flux_list_data *FluxListGet = (struct flux_list_data *)mymalloc("FluxListGet", nimport * sizeof(struct flux_list_data)); + + /* exchange flux list data */ + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* send our flux entries for recvTask and receive its entries for us */ + MPI_Sendrecv(&FluxList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct flux_list_data), MPI_BYTE, recvTask, + TAG_DENS_A, &FluxListGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct flux_list_data), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + /* apply the fluxes */ + + for(i = 0; i < nimport; i++) + { + p = FluxListGet[i].index; + + P[p].Mass += FluxListGet[i].dM; + + SphP[p].Momentum[0] += FluxListGet[i].dP[0]; + SphP[p].Momentum[1] += FluxListGet[i].dP[1]; + SphP[p].Momentum[2] += FluxListGet[i].dP[2]; +#ifdef MHD + SphP[p].BConserved[0] += FluxListGet[i].dB[0]; + SphP[p].BConserved[1] += FluxListGet[i].dB[1]; + SphP[p].BConserved[2] += FluxListGet[i].dB[2]; +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + for(k 
= 0; k < N_Scalar; k++) + *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[k].offset_mass) += FluxListGet[i].dConservedScalars[k]; +#endif /* #ifdef MAXSCALARS */ + +#ifndef ISOTHERM_EQS + SphP[p].Energy += FluxListGet[i].dEnergy; +#endif /* #ifndef ISOTHERM_EQS */ + } + myfree(FluxListGet); +} + +/*! \brief Initializes statistics of finite volume solver. + * + * \param[out] stat Statistics structure. + * + * \return void + */ +void fvs_initialize_statistics(struct fvs_stat *stat) { stat->count_disable_extrapolation = 0; } + +/*! \brief Gathers statistics properties from all tasks and prints information. + * + * \param[in] stat Finite volume solver statistics structure. + * + * \return void + */ +void fvs_evaluate_statistics(struct fvs_stat *stat) +{ +#ifdef VERBOSE + int count_disable_extrapolation = 0; + MPI_Reduce(&stat->count_disable_extrapolation, &count_disable_extrapolation, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + mpi_printf("FLUX: Disabled extrapolation for %d interfaces.\n", count_disable_extrapolation); +#endif /* #ifdef VERBOSE */ +} + +#ifdef ONEDIMS_SPHERICAL +/*! \brief Applies source terms that occur due to spherical symmetry. + * + * \return void + */ +void apply_spherical_source_terms() +{ + int idx, i; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + double Pressure = SphP[i].Pressure; + double dt_Extrapolation = All.Time - SphP[i].TimeLastPrimUpdate; + struct grad_data *grad = &SphP[i].Grad; + + Pressure += -dt_Extrapolation * (GAMMA * Pressure * (grad->dvel[0][0] + grad->dvel[1][1] + grad->dvel[2][2]) + + P[i].Vel[0] * grad->dpress[0] + P[i].Vel[1] * grad->dpress[1] + P[i].Vel[2] * grad->dpress[2]); + + double dt = 0.5 * (P[i].TimeBinHydro ? 
(((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval; + SphP[i].Momentum[0] += dt * Pressure * (Mesh.VF[i + 1].area - Mesh.VF[i].area); + } +} +#endif /* #ifdef ONEDIMS_SPHERICAL */ diff --git a/src/amuse/community/arepo/src/hydro/gradients.c b/src/amuse/community/arepo/src/hydro/gradients.c new file mode 100644 index 0000000000..191c13635c --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/gradients.c @@ -0,0 +1,149 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gradients.c + * \date 05/2018 + * \brief Routines to initialize gradient data. 
+ *  \details     contains functions:
+ *                 void init_gradients()
+ *                 void gradient_init(MyFloat * addr, MyFloat * addr_exch,
+ *                   MySingle * addr_grad, int type)
+ *
+ *  \par Major modifications and contributions:
+ *
+ *  - DD.MM.YYYY Description
+ *  - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+int N_Grad = 0; /* number of entries of grad_elements[] filled so far by gradient_init() */
+
+struct grad_elements grad_elements[MAXGRADIENTS], *GDensity, *GVelx, *GVely, *GVelz, *GPressure, *GUtherm;
+
+/*! \brief Initializes all gradient fields.
+ *
+ *  Density, velocity, pressure and if needed magnetic fields and passive
+ *  scalars.
+ *  \note Each field is registered through gradient_init(); no GRADIENT_TYPE_UTHERM entry is registered here, so GUtherm is not set by this routine.
+ *  \return void
+ */
+void init_gradients()
+{
+#if defined(MAXSCALARS)
+  int k;
+#endif /* #if defined(MAXSCALARS) */
+
+  gradient_init(&SphP[0].Density, &PrimExch[0].Density, SphP[0].Grad.drho, GRADIENT_TYPE_DENSITY);
+  /* velocities are read from P[], not SphP[]; gradient_init() selects the base struct from the VELX/VELY/VELZ type */
+  gradient_init(&P[0].Vel[0], &PrimExch[0].VelGas[0], SphP[0].Grad.dvel[0], GRADIENT_TYPE_VELX);
+  gradient_init(&P[0].Vel[1], &PrimExch[0].VelGas[1], SphP[0].Grad.dvel[1], GRADIENT_TYPE_VELY);
+  gradient_init(&P[0].Vel[2], &PrimExch[0].VelGas[2], SphP[0].Grad.dvel[2], GRADIENT_TYPE_VELZ);
+
+  gradient_init(&SphP[0].Pressure, &PrimExch[0].Pressure, SphP[0].Grad.dpress, GRADIENT_TYPE_PRESSURE);
+
+#ifdef MHD
+  gradient_init(&SphP[0].B[0], &PrimExch[0].B[0], SphP[0].Grad.dB[0], GRADIENT_TYPE_NORMAL);
+  gradient_init(&SphP[0].B[1], &PrimExch[0].B[1], SphP[0].Grad.dB[1], GRADIENT_TYPE_NORMAL);
+  gradient_init(&SphP[0].B[2], &PrimExch[0].B[2], SphP[0].Grad.dB[2], GRADIENT_TYPE_NORMAL);
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  MyFloat *addr;
+
+  for(k = 0; k < N_Scalar; k++)
+    {
+      addr = (MyFloat *)(((char *)(&SphP[0])) + scalar_elements[k].offset); /* scalar k is located by its byte offset inside SphP[0] */
+      gradient_init(addr, &PrimExch[0].Scalars[k], SphP[0].Grad.dscalars[k], GRADIENT_TYPE_NORMAL);
+    }
+#endif /* #ifdef MAXSCALARS */
+
+  mpi_printf("INIT: %d/%d Gradients used.\n", N_Grad, MAXGRADIENTS);
+}
+
+/*! \brief Initialize a gradient field.
+ *
+ *  Each time this initialization routine is called, the global variable
+ *  N_Grad is incremented by 1; the run is terminated if MAXGRADIENTS entries are already in use.
+ *  \note Only byte offsets are stored: of addr relative to P[0] (velocity types) or SphP[0], of addr_exch relative to PrimExch[0], of addr_grad relative to SphP[0].Grad.
+ *  \param[in] addr Pointer to element in SphP[0] struct (for Vel in P[0])
+ *  \param[in] addr_exch Pointer to element in PrimExch[0] struct
+ *  \param[in] addr_grad Pointer to element in SphP[0].Grad struct
+ *  \param[in] type Type of gradient
+ *
+ *  \return void
+ */
+void gradient_init(MyFloat *addr, MyFloat *addr_exch, MySingle *addr_grad, int type)
+{
+  if(N_Grad == MAXGRADIENTS)
+    {
+      mpi_printf("Failed to register gradient, maximum of %d already reached\n", MAXGRADIENTS);
+      terminate("MAXGRADIENTS reached");
+    }
+
+  grad_elements[N_Grad].type = type;
+
+  if((type == GRADIENT_TYPE_VELX) || (type == GRADIENT_TYPE_VELY) || (type == GRADIENT_TYPE_VELZ))
+    {
+      /* basic structure is P */
+      grad_elements[N_Grad].offset = ((char *)addr) - ((char *)&P[0]);
+    }
+  else
+    {
+      /* basic structure is SphP */
+      grad_elements[N_Grad].offset = ((char *)addr) - ((char *)&SphP[0]);
+    }
+
+  grad_elements[N_Grad].offset_exch = ((char *)addr_exch) - ((char *)&PrimExch[0]);
+  grad_elements[N_Grad].offset_grad = ((char *)addr_grad) - ((char *)&(SphP[0].Grad));
+  /* remember the entry of each named field in its global shortcut pointer; GRADIENT_TYPE_NORMAL entries get none */
+  switch(type)
+    {
+      case GRADIENT_TYPE_VELX:
+        GVelx = &grad_elements[N_Grad];
+        break;
+      case GRADIENT_TYPE_VELY:
+        GVely = &grad_elements[N_Grad];
+        break;
+      case GRADIENT_TYPE_VELZ:
+        GVelz = &grad_elements[N_Grad];
+        break;
+      case GRADIENT_TYPE_DENSITY:
+        GDensity = &grad_elements[N_Grad];
+        break;
+      case GRADIENT_TYPE_PRESSURE:
+        GPressure = &grad_elements[N_Grad];
+        break;
+      case GRADIENT_TYPE_UTHERM:
+        GUtherm = &grad_elements[N_Grad];
+        break;
+      default:
+        break;
+    }
+
+  N_Grad++;
+}
diff --git a/src/amuse/community/arepo/src/hydro/mhd.c b/src/amuse/community/arepo/src/hydro/mhd.c
new file mode 100644
index 0000000000..33eaf7eab5
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/mhd.c
@@ -0,0 +1,99 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              .
+ *
+ * \file        src/mhd.c
+ * \date        05/2018
+ * \brief       Source terms for MHD implementation needed for cosmological
+ *              MHD equations as well as Powell source terms.
+ * \details     contains functions:
+ *                void do_mhd_source_terms_first_half(void)
+ *                void do_mhd_source_terms_second_half(void)
+ *                void do_mhd_source_terms(void)
+ *                void do_mhd_powell_source_terms(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef MHD
+
+static void do_mhd_source_terms(void); /* this static prototype gives the definition further down internal linkage */
+
+/*! \brief First half of the MHD source terms.
+ *
+ *  Before hydrodynamics timestep.
+ *  \note Applies do_mhd_source_terms() and then refreshes the primitive variables.
+ *  \return void
+ */
+void do_mhd_source_terms_first_half(void)
+{
+  do_mhd_source_terms();
+  update_primitive_variables();
+}
+
+/*! \brief Second half of the MHD source terms.
+ *
+ *  After hydrodynamics timestep.
+ *  \note Same two calls as do_mhd_source_terms_first_half(); only the point in the step at which it is invoked differs.
+ *  \return void
+ */
+void do_mhd_source_terms_second_half(void)
+{
+  do_mhd_source_terms();
+  update_primitive_variables();
+}
+
+/*! \brief Adds source terms of MHD equations in expanding spacetime (i.e.
+ *         in cosmological simulations) to energy.
+ *  \note Does nothing except start/stop the CPU_MHD timer unless All.ComovingIntegrationOn is set; only hydro-active cells are updated.
+ *  \return void
+ */
+void do_mhd_source_terms(void)
+{
+  TIMER_START(CPU_MHD);
+
+  if(All.ComovingIntegrationOn)
+    {
+      double atime    = All.Time;
+      double hubble_a = hubble_function(atime);
+
+      int idx, i;
+      for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0) /* negative entries of the active list are skipped */
+            continue;
+
+          double dt_cell = 0.5 * (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval /
+                           hubble_a; /* half the timestep of the cell; zero for time bin 0 */
+          SphP[i].Energy += dt_cell * 0.5 * (SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2]) *
+                            SphP[i].Volume * atime * hubble_a; /* 0.5 * |B|^2 * Volume * atime; the hubble_a here cancels the 1/hubble_a in dt_cell */
+        }
+    }
+
+  TIMER_STOP(CPU_MHD);
+}
+
+#endif /* #ifdef MHD */
diff --git a/src/amuse/community/arepo/src/hydro/riemann.c b/src/amuse/community/arepo/src/hydro/riemann.c
new file mode 100644
index 0000000000..24f664352f
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/riemann.c
@@ -0,0 +1,955 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              .
+ * + * \file src/riemann.c + * \date 05/2018 + * \brief Exact, iterative Riemann solver; both adiabatic and isothermal. + * \details contains functions: + * double godunov_flux_3d(struct state *st_L, struct state + * *st_R, struct state_face *st_face) + * void sample_solution_vaccum_left_3d(double S, struct state + * *st_R, struct state_face *st_face) + * void sample_solution_vaccum_right_3d(double S, struct state + * *st_L, struct state_face *st_face) + * void sample_solution_vacuum_generate_3d(double S, struct + * state *st_L, struct state *st_R, struct state_face + * *st_face) + * void get_mach_numbers(struct state *st_L, struct state + * *st_R, double Press) + * void sample_solution_3d(double S, struct state *st_L, + * struct state *st_R, double Press, double Vel, struct + * state_face *st_face) + * int riemann(struct state *st_L, struct state *st_R, double + * *Press, double *Vel) + * void pressure_function(double P, struct state *st, double *F, + * double *FD) + * double guess_for_pressure(struct state *st_L, + * struct state *st_R) + * void riemann_isotherm(struct state *st_L, struct state *st_R, + * double *Rho, double *Vel, double csnd) + * void isothermal_function(double rhostar, double rho, + * double *F, double *FD) + * void sample_solution_isothermal3d(double S, struct state + * *st_L, struct state *st_R, double Rho, double Vel, + * struct state_face *st_face, double csnd) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#if !(defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD)) + +#define GAMMA_G1 ((GAMMA - 1.0) / (2.0 * GAMMA)) +#define GAMMA_G2 ((GAMMA + 1.0) / (2.0 * GAMMA)) +#define GAMMA_G3 ((2.0 * GAMMA / (GAMMA - 1.0))) +#define GAMMA_G4 (2.0 / (GAMMA - 1.0)) +#define GAMMA_G5 (2.0 / (GAMMA + 1.0)) 
+#define GAMMA_G6 ((GAMMA - 1.0) / (GAMMA + 1.0))
+#define GAMMA_G7 (0.5 * (GAMMA - 1.0))
+#define GAMMA_G8 (1.0 / GAMMA)
+#define GAMMA_G9 (GAMMA - 1.0)
+
+#define TOL 1.0e-8 /* relative change between iterations below which the Newton loops in this file stop */
+
+/*! \brief Calculates face state from Riemann problem.
+ *  \note Without ISOTHERM_EQS the exact solver (including vacuum cases) is used; with ISOTHERM_EQS the isothermal solver with All.IsoSoundSpeed.
+ *  \param[in, out] st_L Left hand side state; csnd is overwritten when both densities are positive.
+ *  \param[in, out] st_R Right hand side state; csnd is overwritten when both densities are positive.
+ *  \param[out] st_face State at face, sampled at S = x / t = 0.
+ *  \note NOTE(review): csnd is not computed here before the one-sided vacuum samplers read it; presumably the caller has set it - confirm.
+ *  \return Central pressure from riemann() when that solver succeeds; 0 on every other path (vacuum and isothermal).
+ */
+double godunov_flux_3d(struct state *st_L, struct state *st_R, struct state_face *st_face)
+{
+  double Vel;
+
+#ifndef ISOTHERM_EQS
+  {
+    if(st_L->press == 0 && st_R->press == 0)
+      {
+        /* vacuum state */
+        st_face->velx  = 0;
+        st_face->rho   = 0;
+        st_face->press = 0;
+        st_face->vely  = 0;
+        st_face->velz  = 0;
+#ifdef MAXSCALARS
+        st_face->scalars = NULL;
+#endif /* #ifdef MAXSCALARS */
+        return 0;
+      }
+
+    if(st_L->rho > 0 && st_R->rho > 0)
+      {
+        st_L->csnd = sqrt(GAMMA * st_L->press / st_L->rho);
+        st_R->csnd = sqrt(GAMMA * st_R->press / st_R->rho);
+
+        double Press;
+
+        if(riemann(st_L, st_R, &Press, &Vel)) /* non-zero: Press and Vel of the central region were determined */
+          {
+            sample_solution_3d(0.0, /* S=x/t */
+                               st_L, st_R, Press, Vel, st_face);
+            return Press;
+          }
+        else
+          {
+            /* ICs lead to vacuum, need to sample vacuum solution */
+
+            sample_solution_vacuum_generate_3d(0.0, /* S=x/t */
+                                               st_L, st_R, st_face);
+            return 0;
+          }
+      }
+    else
+      {
+        if(st_L->rho == 0 && st_R->rho > 0)
+          {
+            sample_solution_vacuum_left_3d(0.0, /* S=x/t */
+                                           st_R, st_face);
+            return 0;
+          }
+        else if(st_R->rho == 0 && st_L->rho > 0)
+          {
+            sample_solution_vacuum_right_3d(0.0, /* S=x/t */
+                                            st_L, st_face);
+            return 0;
+          }
+        else if(st_R->rho == 0 && st_L->rho == 0)
+          {
+            /* vacuum state */
+            st_face->velx  = 0;
+            st_face->rho   = 0;
+            st_face->press = 0;
+            st_face->vely  = 0;
+            st_face->velz  = 0;
+#ifdef MAXSCALARS
+            st_face->scalars = NULL;
+#endif /* #ifdef MAXSCALARS */
+          }
+        else
+          {
+            terminate("one of the densities is negative\n");
+          }
+        return 0;
+      }
+  }
+
+#else  /* #ifndef ISOTHERM_EQS */
+  double Rho;
+  double csnd;
+
+  csnd = All.IsoSoundSpeed;
+  riemann_isotherm(st_L, st_R, &Rho, &Vel, csnd);
+
+  sample_solution_isothermal3d(0.0, /* S=x/t */
+                               st_L, st_R, Rho, Vel, st_face, csnd);
+
+  st_face->press = st_face->rho * csnd * csnd; /* the isothermal sampler leaves press unset; derive it from rho */
+
+  return 0;
+#endif /* #ifndef ISOTHERM_EQS #else */
+}
+
+/*! \brief Sample solution for a vacuum state at the left hand side.
+ *  \note Reads st_R->csnd, which must have been set before the call.
+ *  \param[in] S Position x / t.
+ *  \param[in] st_R Right hand side state.
+ *  \param[out] st_face State at face.
+ *
+ *  \return void
+ */
+void sample_solution_vacuum_left_3d(double S, struct state *st_R, struct state_face *st_face)
+{
+  double Csnd;
+
+  double Sr = st_R->velx - 2 * st_R->csnd / GAMMA_MINUS1; /* speed of the boundary between the right fan and the vacuum */
+
+  st_face->vely = st_R->vely;
+  st_face->velz = st_R->velz;
+#ifdef MAXSCALARS
+  st_face->scalars = st_R->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+  if(S >= Sr)
+    {
+      /* right fan */
+
+      double shr = st_R->velx + st_R->csnd;
+
+      if(S >= shr) /* right data state */
+        {
+          st_face->rho   = st_R->rho;
+          st_face->velx  = st_R->velx;
+          st_face->press = st_R->press;
+        }
+      else
+        {
+          /* rarefaction fan right state */
+          st_face->velx  = GAMMA_G5 * (-st_R->csnd + GAMMA_G7 * st_R->velx + S);
+          Csnd           = GAMMA_G5 * (st_R->csnd - GAMMA_G7 * (st_R->velx - S));
+          st_face->rho   = st_R->rho * pow(Csnd / st_R->csnd, GAMMA_G4);
+          st_face->press = st_R->press * pow(Csnd / st_R->csnd, GAMMA_G3);
+        }
+    }
+  else
+    {
+      /* vacuum state */
+      st_face->velx  = Sr;
+      st_face->rho   = 0;
+      st_face->press = 0;
+    }
+}
+
+/*! \brief Sample solution for a vacuum state at the right hand side.
+ *
+ * \param[in] S S Position x / t.
+ * \param[in] st_L Left hand side state.
+ * \param[out] st_face State at face.
+ *
+ * \return void
+ */
+void sample_solution_vacuum_right_3d(double S, struct state *st_L, struct state_face *st_face)
+{
+  double Csnd;
+
+  double Sl = st_L->velx + 2 * st_L->csnd / GAMMA_MINUS1; /* speed of the boundary between the left fan and the vacuum */
+
+  st_face->vely = st_L->vely;
+  st_face->velz = st_L->velz;
+#ifdef MAXSCALARS
+  st_face->scalars = st_L->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+  if(S <= Sl)
+    {
+      /* left fan */
+
+      double shl = st_L->velx - st_L->csnd;
+
+      if(S <= shl)
+        {
+          /* left data state */
+          st_face->rho   = st_L->rho;
+          st_face->velx  = st_L->velx;
+          st_face->press = st_L->press;
+        }
+      else
+        {
+          /* rarefaction fan left state */
+          st_face->velx  = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * st_L->velx + S);
+          Csnd           = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * (st_L->velx - S));
+          st_face->rho   = st_L->rho * pow(Csnd / st_L->csnd, GAMMA_G4);
+          st_face->press = st_L->press * pow(Csnd / st_L->csnd, GAMMA_G3);
+        }
+    }
+  else
+    {
+      /* vacuum in between */
+      st_face->velx  = Sl;
+      st_face->rho   = 0;
+      st_face->press = 0;
+    }
+}
+
+/*! \brief Sample solution for vacuum states.
+ *  \note Used when two non-vacuum states separate fast enough that vacuum opens between them (riemann() returned 0).
+ *  \param[in] S Position x / t.
+ *  \param[in] st_L Left hand side state.
+ *  \param[in] st_R Right hand side state.
+ *  \param[out] st_face State at face.
+ *
+ *  \return void
+ */
+void sample_solution_vacuum_generate_3d(double S, struct state *st_L, struct state *st_R, struct state_face *st_face)
+{
+  double Csnd;
+
+  double Sl = st_L->velx + 2 * st_L->csnd / GAMMA_MINUS1;
+  double Sr = st_R->velx - 2 * st_R->csnd / GAMMA_MINUS1;
+
+  if(S <= Sl)
+    {
+      /* left fan */
+
+      st_face->vely = st_L->vely;
+      st_face->velz = st_L->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_L->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      double shl = st_L->velx - st_L->csnd;
+
+      if(S <= shl)
+        {
+          /* left data state */
+          st_face->rho   = st_L->rho;
+          st_face->velx  = st_L->velx;
+          st_face->press = st_L->press;
+        }
+      else
+        {
+          /* rarefaction fan left state */
+          st_face->velx  = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * st_L->velx + S);
+          Csnd           = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * (st_L->velx - S));
+          st_face->rho   = st_L->rho * pow(Csnd / st_L->csnd, GAMMA_G4);
+          st_face->press = st_L->press * pow(Csnd / st_L->csnd, GAMMA_G3);
+        }
+    }
+  else if(S >= Sr)
+    {
+      /* right fan */
+
+      double shr = st_R->velx + st_R->csnd;
+
+      st_face->vely = st_R->vely;
+      st_face->velz = st_R->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_R->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      if(S >= shr) /* right data state */
+        {
+          st_face->rho   = st_R->rho;
+          st_face->velx  = st_R->velx;
+          st_face->press = st_R->press;
+        }
+      else
+        {
+          /* rarefaction fan right state */
+          st_face->velx  = GAMMA_G5 * (-st_R->csnd + GAMMA_G7 * st_R->velx + S);
+          Csnd           = GAMMA_G5 * (st_R->csnd - GAMMA_G7 * (st_R->velx - S));
+          st_face->rho   = st_R->rho * pow(Csnd / st_R->csnd, GAMMA_G4);
+          st_face->press = st_R->press * pow(Csnd / st_R->csnd, GAMMA_G3);
+        }
+    }
+  else
+    {
+      /* vacuum in between */
+      st_face->velx  = S;
+      st_face->rho   = 0;
+      st_face->press = 0;
+      /* transverse velocities: linear interpolation in S between the left (at Sl) and right (at Sr) values */
+      st_face->vely = st_L->vely + (st_R->vely - st_L->vely) * (S - Sl) / (Sr - Sl);
+      st_face->velz = st_L->velz + (st_R->velz - st_L->velz) * (S - Sl) / (Sr - Sl);
+
+#ifdef MAXSCALARS
+      st_face->scalars = NULL;
+#endif /* #ifdef MAXSCALARS */
+    }
+}
+
+/*! \brief Calculates Mach numbers of shocks from Riemann problem.
+ *
+ *  Mostly used for statistics.
+ *
+ *  \note The body is compiled only if GODUNOV_STATS is defined; otherwise the call does nothing.
+ *  \param[in, out] st_L Left hand side state; mach is set (0 if the left wave is a rarefaction fan).
+ *  \param[in, out] st_R Right hand side state; mach is set (0 if the right wave is a rarefaction fan).
+ *  \param[in] Press Central pressure
+ *
+ *  \return void
+ */
+void get_mach_numbers(struct state *st_L, struct state *st_R, double Press)
+{
+#if defined GODUNOV_STATS
+  if(Press <= st_L->press) /* left fan */
+    {
+      st_L->mach = 0;
+    }
+  else /* left shock */
+    {
+      double pml = Press / st_L->press;
+      st_L->mach = sqrt(GAMMA_G2 * pml + GAMMA_G1);
+    }
+
+  if(Press > st_R->press) /* right shock */
+    {
+      double pmr = Press / st_R->press;
+      st_R->mach = sqrt(GAMMA_G2 * pmr + GAMMA_G1);
+    }
+  else
+    {
+      st_R->mach = 0;
+    }
+#endif /* #if defined GODUNOV_STATS */
+}
+
+/*! \brief Samples 3d solution to Riemann problem.
+ *
+ * \param[in] S Position x / t.
+ * \param[in] st_L Left hand side state.
+ * \param[in] st_R Right hand side state.
+ * \param[in] Press Pressure in central region.
+ * \param[in] Vel Velocity in central region.
+ * \param[out] st_face State at face.
+ *
+ * \return void
+ */
+void sample_solution_3d(double S, struct state *st_L, struct state *st_R, double Press, double Vel, struct state_face *st_face)
+{
+  double Csnd;
+
+  if(S <= Vel) /* sample point is left of contact */
+    {
+      st_face->vely = st_L->vely; /* transverse velocities (and scalars) are taken unchanged from the side the sample point lies on */
+      st_face->velz = st_L->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_L->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      if(Press <= st_L->press) /* left fan */
+        {
+          double shl = st_L->velx - st_L->csnd; /* speed of the head of the left fan */
+
+          if(S <= shl) /* left data state */
+            {
+              st_face->rho   = st_L->rho;
+              st_face->velx  = st_L->velx;
+              st_face->press = st_L->press;
+            }
+          else
+            {
+              double cml = st_L->csnd * pow(Press / st_L->press, GAMMA_G1); /* sound speed in the middle left state */
+              double stl = Vel - cml;                                        /* speed of the tail of the left fan */
+
+              if(S > stl) /* middle left state */
+                {
+                  st_face->rho   = st_L->rho * pow(Press / st_L->press, GAMMA_G8);
+                  st_face->velx  = Vel;
+                  st_face->press = Press;
+                }
+              else /* left state inside fan */
+                {
+                  st_face->velx  = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * st_L->velx + S);
+                  Csnd           = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * (st_L->velx - S));
+                  st_face->rho   = st_L->rho * pow(Csnd / st_L->csnd, GAMMA_G4);
+                  st_face->press = st_L->press * pow(Csnd / st_L->csnd, GAMMA_G3);
+                }
+            }
+        }
+      else /* left shock */
+        {
+          if(st_L->press > 0)
+            {
+              double pml = Press / st_L->press;
+              double sl  = st_L->velx - st_L->csnd * sqrt(GAMMA_G2 * pml + GAMMA_G1); /* left shock speed */
+
+              if(S <= sl) /* left data state */
+                {
+                  st_face->rho   = st_L->rho;
+                  st_face->velx  = st_L->velx;
+                  st_face->press = st_L->press;
+                }
+              else /* middle left state behind shock */
+                {
+                  st_face->rho   = st_L->rho * (pml + GAMMA_G6) / (pml * GAMMA_G6 + 1.0);
+                  st_face->velx  = Vel;
+                  st_face->press = Press;
+                }
+            }
+          else /* st_L->press is not positive: use the limit of the density jump above for pml -> infinity, i.e. 1 / GAMMA_G6 */
+            {
+              st_face->rho   = st_L->rho / GAMMA_G6;
+              st_face->velx  = Vel;
+              st_face->press = Press;
+            }
+        }
+    }
+  else /* right of contact */
+    {
+      st_face->vely = st_R->vely;
+      st_face->velz = st_R->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_R->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      if(Press > st_R->press) /* right shock */
+        {
+          if(st_R->press > 0)
+            {
+              double pmr = Press / st_R->press;
+              double sr  = st_R->velx + st_R->csnd * sqrt(GAMMA_G2 * pmr + GAMMA_G1); /* right shock speed */
+
+              if(S >= sr) /* right data state */
+                {
+                  st_face->rho   = st_R->rho;
+                  st_face->velx  = st_R->velx;
+                  st_face->press = st_R->press;
+                }
+              else /* middle right state behind shock */
+                {
+                  st_face->rho   = st_R->rho * (pmr + GAMMA_G6) / (pmr * GAMMA_G6 + 1.0);
+                  st_face->velx  = Vel;
+                  st_face->press = Press;
+                }
+            }
+          else /* st_R->press is not positive: same pmr -> infinity limit as on the left side */
+            {
+              st_face->rho   = st_R->rho / GAMMA_G6;
+              st_face->velx  = Vel;
+              st_face->press = Press;
+            }
+        }
+      else /* right fan */
+        {
+          double shr = st_R->velx + st_R->csnd; /* speed of the head of the right fan */
+
+          if(S >= shr) /* right data state */
+            {
+              st_face->rho   = st_R->rho;
+              st_face->velx  = st_R->velx;
+              st_face->press = st_R->press;
+            }
+          else
+            {
+              double cmr = st_R->csnd * pow(Press / st_R->press, GAMMA_G1); /* sound speed in the middle right state */
+              double str = Vel + cmr;                                        /* speed of the tail of the right fan */
+
+              if(S <= str) /* middle right state */
+                {
+                  st_face->rho   = st_R->rho * pow(Press / st_R->press, GAMMA_G8);
+                  st_face->velx  = Vel;
+                  st_face->press = Press;
+                }
+              else /* fan right state */
+                {
+                  st_face->velx  = GAMMA_G5 * (-st_R->csnd + GAMMA_G7 * st_R->velx + S);
+                  Csnd           = GAMMA_G5 * (st_R->csnd - GAMMA_G7 * (st_R->velx - S));
+                  st_face->rho   = st_R->rho * pow(Csnd / st_R->csnd, GAMMA_G4);
+                  st_face->press = st_R->press * pow(Csnd / st_R->csnd, GAMMA_G3);
+                }
+            }
+        }
+    }
+}
+
+/*! \brief Riemann-solver; i.e. iterative solver of central pressure of a
+ * Riemann problem.
+ *
+ * Solution via root-finding of pressure function.
+ *
+ * \param[in] st_L Left hand side state.
+ * \param[in] st_R Right hand side state.
+ * \param[in, out] Press Central pressure; needs some initial guess.
+ * \param[out] Vel Velocity in central region.
+ *
+ * \return 0: failed, 1: success.
+ */
+int riemann(struct state *st_L, struct state *st_R, double *Press, double *Vel)
+{
+  double F_L, FD_L, F_R, FD_R, pold;
+
+  double dVel = st_R->velx - st_L->velx;
+
+  double critVel = GAMMA_G4 * (st_L->csnd + st_R->csnd) - dVel;
+
+  if(critVel < 0) /* the states separate too fast: report failure so that the caller samples the vacuum solution */
+    {
+      /*
+         printf("ICs lead to vacuum. stopping. Csnd_L=%g Csnd_R=%g dVel=%g\n", Csnd_L, Csnd_R, dVel);
+       */
+      return 0;
+    }
+
+  double p = guess_for_pressure(st_L, st_R); /* the starting value is computed here; the incoming *Press is never read */
+
+  int iter = 0;
+
+  do /* newton-raphson scheme; each pass of this loop applies two updates to p */
+    {
+      pold = p;
+
+      pressure_function(p, st_L, &F_L, &FD_L);
+      pressure_function(p, st_R, &F_R, &FD_R);
+
+      if(iter < MAXITER / 2)
+        p -= (F_L + F_R + dVel) / (FD_L + FD_R);
+      else /* second half of the allowed passes: take only half of the Newton step */
+        p -= 0.5 * (F_L + F_R + dVel) / (FD_L + FD_R);
+
+      if(p < 0.1 * pold) /* never drop below a tenth of the value at the start of this pass */
+        p = 0.1 * pold;
+
+      pressure_function(p, st_L, &F_L, &FD_L);
+      pressure_function(p, st_R, &F_R, &FD_R);
+
+      if(iter < MAXITER / 2)
+        p -= (F_L + F_R + dVel) / (FD_L + FD_R);
+      else
+        p -= 0.5 * (F_L + F_R + dVel) / (FD_L + FD_R);
+
+      if(p < 0.1 * pold)
+        p = 0.1 * pold;
+
+      iter++;
+    }
+  while(2 * fabs((p - pold) / (p + pold)) > TOL && iter < MAXITER);
+
+  if(iter >= MAXITER) /* not converged: warn and dump the inputs, but still return the last iterate as a success below */
+    {
+      printf("Task=%d: Warning: ICs for riemann solver lead to divergence.\n", ThisTask);
+      printf("Rho_L=%g Vel_L=%g Press_L=%g Csnd_L=%g\n", st_L->rho, st_L->velx, st_L->press, st_L->csnd);
+      printf("Rho_R=%g Vel_R=%g Press_R=%g Csnd_R=%g\n", st_R->rho, st_R->velx, st_R->press, st_R->csnd);
+      printf("Adopted solution: Press=%g Vel=%g\n", p, 0.5 * (st_L->velx + st_R->velx + F_R - F_L));
+
+      FILE *fd;
+      /* raw doubles: rho, velx, press, csnd of the left state, then of the right state; the file is truncated on every dump */
+      if((fd = fopen("riemann.dat", "w")))
+        {
+          fwrite(&st_L->rho, sizeof(double), 1, fd);
+          fwrite(&st_L->velx, sizeof(double), 1, fd);
+          fwrite(&st_L->press, sizeof(double), 1, fd);
+          fwrite(&st_L->csnd, sizeof(double), 1, fd);
+          fwrite(&st_R->rho, sizeof(double), 1, fd);
+          fwrite(&st_R->velx, sizeof(double), 1, fd);
+          fwrite(&st_R->press, sizeof(double), 1, fd);
+          fwrite(&st_R->csnd, sizeof(double), 1, fd);
+          fclose(fd);
+        }
+    }
+
+  /* prepare output values; F_L and F_R were last evaluated before the final update of p */
+  *Press = p;
+  *Vel   = 0.5 * (st_L->velx + st_R->velx + F_R - F_L);
+
+  return 1;
+}
+
+/*! \brief Pressure function for root-finding.
+ *  \note The rarefaction branch divides by st->press and both branches divide by st->rho; neither is checked here.
+ *  \param[in] P Pressure.
+ *  \param[in] st Hydrodynamic state.
+ *  \param[out] F pressure function.
+ *  \param[out] FD derivative of pressure function.
+ *
+ *  \return void
+ */
+void pressure_function(double P, struct state *st, double *F, double *FD)
+{
+  if(P <= st->press) /* rarefaction wave */
+    {
+      double prat = P / st->press;
+
+      *F  = GAMMA_G4 * st->csnd * (pow(prat, GAMMA_G1) - 1.0);
+      *FD = (1.0 / (st->rho * st->csnd)) * pow(prat, -GAMMA_G2);
+    }
+  else /* shock wave */
+    {
+      double ak  = GAMMA_G5 / st->rho;
+      double bk  = GAMMA_G6 * st->press;
+      double qrt = sqrt(ak / (bk + P));
+
+      *F  = (P - st->press) * qrt;
+      *FD = (1.0 - 0.5 * (P - st->press) / (bk + P)) * qrt;
+    }
+}
+
+/*! \brief Returns initial guess for central pressure of the Riemann problem.
+ *
+ * This is used as the starting value for the root-finding iteration.
+ *
+ * \param[in] st_L Left hand side state.
+ * \param[in] st_R Right hand side state.
+ *
+ * \return Guess for pressure in central region.
+ */
+double guess_for_pressure(struct state *st_L, struct state *st_R)
+{
+#define QMAX 2.0 /* largest pmax / pmin ratio for which pv is accepted directly; never #undef'd, so it stays defined for the rest of the file */
+
+  double pmin, pmax;
+
+  double pv = /* average pressure corrected by a term proportional to the velocity difference */
+      0.5 * (st_L->press + st_R->press) - 0.125 * (st_R->velx - st_L->velx) * (st_L->rho + st_R->rho) * (st_L->csnd + st_R->csnd);
+
+  if(st_L->press < st_R->press)
+    {
+      pmin = st_L->press;
+      pmax = st_R->press;
+    }
+  else
+    {
+      pmin = st_R->press;
+      pmax = st_L->press;
+    }
+
+  if(pmin > 0)
+    {
+      double qrat = pmax / pmin;
+
+      if(qrat <= QMAX && (pmin <= pv && pv <= pmax))
+        {
+          if(pv < 0) /* cannot trigger: this branch requires pv >= pmin and pmin > 0 */
+            {
+              printf("pv=%g\n", pv);
+              terminate("negative pv");
+            }
+
+          return pv;
+        }
+      else
+        {
+          if(pv < pmin) /* use two-rarefaction solution */
+            {
+              double pnu = (st_L->csnd + st_R->csnd) - GAMMA_G7 * (st_R->velx - st_L->velx);
+              double pde = st_L->csnd / pow(st_L->press, GAMMA_G1) + st_R->csnd / pow(st_R->press, GAMMA_G1);
+
+              return pow(pnu / pde, GAMMA_G3);
+            }
+          else /* two-shock approximation */
+            {
+              double gel = sqrt((GAMMA_G5 / st_L->rho) / (GAMMA_G6 * st_L->press + pv));
+              double ger = sqrt((GAMMA_G5 / st_R->rho) / (GAMMA_G6 * st_R->press + pv));
+              double x   = (gel * st_L->press + ger * st_R->press - (st_R->velx - st_L->velx)) / (gel + ger);
+
+              if(x < pmin || x > pmax) /* estimate left the [pmin, pmax] interval: fall back to pmin */
+                {
+                  x = pmin;
+                }
+
+              return x;
+            }
+        }
+    }
+  else /* pmin is not positive: skip the estimates above and return the arithmetic mean */
+    {
+      return (pmin + pmax) / 2;
+    }
+}
+
+/*! \brief Riemann-solver for isothermal gas.
+ *
+ * \param[in] st_L Left hand side state.
+ * \param[in] st_R Right hand side state.
+ * \param[in, out] Rho Central density; needs some initial guess.
+ * \param[out] Vel Velocity in central region.
+ * \param[in] csnd Sound speed.
+ *
+ * \return void
+ */
+void riemann_isotherm(struct state *st_L, struct state *st_R, double *Rho, double *Vel, double csnd)
+{
+  double F_L, FD_L, F_R, FD_R, rhoold, drho;
+  double rho;
+
+  double dVel = (st_R->velx - st_L->velx) / csnd; /* velocity difference in units of the sound speed */
+
+  if(dVel > 0) /* starting value is chosen here; the incoming *Rho is never read */
+    rho = sqrt(st_L->rho * st_R->rho * exp(-dVel));
+  else
+    rho = 0.5 * (st_L->rho + st_R->rho);
+
+  int iter = 0;
+
+  if(st_L->rho <= 0 || st_R->rho <= 0)
+    terminate("isothermal Riemann solver was called with zero or negative density\n");
+
+  do /* newton-raphson scheme */
+    {
+      isothermal_function(rho, st_L->rho, &F_L, &FD_L);
+      isothermal_function(rho, st_R->rho, &F_R, &FD_R);
+
+      rhoold = rho;
+      drho   = -0.5 * (F_L + F_R + dVel) / (FD_L + FD_R); /* half of the Newton step */
+
+      if(fabs(drho) > 0.25 * rho) /* limit the change per iteration to 25% of rho, keeping the sign of drho */
+        drho = 0.25 * rho * fabs(drho) / drho;
+
+      rho += drho;
+
+      iter++;
+    }
+  while(2 * fabs(rho - rhoold) / (st_L->rho + st_R->rho) > TOL && iter < MAXITER);
+
+  if(iter >= MAXITER) /* unlike riemann(), failure to converge terminates the run */
+    {
+#ifndef LONGIDS
+      printf("ID_L=%u ID_R=%u Rho_L=%g Rho_R=%g Vel_L=%g Vel_R=%g\n", st_L->ID, st_R->ID, st_L->rho, st_R->rho, st_L->velx,
+             st_R->velx);
+#else  /* #ifndef LONGIDS */
+      printf("ID_L=%llu ID_R=%llu Rho_L=%g Rho_R=%g Vel_L=%g Vel_R=%g\n", st_L->ID, st_R->ID, st_L->rho, st_R->rho, st_L->velx,
+             st_R->velx);
+#endif /* #ifndef LONGIDS #else */
+      terminate("ICs for isothermal riemann solver lead to divergence. stopping.");
+      /*
+       *Rho = 0.5 * (Rho_L + Rho_R);
+       *Vel = 0.5 * (Vel_L + Vel_R);
+       return;
+       */
+    }
+
+  /* prepare output values; F_L and F_R were evaluated before the last update of rho */
+  *Rho = rho;
+  *Vel = 0.5 * (st_L->velx + st_R->velx + csnd * (F_R - F_L));
+}
+
+/*! \brief "Pressure" function for isothermal gas.
+ *
+ *  Needed for root-finding in riemann_isotherm.
+ *  \note NOTE(review): in the rarefaction branch FD is 1 / rho, whereas d/d(rhostar) of log(rhostar / rho) is 1 / rhostar; they agree only at rhostar == rho - confirm this is intended.
+ *  \param[in] rhostar Central density.
+ *  \param[in] rho External density.
+ *  \param[out] F Isothermal function.
+ *  \param[out] FD Derivative of isothermal function.
+ *
+ *  \return void
+ */
+void isothermal_function(double rhostar, double rho, double *F, double *FD)
+{
+  if(rhostar <= rho) /* rarefaction wave */
+    {
+      *F  = log(rhostar / rho);
+      *FD = 1.0 / rho;
+    }
+  else /* shock wave */
+    {
+      *F  = (rhostar - rho) / sqrt(rhostar * rho);
+      *FD = 0.5 / rhostar * (sqrt(rhostar / rho) + sqrt(rho / rhostar));
+    }
+}
+
+/*! \brief Samples 3d solution to Riemann problem with isothermal gas.
+ *  \note Sets rho, velx, vely, velz (and scalars with MAXSCALARS) of st_face; st_face->press is not written here.
+ *  \param[in] S Position x / t.
+ *  \param[in] st_L Left hand side state.
+ *  \param[in] st_R Right hand side state.
+ *  \param[in] Rho central density.
+ *  \param[in] Vel Velocity in central region.
+ *  \param[out] st_face State at face.
+ *  \param[in] csnd Sound speed.
+ *
+ *  \return void
+ */
+void sample_solution_isothermal3d(double S, struct state *st_L, struct state *st_R, double Rho, double Vel, struct state_face *st_face,
+                                  double csnd)
+{
+  if(S <= Vel) /* sample point is left of contact */
+    {
+      st_face->vely = st_L->vely;
+      st_face->velz = st_L->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_L->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      if(Rho <= st_L->rho) /* left fan */
+        {
+          double shl = st_L->velx - csnd; /* speed of the head of the left fan */
+
+          if(S <= shl) /* left data state */
+            {
+              st_face->rho  = st_L->rho;
+              st_face->velx = st_L->velx;
+            }
+          else
+            {
+              double stl = Vel - csnd; /* speed of the tail of the left fan */
+
+              if(S > stl) /* middle left state */
+                {
+                  st_face->rho  = Rho;
+                  st_face->velx = Vel;
+                }
+              else /* left state inside fan */
+                {
+                  st_face->velx = S + csnd;
+                  st_face->rho  = st_L->rho * exp(-((S + csnd) - st_L->velx) / csnd);
+                }
+            }
+        }
+      else /* left shock */
+        {
+          double sl = (st_L->rho * st_L->velx - Rho * Vel) / (st_L->rho - Rho); /* shock speed; Rho > st_L->rho in this branch */
+
+          if(S <= sl) /* left data state */
+            {
+              st_face->rho  = st_L->rho;
+              st_face->velx = st_L->velx;
+            }
+          else /* left state behind shock */
+            {
+              st_face->rho  = Rho;
+              st_face->velx = Vel;
+            }
+        }
+    }
+  else /* right of contact */
+    {
+      st_face->vely = st_R->vely;
+      st_face->velz = st_R->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_R->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      if(Rho > st_R->rho) /* right shock */
+        {
+          double sr = (st_R->rho * st_R->velx - Rho * Vel) / (st_R->rho - Rho); /* shock speed; Rho > st_R->rho in this branch */
+
+          if(S >= sr) /* right data state */
+            {
+              st_face->rho  = st_R->rho;
+              st_face->velx = st_R->velx;
+            }
+          else /* right state behind shock */
+            {
+              st_face->rho  = Rho;
+              st_face->velx = Vel;
+            }
+        }
+      else /* right fan */
+        {
+          double shr = st_R->velx + csnd; /* speed of the head of the right fan */
+
+          if(S >= shr) /* right data state */
+            {
+              st_face->rho  = st_R->rho;
+              st_face->velx = st_R->velx;
+            }
+          else
+            {
+              double str = Vel + csnd; /* speed of the tail of the right fan */
+
+              if(S <= str) /* middle right state */
+                {
+                  st_face->rho  = Rho;
+                  st_face->velx = Vel;
+                }
+              else /* fan right state */
+                {
+                  st_face->velx = S - csnd;
+                  st_face->rho  = st_R->rho * exp(((S - csnd) - st_R->velx) / csnd);
+                }
+            }
+        }
+    }
+}
+
+#endif /* #if !(defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD)) */
diff --git a/src/amuse/community/arepo/src/hydro/riemann_hllc.c b/src/amuse/community/arepo/src/hydro/riemann_hllc.c
new file mode 100644
index 0000000000..80fb519ceb
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/riemann_hllc.c
@@ -0,0 +1,213 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/riemann_hllc.c + * \date 05/2018 + * \brief Routines for a HLLC Riemann solver. + * \details contains functions: + * static void hllc_get_fluxes_from_state(struct state *st, + * struct fluxes *flux) + * static double get_hllc_star_fluxes(const struct state *st, + * const struct fluxes *flux, struct fluxes *hllc_flux, + * double S_star, double S) + * double godunov_flux_3d_hllc(struct state *st_L, struct state + * *st_R, struct state_face *st_face, struct fluxes *flux) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#if defined(RIEMANN_HLLC) + +#if defined(RIEMANN_HLLD) +#error option RIEMANN_HLLC is incompatible with option RIEMANN_HLLD. +Only one Riemann solver can be chosen among the above options.If none of them is selected, + the exact Riemann solver will be used. +#endif /* #if defined(RIEMANN_HLLD) */ + /*! \brief Calculates the flux from a state. + * + * Mass, momentum and energy flux. + * + * \param[in] st State. + * \param[out] flux Flux corresponding to the state. + * + * \return void + */ + static void + hllc_get_fluxes_from_state(struct state *st, struct fluxes *flux) +{ + flux->mass = st->rho * st->velx; + flux->momentum[0] = st->rho * st->velx * st->velx + st->press; + flux->momentum[1] = st->rho * st->velx * st->vely; + flux->momentum[2] = st->rho * st->velx * st->velz; + + st->Energy = st->press / GAMMA_MINUS1 + 0.5 * st->rho * (st->velx * st->velx + st->vely * st->vely + st->velz * st->velz); + flux->energy = (st->Energy + st->press) * st->velx; +} + +/*! \brief Calculates a central flux in HLLC approximation. 
+ * + * \param[in] st State of the Riemann problem (either left or right). + * \param[in] flux Flux through face (either left or right). + * \param[out] hllc_flux State at the face (determined by this routine). + * \param[in] S_star speed of characteristics in central region. + * \param[in] S speed of characteristics in outside state (left or right). + * + * \return Central density. + */ +static double get_hllc_star_fluxes(const struct state *st, const struct fluxes *flux, struct fluxes *hllc_flux, double S_star, + double S) +{ + double Q0 = st->rho * (S - st->velx) / (S - S_star); + double Q1 = Q0 * S_star; + double Q2 = Q0 * st->vely; + double Q3 = Q0 * st->velz; + double Q4 = Q0 * (st->Energy / st->rho + (S_star - st->velx) * (S_star + st->press / (st->rho * (S - st->velx)))); + + hllc_flux->mass = flux->mass + S * (Q0 - st->rho); + + hllc_flux->momentum[0] = flux->momentum[0] + S * (Q1 - st->rho * st->velx); + + hllc_flux->momentum[1] = flux->momentum[1] + S * (Q2 - st->rho * st->vely); + + hllc_flux->momentum[2] = flux->momentum[2] + S * (Q3 - st->rho * st->velz); + + hllc_flux->energy = flux->energy + S * (Q4 - st->Energy); + + return Q0; +} + +/*! \brief Main routine for the hllc Riemann solver. + * + * Called in finite_volume_solver.c + * + * \param[in] st_L Left state of the Riemann problem. + * \param[in] st_R Right state of the Riemann problem. + * \param[out] st_face State at face. + * \param[out] flux Flux through face. + * + * \return Pressure. 
+ */ +double godunov_flux_3d_hllc(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux) +{ + double S_L, S_R, S_star; + double Press_star, rho_star; + double rho_hat, csnd_hat; + + if(st_L->rho > 0 && st_R->rho > 0) + { + struct fluxes flux_L, flux_R; + + st_L->csnd = sqrt(GAMMA * st_L->press / st_L->rho); + st_R->csnd = sqrt(GAMMA * st_R->press / st_R->rho); + + /* first estimate wave speeds */ + S_L = dmin(st_L->velx - st_L->csnd, st_R->velx - st_R->csnd); + S_R = dmax(st_L->velx + st_L->csnd, st_R->velx + st_R->csnd); + + rho_hat = 0.5 * (st_L->rho + st_R->rho); + csnd_hat = 0.5 * (st_L->csnd + st_R->csnd); + Press_star = 0.5 * ((st_L->press + st_R->press) + (st_L->velx - st_R->velx) * (rho_hat * csnd_hat)); + S_star = 0.5 * ((st_L->velx + st_R->velx) + (st_L->press - st_R->press) / (rho_hat * csnd_hat)); + + /* compute fluxes for the left and right states */ + hllc_get_fluxes_from_state(st_L, &flux_L); + hllc_get_fluxes_from_state(st_R, &flux_R); + + if(S_L >= 0.0) /* F_hllc = F_L */ + { + /* copy the fluxes from the left state */ + flux->mass = flux_L.mass; + flux->momentum[0] = flux_L.momentum[0]; + flux->momentum[1] = flux_L.momentum[1]; + flux->momentum[2] = flux_L.momentum[2]; + flux->energy = flux_L.energy; + + /* set the primitive variables at the face */ + st_face->rho = st_L->rho; + st_face->velx = st_L->velx; + st_face->vely = st_L->vely; + st_face->velz = st_L->velz; + st_face->press = st_L->press; + } + else if(S_R <= 0.0) /* F_hllc = F_R */ + { + /* copy the fluxes from the left state */ + flux->mass = flux_R.mass; + flux->momentum[0] = flux_R.momentum[0]; + flux->momentum[1] = flux_R.momentum[1]; + flux->momentum[2] = flux_R.momentum[2]; + flux->energy = flux_R.energy; + + /* set the primitive variables at the face */ + st_face->rho = st_R->rho; + st_face->velx = st_R->velx; + st_face->vely = st_R->vely; + st_face->velz = st_R->velz; + st_face->press = st_R->press; + } + else if(S_L <= 0.0 && S_star >= 0.0) /* 
F_hllc = F*_L */ + { + /* compute star flux */ + rho_star = get_hllc_star_fluxes(st_L, &flux_L, flux, S_star, S_L); + + /* set the primitive variables at the face */ + st_face->rho = rho_star; + st_face->velx = S_star; + st_face->vely = st_L->vely; + st_face->velz = st_L->velz; + st_face->press = Press_star; + } + else /* F_hllc = F*_R */ + { + /* compute star flux */ + rho_star = get_hllc_star_fluxes(st_R, &flux_R, flux, S_star, S_R); + + /* set the primitive variables at the face */ + st_face->rho = rho_star; + st_face->velx = S_star; + st_face->vely = st_R->vely; + st_face->velz = st_R->velz; + st_face->press = Press_star; + } + } + else + { + printf("Left: st_L->press=%g st_L->rho=%g st_L->velx=%g\n", st_L->press, st_L->rho, st_L->velx); + printf("Right: st_R->press=%g st_R->rho=%g st_R->velx=%g\n", st_R->press, st_R->rho, st_R->velx); + terminate("density is zero\n"); + return 0; + } + + return st_face->press; +} + +#endif /* #if defined(RIEMANN_HLLC) */ diff --git a/src/amuse/community/arepo/src/hydro/riemann_hlld.c b/src/amuse/community/arepo/src/hydro/riemann_hlld.c new file mode 100644 index 0000000000..8770282bd4 --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/riemann_hlld.c @@ -0,0 +1,567 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/riemann_hlld.c + * \date 05/2018 + * \brief Routines for a HLLD Riemann solver (to be used for MHD). + * \details contains functions: + * static inline int state_and_flux_valid(const struct state + * *st, const struct fluxes *flux) + * double godunov_flux_3d_hlld(struct state *st_L, struct state + * *st_R, double *vel_face, struct state_face *st_face, + * struct fluxes *flux) + * static double hlld_get_fast_wave(struct state *st) + * static void hlld_get_fluxes_from_state(struct state *st, + * struct fluxes *flux, double *st_ptot) + * static void hlld_get_star(struct state *st_star, struct + * state *st, double S, double S_M, double ptot, double + * ptot_star) + * static void hlld_get_fluxes_star(struct state *st_A, struct + * state *st_A_star, struct fluxes *flux_A, double S_A, + * struct fluxes *flux) + * static void hlld_get_starstar_L(struct state *st_star_L, + * struct state *st_star_R, struct state *st_starstar) + * static void hlld_get_starstar_R(struct state *st_star_L, + * struct state *st_star_R, struct state *st_starstar) + * static void hlld_get_starstar(struct state *st_star_L, + * struct state *st_star_R, struct state *st_starstar, + * struct state *st_star_A, double sign) + * static void hlld_get_fluxes_starstar(struct state *st_A, + * struct state *st_A_star, struct state *st_A_starstar, + * struct fluxes *flux_A, double S_A, double S_A_star, struct + * fluxes *flux) + * static void hll_get_star(struct state *st_star, struct + * fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, + * struct state *st_R, double S_L, double S_R) + * static void hll_get_flux(struct fluxes *flux, struct fluxes + * *flux_L, struct fluxes *flux_R, struct state *st_L, + * struct state *st_R, double S_L, double S_R) + * static void lax_get_flux(struct fluxes *flux, struct fluxes + * *flux_L, struct fluxes *flux_R, struct state 
*st_L, struct + * state *st_R, double S) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#if defined(RIEMANN_HLLD) + +static double hlld_get_fast_wave(struct state *st); +static void hlld_get_fluxes_from_state(struct state *st_face, struct fluxes *flux, double *st_ptot); +static void hlld_get_star(struct state *st_star, struct state *st, double S, double S_M, double ptot, double ptot_star); +static void hlld_get_fluxes_star(struct state *st_A, struct state *st_A_star, struct fluxes *flux_A, double S_A, struct fluxes *flux); +static void hlld_get_starstar_L(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar); +static void hlld_get_starstar_R(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar); +static void hlld_get_starstar(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar, struct state *st_star_A, + double sign); +static void hlld_get_fluxes_starstar(struct state *st_A, struct state *st_A_star, struct state *st_A_starstar, struct fluxes *flux_A, + double S_A, double S_A_star, struct fluxes *flux); +static void hll_get_star(struct state *st_star, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R, + double S_L, double S_R); +static void hll_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R, + double S_L, double S_R); +static void lax_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R, + double S); + +/*! \brief Check if pressure, energy and energy flux have valid values. + * + * \param[in] st State. + * \param[in] flux Flux. + * + * \return 1 if valid state and flux, 0 otherwise. 
+ */
+static inline int state_and_flux_valid(const struct state *st, const struct fluxes *flux)
+{
+  /* pressure must be non-negative and finite, the energy flux finite */
+  return (st->press >= 0) && gsl_finite(st->press) && gsl_finite(flux->energy);
+}
+
+/*! \brief Main routine for the hlld Riemann solver.
+ *
+ * Called in finite_volume_solver.c.
+ *
+ * The routine first tries the HLLD flux. If the resulting state or energy
+ * flux is not valid (see state_and_flux_valid()), it falls back to the HLL
+ * flux and, if that fails as well, to the Lax-Friedrich flux.
+ *
+ * Note that st_L->Bx and st_R->Bx are both overwritten with their mean, and
+ * that the Energy field of both input states is set as a side effect of
+ * hlld_get_fluxes_from_state().
+ *
+ * \param[in] st_L Left state of the Riemann problem.
+ * \param[in] st_R Right state of the Riemann problem.
+ * \param[in] vel_face Velocity at which the face is moving (only used for
+ * the diagnostic output).
+ * \param[out] st_face State at face.
+ * \param[out] flux Flux through face.
+ *
+ * \return Pressure.
+ */
+double godunov_flux_3d_hlld(struct state *st_L, struct state *st_R, double *vel_face, struct state_face *st_face, struct fluxes *flux)
+{
+  struct state st_Lstar, st_Rstar, st_star;
+  struct state st_Lstarstar, st_Rstarstar;
+  struct state *st_middle;
+  double Bx;
+  double cf_L, cf_R;
+  double S, S_L, S_R, S_M, S_L_star, S_R_star;
+  double ptot_L, ptot_R;
+
+  /* only computed in the star branch; initialised so the diagnostic printf below is well defined */
+  S_R_star = S_L_star = S_M = 0.;
+
+  if(st_L->rho > 0 && st_R->rho > 0)
+    {
+      /* use one common normal field on both sides and at the face */
+      Bx         = 0.5 * (st_L->Bx + st_R->Bx);
+      flux->B[0] = 0.;
+
+      st_L->Bx    = Bx;
+      st_R->Bx    = Bx;
+      st_face->Bx = Bx;
+
+      /* get wave speeds first */
+      cf_L = hlld_get_fast_wave(st_L);
+      cf_R = hlld_get_fast_wave(st_R);
+
+      /* largest absolute signal speed; only needed by the Lax-Friedrich fallback */
+      S = dmax(dmax(fabs(st_L->velx - cf_L), fabs(st_R->velx - cf_R)), dmax(fabs(st_L->velx + cf_L), fabs(st_R->velx + cf_R)));
+
+      S_L = dmin(st_L->velx - cf_L, st_R->velx - cf_R);
+      S_R = dmax(st_L->velx + cf_L, st_R->velx + cf_R);
+
+      if(S_L >= 0)
+        {
+          st_middle = st_L;
+          hlld_get_fluxes_from_state(st_L, flux, NULL);
+        }
+      else if(S_R <= 0)
+        {
+          st_middle = st_R;
+          hlld_get_fluxes_from_state(st_R, flux, NULL);
+        }
+      else
+        {
+          // stars are needed
+          struct fluxes flux_R, flux_L;
+
+          hlld_get_fluxes_from_state(st_L, &flux_L, &ptot_L);
+          hlld_get_fluxes_from_state(st_R, &flux_R, &ptot_R);
+
+          S_M = ((S_R - st_R->velx) * st_R->rho * st_R->velx - (S_L - st_L->velx) * st_L->rho * st_L->velx - ptot_R + ptot_L) /
+                ((S_R - st_R->velx) * st_R->rho - (S_L - st_L->velx) * st_L->rho);
+
+          double ptot_star = ((S_R - st_R->velx) * st_R->rho * ptot_L - (S_L - st_L->velx) * st_L->rho * ptot_R +
+                              st_L->rho * st_R->rho * (S_R - st_R->velx) * (S_L - st_L->velx) * (st_R->velx - st_L->velx)) /
+                             ((S_R - st_R->velx) * st_R->rho - (S_L - st_L->velx) * st_L->rho);
+
+          hlld_get_star(&st_Lstar, st_L, S_L, S_M, ptot_L, ptot_star);
+          hlld_get_star(&st_Rstar, st_R, S_R, S_M, ptot_R, ptot_star);
+
+          S_L_star = S_M - fabs(st_L->Bx) / sqrt(st_Lstar.rho);
+          S_R_star = S_M + fabs(st_R->Bx) / sqrt(st_Rstar.rho);
+
+          if(S_L_star >= 0 || (Bx == 0 && S_M >= 0)) // we already know: S_L <= 0
+            {
+              st_middle = &st_Lstar;
+              hlld_get_fluxes_star(st_L, &st_Lstar, &flux_L, S_L, flux);
+            }
+          else if(S_R_star <= 0 || (Bx == 0)) // we already know: S_R >= 0
+            {
+              st_middle = &st_Rstar;
+              hlld_get_fluxes_star(st_R, &st_Rstar, &flux_R, S_R, flux);
+            }
+          else
+            {
+              // double stars are needed
+              if(S_M >= 0) // we already know: S_L_star <= 0
+                {
+                  st_middle = &st_Lstarstar;
+                  hlld_get_starstar_L(&st_Lstar, &st_Rstar, &st_Lstarstar);
+                  hlld_get_fluxes_starstar(st_L, &st_Lstar, &st_Lstarstar, &flux_L, S_L, S_L_star, flux);
+                }
+              else // we already know: S_R_star >= 0 and S_M <= 0
+                {
+                  st_middle = &st_Rstarstar;
+                  hlld_get_starstar_R(&st_Lstar, &st_Rstar, &st_Rstarstar);
+                  hlld_get_fluxes_starstar(st_R, &st_Rstar, &st_Rstarstar, &flux_R, S_R, S_R_star, flux);
+                }
+            }
+        }
+    }
+  else
+    {
+      printf("Left: st_L->press=%g st_L->rho=%g st_L->velx=%g\n", st_L->press, st_L->rho, st_L->velx);
+      printf("Right: st_R->press=%g st_R->rho=%g st_R->velx=%g\n", st_R->press, st_R->rho, st_R->velx);
+      terminate("density is zero\n");
+      return 0;
+    }
+
+  if(!state_and_flux_valid(st_middle, flux))
+    {
+      /* HLLD did not work => use HLL instead */
+      struct fluxes flux_R, flux_L;
+
+      hlld_get_fluxes_from_state(st_L, &flux_L, NULL);
+      hlld_get_fluxes_from_state(st_R, &flux_R, NULL);
+
+      hll_get_star(&st_star, &flux_L, &flux_R, st_L, st_R, S_L, S_R);
+      hll_get_flux(flux, &flux_L, &flux_R, st_L, st_R, S_L, S_R);
+
+      st_middle = &st_star;
+
+      if(!state_and_flux_valid(st_middle, flux))
+        {
+          /* HLL did not work, use lax-friedrich flux instead */
+          lax_get_flux(flux, &flux_L, &flux_R, st_L, st_R, S);
+
+          /* st_middle still points to st_star; only its pressure is replaced */
+          st_star.press = 0.5 * (st_L->press + st_R->press);
+        }
+    }
+
+  st_face->rho   = st_middle->rho;
+  st_face->velx  = st_middle->velx;
+  st_face->vely  = st_middle->vely;
+  st_face->velz  = st_middle->velz;
+  st_face->press = st_middle->press;
+  st_face->By    = st_middle->By;
+  st_face->Bz    = st_middle->Bz;
+
+  /* all fallbacks failed: report the state, but still return */
+  if(!state_and_flux_valid(st_middle, flux))
+    {
+      printf("M: rho=%g, v=(%g,%g,%g), p=%g, B=(%g,%g,%g)\n", st_middle->rho, st_middle->velx + vel_face[0],
+             st_middle->vely + vel_face[1], st_middle->velz + vel_face[2], st_middle->press, st_middle->Bx, st_middle->By,
+             st_middle->Bz);
+      printf("S_L=%g, S_L_star=%g, S_M=%g, S_R_star=%g, S_R=%g, cf_L=%g, cf_R=%g\n", S_L, S_L_star, S_M, S_R_star, S_R, cf_L, cf_R);
+    }
+
+  return st_middle->press;
+}
+
+/*! \brief Calculates signal speed of the fast magnetosonic wave.
+ *
+ * Evaluates sqrt(0.5 / rho * (gpb2 + sqrt(gpb2^2 - 4 * gPress * Bx^2))) with
+ * gPress = GAMMA * press and gpb2 = gPress + B^2.
+ *
+ * \param[in] st MHD state (rho, press, Bx, By and Bz are read).
+ *
+ * \return Signal speed of fast wave.
+ */
+static double hlld_get_fast_wave(struct state *st)
+{
+  double gamma  = GAMMA;
+  double gPress = gamma * st->press;
+  double Bsqr   = st->Bx * st->Bx + st->By * st->By + st->Bz * st->Bz;
+  double gpb2   = gPress + Bsqr;
+
+  /* Analytically the discriminant is never negative, but when By = Bz = 0
+   * and gPress is (almost) equal to Bx^2 the two terms cancel and round-off
+   * can leave a tiny negative value, for which sqrt() would return NaN.
+   * Clamp at zero; a NaN input is left untouched by the comparison. */
+  double discriminant = gpb2 * gpb2 - 4. * gPress * st->Bx * st->Bx;
+  if(discriminant < 0.)
+    discriminant = 0.;
+
+  return sqrt(0.5 / st->rho * (gpb2 + sqrt(discriminant)));
+}
+
+/*! \brief Calculates the flux from a state.
+ *
+ * Mass, momentum and energy flux.
+ *
+ * \param[in] st State.
+ * \param[out] flux Flux corresponding to the state.
+ * \param[out] st_ptot Total pressure.
+ *
+ * \return void
+ */
+static void hlld_get_fluxes_from_state(struct state *st, struct fluxes *flux, double *st_ptot)
+{
+  const double gamma_minus1 = GAMMA - 1.;
+  const double cr_press     = 0.; /* extra pressure term, always zero here */
+
+  /* recurring products: mass flux, field strength squared, velocity squared */
+  const double rho_vx = st->rho * st->velx;
+  const double Bsqr   = st->Bx * st->Bx + st->By * st->By + st->Bz * st->Bz;
+  const double vsqr   = st->velx * st->velx + st->vely * st->vely + st->velz * st->velz;
+
+  /* energy and total pressure (thermal + magnetic + cr_press) */
+  const double etot = st->press / gamma_minus1 + 0.5 * st->rho * vsqr + 0.5 * Bsqr;
+  const double ptot = st->press + 0.5 * Bsqr + cr_press;
+
+  flux->mass = rho_vx;
+
+  flux->momentum[0] = rho_vx * st->velx + st->press + 0.5 * Bsqr - st->Bx * st->Bx + cr_press;
+  flux->momentum[1] = rho_vx * st->vely - st->Bx * st->By;
+  flux->momentum[2] = rho_vx * st->velz - st->Bx * st->Bz;
+
+  /* B[0] is not touched here */
+  flux->B[1] = st->By * st->velx - st->Bx * st->vely;
+  flux->B[2] = st->Bz * st->velx - st->Bx * st->velz;
+
+  flux->energy = (etot + ptot) * st->velx - st->Bx * (st->velx * st->Bx + st->vely * st->By + st->velz * st->Bz);
+
+  /* the energy is kept in the state for the star-state routines */
+  st->Energy = etot;
+
+  /* the total pressure is optional; callers pass NULL when they do not need it */
+  if(st_ptot)
+    *st_ptot = ptot;
+}
+
+/*! \brief Calculates state in star region.
+ *
+ * \param[out] st_star State in star region (computed in this function).
+ * \param[in] st Outer state of Riemann problem.
+ * \param[in] S Velocity of characteristics.
+ * \param[in] S_M Velocity of magnetic characteristics.
+ * \param[in] ptot Total pressure of outer state.
+ * \param[in] ptot_star Total pressure in star region.
+ *
+ * \return void
+ */
+static void hlld_get_star(struct state *st_star, struct state *st, double S, double S_M, double ptot, double ptot_star)
+{
+  /* speed differences and the factors shared by the velocity and field updates */
+  const double dS_v   = S - st->velx;
+  const double dS_M   = S - S_M;
+  const double rho_dS = st->rho * dS_v;
+  const double BxBx   = st->Bx * st->Bx;
+  const double denom  = rho_dS * dS_M - BxBx; /* common denominator of vely, velz, By, Bz */
+  const double numer  = rho_dS * dS_v - BxBx; /* common numerator factor of By, Bz */
+
+  st_star->rho  = rho_dS / dS_M;
+  st_star->velx = S_M;
+  st_star->vely = st->vely - st->Bx * st->By * (S_M - st->velx) / denom;
+  st_star->velz = st->velz - st->Bx * st->Bz * (S_M - st->velx) / denom;
+
+  st_star->Bx = st->Bx;
+  st_star->By = st->By * numer / denom;
+  st_star->Bz = st->Bz * numer / denom;
+
+  /* uses the star velocities and fields assigned just above */
+  st_star->Energy = (dS_v * st->Energy - ptot * st->velx + ptot_star * S_M +
+                     st->Bx * (st->velx * st->Bx + st->vely * st->By + st->velz * st->Bz - st_star->velx * st->Bx -
+                               st_star->vely * st_star->By - st_star->velz * st_star->Bz)) /
+                    dS_M;
+
+  /* thermal pressure = total star pressure minus the magnetic part */
+  st_star->press = ptot_star - 0.5 * (st_star->Bx * st_star->Bx + st_star->By * st_star->By + st_star->Bz * st_star->Bz);
+}
+
+/*! \brief Calculates a central flux.
+ *
+ * \param[in] st_A State of the Riemann problem.
+ * \param[in] st_A_star State inside fast wave.
+ * \param[in] flux_A Flux through face.
+ * \param[in] S_A speed of characteristics.
+ * \param[out] flux Flux through face.
+ * + * \return void + */ +static void hlld_get_fluxes_star(struct state *st_A, struct state *st_A_star, struct fluxes *flux_A, double S_A, struct fluxes *flux) +{ + flux->mass = flux_A->mass - S_A * (st_A->rho - st_A_star->rho); + + flux->momentum[0] = flux_A->momentum[0] - S_A * (st_A->rho * st_A->velx - st_A_star->rho * st_A_star->velx); + flux->momentum[1] = flux_A->momentum[1] - S_A * (st_A->rho * st_A->vely - st_A_star->rho * st_A_star->vely); + flux->momentum[2] = flux_A->momentum[2] - S_A * (st_A->rho * st_A->velz - st_A_star->rho * st_A_star->velz); + + flux->B[1] = flux_A->B[1] - S_A * (st_A->By - st_A_star->By); + flux->B[2] = flux_A->B[2] - S_A * (st_A->Bz - st_A_star->Bz); + + flux->energy = flux_A->energy - S_A * (st_A->Energy - st_A_star->Energy); +} + +/*! \brief Get state in starstar region, case S_M>=0. + * + * \param[in] st_star_L State in left star region. + * \param[in] st_star_R State in right star region. + * \param[out] st_starstar State in starstar region. + * + * \return void + */ +static void hlld_get_starstar_L(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar) +{ + hlld_get_starstar(st_star_L, st_star_R, st_starstar, st_star_L, -1.0); +} + +/*! \brief Get state in starstar region, case S_M<0. + * + * \param[in] st_star_L State in left star region. + * \param[in] st_star_R State in right star region. + * \param[out] st_starstar State in starstar region. + * + * \return void + */ +static void hlld_get_starstar_R(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar) +{ + hlld_get_starstar(st_star_L, st_star_R, st_starstar, st_star_R, 1.0); +} + +/*! \brief Get state in starstar region. + * + * \param[in] st_star_L State in left star region. + * \param[in] st_star_R State in right star region. + * \param[out] st_starstar State in starstar region. + * \param[in] st_star_A State where flow is coming from (depends on + * directionality of the flow). + * \param[in] sign Directionality of flow. 
+ *
+ * \return void
+ */
+static void hlld_get_starstar(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar, struct state *st_star_A,
+                              double sign)
+{
+  /* sign of the normal field of the reference state (+1 for Bx == 0) */
+  const double sBx = st_star_A->Bx < 0 ? -1.0 : 1.0;
+
+  /* square roots of the two star densities act as weights below */
+  const double sqLrho  = sqrt(st_star_L->rho);
+  const double sqRrho  = sqrt(st_star_R->rho);
+  const double sq_sum  = sqLrho + sqRrho;
+  const double sq_prod = sqLrho * sqRrho;
+
+  /* density, normal field and pressure are taken over from the reference state */
+  st_starstar->rho = st_star_A->rho;
+
+  st_starstar->velx = st_star_L->velx; /* == st_star_R->velx == S_M */
+  st_starstar->vely = ((sqLrho * st_star_L->vely) + (sqRrho * st_star_R->vely) + (st_star_R->By - st_star_L->By) * sBx) / sq_sum;
+  st_starstar->velz = ((sqLrho * st_star_L->velz) + (sqRrho * st_star_R->velz) + (st_star_R->Bz - st_star_L->Bz) * sBx) / sq_sum;
+
+  st_starstar->Bx = st_star_A->Bx;
+  st_starstar->By =
+      ((sqLrho * st_star_R->By) + (sqRrho * st_star_L->By) + sq_prod * (st_star_R->vely - st_star_L->vely) * sBx) / sq_sum;
+  st_starstar->Bz =
+      ((sqLrho * st_star_R->Bz) + (sqRrho * st_star_L->Bz) + sq_prod * (st_star_R->velz - st_star_L->velz) * sBx) / sq_sum;
+
+  /* uses the starstar velocities and fields assigned just above */
+  st_starstar->Energy = st_star_A->Energy + sign * sqrt(st_star_A->rho) * sBx *
+                                                (st_star_A->velx * st_star_A->Bx + st_star_A->vely * st_star_A->By +
+                                                 st_star_A->velz * st_star_A->Bz - st_starstar->velx * st_star_A->Bx -
+                                                 st_starstar->vely * st_starstar->By - st_starstar->velz * st_starstar->Bz);
+
+  st_starstar->press = st_star_A->press;
+}
+
+/*! \brief Get fluxes in starstar region.
+ *
+ * \param[in] st_A State in outside region.
+ * \param[in] st_A_star State in star region.
+ * \param[in] st_A_starstar State in starstar region.
+ * \param[in] flux_A Flux corresponding to st_A.
+ * \param[in] S_A Speed of characteristics in outside region.
+ * \param[in] S_A_star Speed of characteristics in star region.
+ * \param[out] flux Flux in starstar region.
+ *
+ * \return void
+ */
+static void hlld_get_fluxes_starstar(struct state *st_A, struct state *st_A_star, struct state *st_A_starstar, struct fluxes *flux_A,
+                                     double S_A, double S_A_star, struct fluxes *flux)
+{
+  /* short aliases for the three states: outer, star, starstar */
+  const struct state *outer  = st_A;
+  const struct state *star   = st_A_star;
+  const struct state *dstar  = st_A_starstar;
+  const double dS            = S_A_star - S_A;
+
+  /* every component follows the same pattern:
+   * F_A + S_A_star * U_starstar - (S_A_star - S_A) * U_star - S_A * U_A */
+  flux->mass = flux_A->mass + S_A_star * dstar->rho - dS * star->rho - S_A * outer->rho;
+
+  flux->momentum[0] =
+      flux_A->momentum[0] + S_A_star * dstar->rho * dstar->velx - dS * star->rho * star->velx - S_A * outer->rho * outer->velx;
+  flux->momentum[1] =
+      flux_A->momentum[1] + S_A_star * dstar->rho * dstar->vely - dS * star->rho * star->vely - S_A * outer->rho * outer->vely;
+  flux->momentum[2] =
+      flux_A->momentum[2] + S_A_star * dstar->rho * dstar->velz - dS * star->rho * star->velz - S_A * outer->rho * outer->velz;
+
+  flux->B[1] = flux_A->B[1] + S_A_star * dstar->By - dS * star->By - S_A * outer->By;
+  flux->B[2] = flux_A->B[2] + S_A_star * dstar->Bz - dS * star->Bz - S_A * outer->Bz;
+
+  flux->energy = flux_A->energy + S_A_star * dstar->Energy - dS * star->Energy - S_A * outer->Energy;
+}
+
+/*! \brief Get state in star region.
+ *
+ * \param[out] st_star State in star region.
+ * \param[in] flux_L Flux from the left state.
+ * \param[in] flux_R Flux from the right state.
+ * \param[in] st_L State at the left side of the Riemann problem.
+ * \param[in] st_R State at the right side of the Riemann problem.
+ * \param[in] S_L Speed of characteristics on the left side.
+ * \param[in] S_R Speed of characteristics on the right side.
+ *
+ * \return void
+ */
+static void hll_get_star(struct state *st_star, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R,
+                         double S_L, double S_R)
+{
+  const double gamma_minus1 = GAMMA - 1.;
+  const double inv_dS       = 1.0 / (S_R - S_L);
+
+  /* speed-weighted densities, reused by the density and all momentum components */
+  const double SR_rhoR = S_R * st_R->rho;
+  const double SL_rhoL = S_L * st_L->rho;
+
+  st_star->rho = inv_dS * (SR_rhoR - SL_rhoL - flux_R->mass + flux_L->mass);
+
+  /* velocities: averaged momentum divided by the star density */
+  st_star->velx = inv_dS * (SR_rhoR * st_R->velx - SL_rhoL * st_L->velx - flux_R->momentum[0] + flux_L->momentum[0]) / st_star->rho;
+  st_star->vely = inv_dS * (SR_rhoR * st_R->vely - SL_rhoL * st_L->vely - flux_R->momentum[1] + flux_L->momentum[1]) / st_star->rho;
+  st_star->velz = inv_dS * (SR_rhoR * st_R->velz - SL_rhoL * st_L->velz - flux_R->momentum[2] + flux_L->momentum[2]) / st_star->rho;
+
+  st_star->Energy = inv_dS * (S_R * st_R->Energy - S_L * st_L->Energy - flux_R->energy + flux_L->energy);
+
+  st_star->Bx = st_R->Bx; /* == st_L->Bx */
+  st_star->By = inv_dS * (S_R * st_R->By - S_L * st_L->By - flux_R->B[1] + flux_L->B[1]);
+  st_star->Bz = inv_dS * (S_R * st_R->Bz - S_L * st_L->Bz - flux_R->B[2] + flux_L->B[2]);
+
+  /* pressure from the energy after removing the kinetic and magnetic parts */
+  const double e_kin = 0.5 * st_star->rho * (st_star->velx * st_star->velx + st_star->vely * st_star->vely + st_star->velz * st_star->velz);
+  const double e_mag = 0.5 * (st_star->Bx * st_star->Bx + st_star->By * st_star->By + st_star->Bz * st_star->Bz);
+
+  st_star->press = gamma_minus1 * (st_star->Energy - e_kin - e_mag);
+}
+
+/*! \brief Get interface flux from states.
+ *
+ * \param[out] flux Flux through the interface.
+ * \param[in] flux_L Flux from left state.
+ * \param[in] flux_R Flux from right state.
+ * \param[in] st_L Left state.
+ * \param[in] st_R Right state.
+ * \param[in] S_L Speed of characteristics at left side.
+ * \param[in] S_R Speed of characteristics at right side.
+ *
+ * \return void
+ */
+static void hll_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R,
+                         double S_L, double S_R)
+{
+  const double inv_dS = 1.0 / (S_R - S_L);
+  const double SR_SL  = S_R * S_L; /* weight of the jump in the conserved quantity */
+
+  /* every component: (S_R * F_L - S_L * F_R + S_R * S_L * (U_R - U_L)) / (S_R - S_L) */
+  flux->mass = inv_dS * (S_R * flux_L->mass - S_L * flux_R->mass + SR_SL * (st_R->rho - st_L->rho));
+
+  flux->momentum[0] =
+      inv_dS * (S_R * flux_L->momentum[0] - S_L * flux_R->momentum[0] + SR_SL * (st_R->rho * st_R->velx - st_L->rho * st_L->velx));
+  flux->momentum[1] =
+      inv_dS * (S_R * flux_L->momentum[1] - S_L * flux_R->momentum[1] + SR_SL * (st_R->rho * st_R->vely - st_L->rho * st_L->vely));
+  flux->momentum[2] =
+      inv_dS * (S_R * flux_L->momentum[2] - S_L * flux_R->momentum[2] + SR_SL * (st_R->rho * st_R->velz - st_L->rho * st_L->velz));
+
+  flux->energy = inv_dS * (S_R * flux_L->energy - S_L * flux_R->energy + SR_SL * (st_R->Energy - st_L->Energy));
+
+  /* B[0] is left untouched */
+  flux->B[1] = inv_dS * (S_R * flux_L->B[1] - S_L * flux_R->B[1] + SR_SL * (st_R->By - st_L->By));
+  flux->B[2] = inv_dS * (S_R * flux_L->B[2] - S_L * flux_R->B[2] + SR_SL * (st_R->Bz - st_L->Bz));
+}
+
+/*! \brief Get interface flux from states.
+ *
+ * Lax-Friedrich flux; used whenever the HLL flux estimate is invalid.
+ *
+ * \param[out] flux Flux through the interface.
+ * \param[in] flux_L Flux from left state.
+ * \param[in] flux_R Flux from right state.
+ * \param[in] st_L Left state.
+ * \param[in] st_R Right state.
+ * \param[in] S Signal speed multiplying the jump between the two states.
+ * + * \return void + */ +static void lax_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R, + double S) +{ + flux->mass = 0.5 * (flux_L->mass + flux_R->mass) - 0.5 * S * (st_R->rho - st_L->rho); + + flux->momentum[0] = 0.5 * (flux_L->momentum[0] + flux_R->momentum[0]) - 0.5 * S * (st_R->rho * st_R->velx - st_L->rho * st_L->velx); + flux->momentum[1] = 0.5 * (flux_L->momentum[1] + flux_R->momentum[1]) - 0.5 * S * (st_R->rho * st_R->vely - st_L->rho * st_L->vely); + flux->momentum[2] = 0.5 * (flux_L->momentum[2] + flux_R->momentum[2]) - 0.5 * S * (st_R->rho * st_R->velz - st_L->rho * st_L->velz); + + flux->energy = 0.5 * (flux_L->energy + flux_R->energy) - 0.5 * S * (st_R->Energy - st_L->Energy); + + flux->B[1] = 0.5 * (flux_L->B[1] + flux_R->B[1]) - 0.5 * S * (st_R->By - st_L->By); + flux->B[2] = 0.5 * (flux_L->B[2] + flux_R->B[2]) - 0.5 * S * (st_R->Bz - st_L->Bz); +} + +#endif /* #if defined(RIEMANN_HLLD) */ diff --git a/src/amuse/community/arepo/src/hydro/scalars.c b/src/amuse/community/arepo/src/hydro/scalars.c new file mode 100644 index 0000000000..b28bb67b6f --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/scalars.c @@ -0,0 +1,107 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/scalars.c + * \date 05/2018 + * \brief Routines to initialize passive scalars which are advected with + * the fluid. + * \details contains functions: + * void init_scalars() + * int scalar_init(MyFloat * addr, MyFloat * addr_mass, int + * type) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#ifdef MAXSCALARS +int N_Scalar = 0; +struct scalar_elements scalar_elements[MAXSCALARS]; +struct scalar_index ScalarIndex; +#endif /* #ifdef MAXSCALARS */ + +/*! \brief Main routine to initialize passive scalar quantities. + * + * \return void + */ +void init_scalars() +{ +#ifdef MAXSCALARS + +#if defined(REFINEMENT_HIGH_RES_GAS) + ScalarIndex.HighResMass = scalar_init(&SphP[0].HighResDensity, &SphP[0].HighResMass, SCALAR_TYPE_PASSIVE); + if(ScalarIndex.HighResMass == -1) + terminate("ScalarIndex.HighResMass initialized incorrectly\n"); +#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */ + +#ifdef PASSIVE_SCALARS + for(int i = 0; i < PASSIVE_SCALARS; i++) + { + scalar_init(&SphP[0].PScalars[i], &SphP[0].PConservedScalars[i], SCALAR_TYPE_PASSIVE); + } +#endif /* #ifdef PASSIVE_SCALARS */ + + mpi_printf("INIT: %d/%d Scalars used.\n", N_Scalar, MAXSCALARS); +#endif /* MAXSCALARS */ +} + +/*! \brief Initialize a specific scalar property. + * + * \param[in] addr Pointer to (primitive) scalar in SphP[0] struct. + * \param[in] addr_mass Pointer to conserved scalar quantity in SphP[0]. + * \param[in] type Type of scalar (e.g. 
SCALAR_TYPE_PASSIVE for passive + * scalar) + * + * \return Number of scalars - 1 + */ +int scalar_init(MyFloat *addr, MyFloat *addr_mass, int type) +{ +#ifdef MAXSCALARS + if(N_Scalar == MAXSCALARS) + { + mpi_printf("Failed to register scalar, maximum of %d already reached\n", MAXSCALARS); + terminate("MAXSCALARS reached"); + } + + /* save type and relative address */ + scalar_elements[N_Scalar].type = type; + scalar_elements[N_Scalar].offset = ((char *)addr) - ((char *)&SphP[0]); + scalar_elements[N_Scalar].offset_mass = ((char *)addr_mass) - ((char *)&SphP[0]); + + N_Scalar++; + + return N_Scalar - 1; + /* note: gradients are initialized in init_gradients */ +#else /* #ifdef MAXSCALARS */ + return -1; +#endif /* #ifdef MAXSCALARS #else */ +} diff --git a/src/amuse/community/arepo/src/hydro/update_primitive_variables.c b/src/amuse/community/arepo/src/hydro/update_primitive_variables.c new file mode 100644 index 0000000000..48a10cd4cf --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/update_primitive_variables.c @@ -0,0 +1,343 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/update_primitive_variables.c + * \date 05/2018 + * \brief Routines to recover the primitive hydrodynamical variables from + * the conserved ones. + * \details contains functions: + * void update_primitive_variables(void) + * void set_pressure_of_cell(int i) + * void set_pressure_of_cell_internal(struct particle_data + * *localP, struct sph_particle_data *localSphP, int i) + * void do_validity_checks(struct particle_data *localP, struct + * sph_particle_data *localSphP, int i, struct pv_update_data + * *pvd) + * void update_primitive_variables_single(struct particle_data + * *localP, struct sph_particle_data *localSphP, int i, + * struct pv_update_data *pvd) + * void update_internal_energy(struct particle_data *localP, + * struct sph_particle_data *localSphP, int i, struct + * pv_update_data *pvd) + * double get_sound_speed(int p) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Main routine to update the primitive hydrodynamics variables from + * the conserved ones. + * + * Note that the primitive variables are inconsistent with the (new) + * conserved variables after the hydro integration up to the point this + * function is called. 
+ * + * \return void + */ +void update_primitive_variables(void) +{ + TIMER_START(CPU_CELL_UPDATES); + + struct pv_update_data pvd; + int idx, i; + + if(All.ComovingIntegrationOn) + { + pvd.atime = All.Time; + pvd.hubble_a = hubble_function(All.Time); + pvd.a3inv = 1 / (All.Time * All.Time * All.Time); + } + else + pvd.atime = pvd.hubble_a = pvd.a3inv = 1.0; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + do_validity_checks(P, SphP, i, &pvd); + + update_primitive_variables_single(P, SphP, i, &pvd); + + update_internal_energy(P, SphP, i, &pvd); + + set_pressure_of_cell_internal(P, SphP, i); /* calculate the pressure from Density and Utherm (and composition) */ + + SphP[i].OldMass = P[i].Mass; + + SphP[i].TimeLastPrimUpdate = All.Time; + } + + TIMER_STOP(CPU_CELL_UPDATES); +} + +/*! \brief Wrapper function to calculate pressure of a cell from its internal + * energy. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return void + */ +void set_pressure_of_cell(int i) { set_pressure_of_cell_internal(P, SphP, i); } + +/*! \brief Function to calculate pressure from other hydrodynamics quantities. + * + * How this is done depends on the adiabatic index and potentially on sub- + * resolution physics. Note that this is just the thermal pressure (i.e. not + * including magnetic fields). + * + * \param[in] localP Pointer to particle data array. + * \param[in,out] localSphP Pointer to cell data array. + * \param[in] i Index in localP and localSphP arrays. 
+ * + * \return void + */ +void set_pressure_of_cell_internal(struct particle_data *localP, struct sph_particle_data *localSphP, int i) +{ +#ifdef ISOTHERM_EQS + localSphP[i].Pressure = localSphP[i].Density * All.IsoSoundSpeed * All.IsoSoundSpeed; +#else /* #ifdef ISOTHERM_EQS */ + + if(localSphP[i].Utherm >= 0) + localSphP[i].Pressure = GAMMA_MINUS1 * localSphP[i].Density * localSphP[i].Utherm; + else + localSphP[i].Pressure = 0; +#endif /* #ifdef ISOTHERM_EQS */ + +#ifdef ENFORCE_JEANS_STABILITY_OF_CELLS +#if defined(USE_SFR) + if(get_starformation_rate(i) == 0) +#endif /* #if defined(USE_SFR) */ + { +#ifdef ADAPTIVE_HYDRO_SOFTENING + double cell_soft = All.ForceSoftening[localP[i].SofteningType]; +#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + double cell_soft = All.GasSoftFactor * get_cell_radius(i); +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */ + + localSphP[i].Pressure = + dmax(localSphP[i].Pressure, GAMMA_MINUS1 * localSphP[i].Density * 2 * All.G * localP[i].Mass / (All.cf_atime * cell_soft)); + } +#endif /* #ifdef ENFORCE_JEANS_STABILITY_OF_CELLS */ +} + +/*! \brief Validity checks for a gas cell. + * + * So far, only a positive mass constraint implemented. Terminates if not + * successful. + * + * \param[in] localP Pointer to particle data array + * \param[in,out] localSphP Pointer to cell data array + * \param[in] i Index in localP and localSphP arrays + * \param[in] pvd (unused) + * + * \return void + */ +void do_validity_checks(struct particle_data *localP, struct sph_particle_data *localSphP, int i, struct pv_update_data *pvd) +{ + if(localP[i].Mass < 0) + { + printf("very bad...i=%d ID=%d mass=%g oldMass=%g utherm=%g pos=%g|%g|%g\n", i, (int)localP[i].ID, localP[i].Mass, + localSphP[i].OldMass, localSphP[i].Utherm, localP[i].Pos[0], localP[i].Pos[1], localP[i].Pos[2]); + + terminate("stop"); + } +} + +/*! \brief Updates primitive variables in a specified cell. + * + * \param[in] localP Pointer to particle data array. 
+ * \param[in,out] localSphP Pointer to cell data array. + * \param[in] i Index of cell in localP and localSphP arrays. + * \param[in] pvd additional data that is needed for update (e.g. cosmological + * factors). + * + * \return void + */ +void update_primitive_variables_single(struct particle_data *localP, struct sph_particle_data *localSphP, int i, + struct pv_update_data *pvd) +{ + localSphP[i].Density = localP[i].Mass / localSphP[i].Volume; + + if(localP[i].Mass > 0) + { + localP[i].Vel[0] = localSphP[i].Momentum[0] / localP[i].Mass; + localP[i].Vel[1] = localSphP[i].Momentum[1] / localP[i].Mass; + localP[i].Vel[2] = localSphP[i].Momentum[2] / localP[i].Mass; + +#ifdef MAXSCALARS + for(int k = 0; k < N_Scalar; k++) + { + *(MyFloat *)(((char *)(&localSphP[i])) + scalar_elements[k].offset) = + *(MyFloat *)(((char *)(&localSphP[i])) + scalar_elements[k].offset_mass) / localP[i].Mass; + } +#endif /* #ifdef MAXSCALARS */ + +#ifdef MHD + localSphP[i].B[0] = localSphP[i].BConserved[0] / localSphP[i].Volume; + localSphP[i].B[1] = localSphP[i].BConserved[1] / localSphP[i].Volume; + localSphP[i].B[2] = localSphP[i].BConserved[2] / localSphP[i].Volume; +#endif /* #ifdef MHD */ + } + else /* P[i].Mass <= 0 */ + { + localP[i].Vel[0] = 0; + localP[i].Vel[1] = 0; + localP[i].Vel[2] = 0; + +#ifdef MAXSCALARS + for(int k = 0; k < N_Scalar; k++) + *(MyFloat *)(((char *)(&localSphP[i])) + scalar_elements[k].offset) = 0; +#endif /* #ifdef MAXSCALARS */ + } +} + +/*! \brief Updates the internal energy field in a specified cell + * + * \param[in] localP Pointer to particle data array + * \param[in,out] localSphP Pointer to cell data array + * \param[in] i Index of cell in localP and localSphP arrays + * \param[in] pvd additional data that is needed for update (e.g. 
cosmological + * factors) + * + * \return void + */ +void update_internal_energy(struct particle_data *localP, struct sph_particle_data *localSphP, int i, struct pv_update_data *pvd) +{ +#ifndef ISOTHERM_EQS + double ulimit; + + if(localP[i].Mass > 0) + { +#ifdef MESHRELAX + localSphP[i].Utherm = localSphP[i].Energy / localP[i].Mass; +#else /* #ifdef MESHRELAX */ + localSphP[i].Utherm = + (localSphP[i].Energy / localP[i].Mass - + 0.5 * (localP[i].Vel[0] * localP[i].Vel[0] + localP[i].Vel[1] * localP[i].Vel[1] + localP[i].Vel[2] * localP[i].Vel[2])) / + (pvd->atime * pvd->atime); +#endif /* #ifdef MESHRELAX #else */ + +#ifdef MHD + localSphP[i].Utherm -= + 0.5 * + (localSphP[i].B[0] * localSphP[i].B[0] + localSphP[i].B[1] * localSphP[i].B[1] + localSphP[i].B[2] * localSphP[i].B[2]) / + localSphP[i].Density / pvd->atime; +#endif /* #ifdef MHD */ + + ulimit = All.MinEgySpec; + + if(localSphP[i].Utherm < ulimit) + { + EgyInjection -= localSphP[i].Energy; + + localSphP[i].Utherm = ulimit; + +#ifdef MESHRELAX + localSphP[i].Energy = localP[i].Mass * localSphP[i].Utherm; +#else /* #ifdef MESHRELAX */ + localSphP[i].Energy = + pvd->atime * pvd->atime * localP[i].Mass * localSphP[i].Utherm + + 0.5 * localP[i].Mass * + (localP[i].Vel[0] * localP[i].Vel[0] + localP[i].Vel[1] * localP[i].Vel[1] + localP[i].Vel[2] * localP[i].Vel[2]); +#endif /* #ifdef MESHRELAX */ + +#ifdef MHD + localSphP[i].Energy += + 0.5 * + (localSphP[i].B[0] * localSphP[i].B[0] + localSphP[i].B[1] * localSphP[i].B[1] + localSphP[i].B[2] * localSphP[i].B[2]) * + localSphP[i].Volume * pvd->atime; +#endif /* #ifdef MHD */ + + EgyInjection += localSphP[i].Energy; + } + } + else + localSphP[i].Utherm = 0; + + if(localSphP[i].Density < All.LimitUBelowThisDensity && localSphP[i].Utherm > All.LimitUBelowCertainDensityToThisValue) + { + localSphP[i].Utherm = All.LimitUBelowCertainDensityToThisValue; + localSphP[i].Energy = + pvd->atime * pvd->atime * localP[i].Mass * localSphP[i].Utherm + + 0.5 * localP[i].Mass * 
+ (localP[i].Vel[0] * localP[i].Vel[0] + localP[i].Vel[1] * localP[i].Vel[1] + localP[i].Vel[2] * localP[i].Vel[2]); +#ifdef MHD + localSphP[i].Energy += + 0.5 * + (localSphP[i].B[0] * localSphP[i].B[0] + localSphP[i].B[1] * localSphP[i].B[1] + localSphP[i].B[2] * localSphP[i].B[2]) * + localSphP[i].Volume * pvd->atime; +#endif /* #ifdef MHD */ + } + + if(localSphP[i].Utherm < 0) + { + printf("negative utherm %g\n", localSphP[i].Utherm); + terminate("stop"); + } + +#endif /* #ifndef ISOTHERM_EQS */ +} + +/*! \brief Calculates the sound speed of a specified cell + * + * Depends on equation of state and potential sub-resolution physics. + * + * \param[in] p Index of gas cell in P and SphP arrays + * + * \return Sound speed + */ +double get_sound_speed(int p) +{ + double csnd; + +#ifdef ISOTHERM_EQS + csnd = All.IsoSoundSpeed; +#else /* #ifdef ISOTHERM_EQS */ + + double gamma; + gamma = GAMMA; + + if(SphP[p].Density > 0) + csnd = sqrt(gamma * SphP[p].Pressure / SphP[p].Density); + else + csnd = 0; +#endif /* #ifdef ISOTHERM_EQS #else */ + +#ifdef MHD + /* for MHD, this is an upper bound to the signal velocity + to do it more precisely, the magnet field in normal direction to the + interfaces has to be taken into account */ + double Bsqr = SphP[p].B[0] * SphP[p].B[0] + SphP[p].B[1] * SphP[p].B[1] + SphP[p].B[2] * SphP[p].B[2]; + if(All.ComovingIntegrationOn) + Bsqr /= All.Time; + csnd = sqrt(csnd * csnd + Bsqr / SphP[p].Density); +#endif /* #ifdef MHD */ + + return csnd; +} diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c new file mode 100644 index 0000000000..ad8a5222ca --- /dev/null +++ b/src/amuse/community/arepo/src/init/begrun.c @@ -0,0 +1,344 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
* \copyright Arepo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Arepo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * A copy of the GNU General Public License is available under
 * LICENSE as part of this program. See also
 * .
 *
 * \file src/init/begrun.c
 * \date 05/2018
 * \brief Initial set-up of a simulation run
 * \details This file contains various functions to initialize a simulation
 * run. In particular, the parameter file is read in and parsed
 * and global variables are initialized to their proper values.
 * contains functions:
 * void hello(void)
 * void begrun0(void)
 * void begrun1(void)
 * void begrun2(void)
 * void set_units(void)
 *
 * \par Major modifications and contributions:
 *
 * - DD.MM.YYYY Description
 * - 03.05.2018 Prepared file for public release -- Rainer Weinberger
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "../main/allvars.h"
#include "../main/proto.h"

#include "../domain/domain.h"
#include "../mesh/voronoi/voronoi.h"

#ifdef HAVE_HDF5
#include
herr_t my_hdf5_error_handler(void *unused);
#endif

/* file-local helper, defined at the end of this file */
static void delete_end_file(void);

/*! \brief Prints a welcome message.
 *
 * The banner (an ASCII-art logo) is written through mpi_printf().
 *
 * \return void
 */
void hello(void)
{
  mpi_printf(
      "\n __ ____ ____ ____ _____\n /__\\ ( _ \\( ___)( _ \\( _ )\n /(__)\\ ) / )__) )___/ "
      ")(_)(\n(__)(__)(_)\\_)(____)(__) (_____)\n\n");
}

/*! \brief Prints used compile options.
*
 * \return void
 */
void begrun0(void)
{
  /* version, task and node layout banner */
  mpi_printf(
      "\nThis is Arepo, version %s.\n\nRunning with %d MPI tasks.\n\nApparently we're using %d compute nodes (we have a minimum of %d "
      "MPI tasks per node, and a maximum of %d)\n\nCode was compiled with settings:\n\n",
      AREPO_VERSION, NTask, NumNodes, MinTasksPerNode, MaxTasksPerNode);

  /* the list of compile-time options is written by task 0 only */
  if(ThisTask == 0)
    {
      output_compile_time_options();
    }
}

/*! \brief Initial setup of the simulation.
 *
 * First, the parameter file is read by read_parameter_file(),
 * then routines for setting units, etc are called. This function only does
 * the setup necessary to load the IC file. After the IC file has been loaded
 * and prepared by init(), setup continues with begrun2(). This splitting is
 * done so that we can return cleanly from operations that don't actually
 * start the simulation (converting snapshots, making projected images, etc.)
 *
 * \return void
 */
void begrun1(void)
{
  read_parameter_file(ParameterFile); /* ... read in parameters for this run */

  check_parameters(); /* consistency check of parameters */

  /* install the code's own error handlers for HDF5 (if available) and GSL */
#ifdef HAVE_HDF5
  H5Eset_auto(my_hdf5_error_handler, NULL);
#endif /* #ifdef HAVE_HDF5 */

  gsl_set_error_handler(my_gsl_error_handler);

#ifdef DEBUG
  enable_core_dumps_and_fpu_exceptions();
#endif /* #ifdef DEBUG */

  /* report the sizes of the central data structures */
  mpi_printf("BEGRUN: Size of particle structure %3d [bytes]\n", (int)sizeof(struct particle_data));
  mpi_printf("BEGRUN: Size of sph particle structure %3d [bytes]\n", (int)sizeof(struct sph_particle_data));
  mpi_printf("BEGRUN: Size of gravity tree node %3d [bytes]\n", (int)sizeof(struct NODE));
#ifdef MULTIPLE_NODE_SOFTENING
  mpi_printf("BEGRUN: Size of auxiliary gravity node %3d [bytes]\n", (int)sizeof(struct ExtNODE));
#endif /* #ifdef MULTIPLE_NODE_SOFTENING */

  set_units();

  if(RestartFlag == 1) /* this is needed here to allow domain decomposition right after restart */
    if(All.ComovingIntegrationOn)
      init_drift_table();

  init_io_fields();

  force_short_range_init();

#if defined(FORCETEST) && !defined(FORCETEST_TESTFORCELAW)
  forcetest_ewald_init();
#endif /* #if defined (FORCETEST) && !defined(FORCETEST_TESTFORCELAW) */

  /* set up random number generators */
  random_generator     = gsl_rng_alloc(gsl_rng_ranlxd1);
  random_generator_aux = gsl_rng_alloc(gsl_rng_ranlxd1);

  /* individual start-up seed */
  gsl_rng_set(random_generator, 42 + ThisTask);
  gsl_rng_set(random_generator_aux, 31452 + ThisTask);

  timebins_init(&TimeBinsHydro, "Hydro", &All.MaxPartSph);
  timebins_init(&TimeBinsGravity, "Gravity", &All.MaxPart);

#if defined(COOLING)
  /* the time is set to the start time before the cooling module is initialized */
  All.Time = All.TimeBegin;
  set_cosmo_factors_for_current_time();
  InitCool();
#endif /* #if defined(COOLING) */

#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL)
  ewald_init();
#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */

#ifdef TILE_ICS
  All.BoxSize *= All.TileICsFactor;
#endif /* #ifdef TILE_ICS */

  /* cache box size and half box size; stretched per axis by LONG_X/Y/Z when defined */
  boxSize = All.BoxSize;
  boxHalf = 0.5 * All.BoxSize;
#ifdef LONG_X
  boxHalf_X = boxHalf * LONG_X;
  boxSize_X = boxSize * LONG_X;
#endif /* #ifdef LONG_X */
#ifdef LONG_Y
  boxHalf_Y = boxHalf * LONG_Y;
  boxSize_Y = boxSize * LONG_Y;
#endif /* #ifdef LONG_Y */
#ifdef LONG_Z
  boxHalf_Z = boxHalf * LONG_Z;
  boxSize_Z = boxSize * LONG_Z;
#endif /* #ifdef LONG_Z */

  /* reset the bookkeeping of energy added by the internal-energy floor */
  EgyInjection = 0;

#ifdef PMGRID
  if((RestartFlag != 3) && (RestartFlag != 6))
    long_range_init();
#endif /* #ifdef PMGRID */

  /* for RestartFlag > 2 the log files are opened later, in begrun2() */
  if(RestartFlag <= 2)
    open_logfiles();

  All.TimeLastRestartFile = CPUThisRun;

#ifdef REDUCE_FLUSH
  All.FlushLast = CPUThisRun;
#endif /* #ifdef REDUCE_FLUSH */

  init_scalars();

  init_gradients();
}

/*! \brief Late setup, after the IC file has been loaded but before run() is
 * called.
 *
 * The output files are opened and various modules are initialized. The next
 * output time is determined by find_next_outputtime() and various timers are
 * set.
+ * + * \return void + */ +void begrun2(void) +{ + char contfname[1000]; + sprintf(contfname, "%scont", All.OutputDir); + unlink(contfname); + + delete_end_file(); + + if(RestartFlag > 2) + open_logfiles(); + +#if defined(USE_SFR) + sfr_init(); +#endif /* #if defined(USE_SFR) */ + +#ifdef PMGRID + long_range_init_regionsize(); +#endif /* #ifdef PMGRID */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + special_particle_create_list(); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + + if(RestartFlag != 1) /* this needs to be done here because here All.TimeBegin has the correct value */ + if(All.ComovingIntegrationOn) + init_drift_table(); + + { + if(RestartFlag == 2) + All.Ti_nextoutput = find_next_outputtime(All.Ti_Current + 100); + else + All.Ti_nextoutput = find_next_outputtime(All.Ti_Current); + } + + All.TimeLastRestartFile = CPUThisRun; + +#ifdef REDUCE_FLUSH + All.FlushLast = CPUThisRun; +#endif /* #ifdef REDUCE_FLUSH */ + +#if defined(FORCETEST) && defined(FORCETEST_TESTFORCELAW) + gravity_forcetest_testforcelaw(); +#endif /* #if defined(FORCETEST) && defined(FORCETEST_TESTFORCELAW) */ +} + +/*! \brief Computes conversion factors between internal code units and the + * cgs-system. + * + * In addition constants like the gravitation constant are set. 
+ * + * \return void + */ +void set_units(void) +{ + double meanweight; + +#ifdef STATICNFW + double Mtot; +#endif /* #ifdef STATICNFW */ + + All.UnitTime_in_s = All.UnitLength_in_cm / All.UnitVelocity_in_cm_per_s; + All.UnitTime_in_Megayears = All.UnitTime_in_s / SEC_PER_MEGAYEAR; + + if(All.GravityConstantInternal == 0) + All.G = GRAVITY / pow(All.UnitLength_in_cm, 3) * All.UnitMass_in_g * pow(All.UnitTime_in_s, 2); + else + All.G = All.GravityConstantInternal; + + All.UnitDensity_in_cgs = All.UnitMass_in_g / pow(All.UnitLength_in_cm, 3); + All.UnitPressure_in_cgs = All.UnitMass_in_g / All.UnitLength_in_cm / pow(All.UnitTime_in_s, 2); + All.UnitCoolingRate_in_cgs = All.UnitPressure_in_cgs / All.UnitTime_in_s; + All.UnitEnergy_in_cgs = All.UnitMass_in_g * pow(All.UnitLength_in_cm, 2) / pow(All.UnitTime_in_s, 2); + + /* convert some physical input parameters to internal units */ + + All.Hubble = HUBBLE * All.UnitTime_in_s; + + mpi_printf("BEGRUN: Hubble (internal units) = %g\n", All.Hubble); + mpi_printf("BEGRUN: G (internal units) = %g\n", All.G); + mpi_printf("BEGRUN: UnitMass_in_g = %g\n", All.UnitMass_in_g); + mpi_printf("BEGRUN: UnitTime_in_s = %g\n", All.UnitTime_in_s); + mpi_printf("BEGRUN: UnitVelocity_in_cm_per_s = %g\n", All.UnitVelocity_in_cm_per_s); + mpi_printf("BEGRUN: UnitDensity_in_cgs = %g\n", All.UnitDensity_in_cgs); + mpi_printf("BEGRUN: UnitEnergy_in_cgs = %g\n", All.UnitEnergy_in_cgs); + mpi_printf("\n"); + + meanweight = 4.0 / (1 + 3 * HYDROGEN_MASSFRAC); /* note: assuming NEUTRAL GAS */ + + if(All.MinEgySpec == 0) + { + All.MinEgySpec = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.MinGasTemp; + All.MinEgySpec *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; + + mpi_printf("BEGRUN: MinEgySpec set to %g based on MinGasTemp=%g\n", All.MinEgySpec, All.MinGasTemp); + } + +#if defined(USE_SFR) + set_units_sfr(); +#endif /* #if defined(USE_SFR) */ + +#ifdef STATICNFW + R200 = pow(NFW_M200 * All.G / (100 * All.Hubble * 
All.Hubble), 1.0 / 3); + Rs = R200 / NFW_C; + Dc = 200.0 / 3 * NFW_C * NFW_C * NFW_C / (log(1 + NFW_C) - NFW_C / (1 + NFW_C)); + RhoCrit = 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + V200 = 10 * All.Hubble * R200; + mpi_printf("V200= %g\n", V200); + + fac = 1.0; + Mtot = enclosed_mass(R200); + mpi_printf("M200= %g\n", Mtot); + fac = V200 * V200 * V200 / (10 * All.G * All.Hubble) / Mtot; + Mtot = enclosed_mass(R200); + mpi_printf("M200= %g\n", Mtot); +#endif /* #ifdef STATICNFW */ +} + +/*! \brief deletes the end file if it exists. + * + * This is needed in case a already completed simulation is extended or + * overwritten. Note that the end-file is completely passive. + * + * \return void + */ +static void delete_end_file(void) +{ + if(RestartFlag > 2) // no simulation happening + { + return; + } + + char endfname[1000]; + sprintf(endfname, "%send", All.OutputDir); + unlink(endfname); + return; +} diff --git a/src/amuse/community/arepo/src/init/density.c b/src/amuse/community/arepo/src/init/density.c new file mode 100644 index 0000000000..8be85e443b --- /dev/null +++ b/src/amuse/community/arepo/src/init/density.c @@ -0,0 +1,635 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/init/density.c + * \date 05/2018 + * \brief SPH density computation and smoothing length determination. + * \details This file contains the "first SPH loop", where the SPH + * densities and smoothing lengths are calculated. + * In Arepo, this is used in setup_smoothinglengths() (init.c) to + * get an initial guess for MaxDelaunayRadius. + * Note that the SPH density is NOT used in the subsequent + * hydrodynamics calculation, but the density is either set by the + * initial conditions explicitly (DENSITY_AS_MASS_IN_INPUT) or + * calculated by the mass given in the initial conditions divided + * by the volume of the cell calculated by the Voronoi + * tessellation algorithm. + * contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void density(void) + * static int density_evaluate(int target, int mode, int + * threadid) + * int density_isactive(int n) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" + +static int density_evaluate(int target, int mode, int threadid); + +static MyFloat *NumNgb, *DhsmlDensityFactor; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES +static MyFloat *MinDist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. 
+ */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Hsml; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + MyIDType ID; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + in->Hsml = SphP[i].Hsml; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + in->ID = P[i].ID; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyFloat Rho; + MyFloat DhsmlDensity; + MyFloat Ngb; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + MyFloat MinDist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + NumNgb[i] = out->Ngb; + if(P[i].Type == 0) + { + SphP[i].Density = out->Rho; + DhsmlDensityFactor[i] = out->DhsmlDensity; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + MinDist[i] = out->MinDist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + } + } + else /* combine */ + { + NumNgb[i] += out->Ngb; + if(P[i].Type == 0) + { + SphP[i].Density += out->Rho; + DhsmlDensityFactor[i] += out->DhsmlDensity; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + if(MinDist[i] > out->MinDist) + MinDist[i] = out->MinDist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + } + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int idx; + + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + idx = NextParticle++; + + if(idx >= TimeBinsHydro.NActiveParticles) + break; + + int i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(density_isactive(i)) + density_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. 
+ * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + density_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +static MyFloat *NumNgb, *DhsmlDensityFactor; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES +static MyFloat *MinDist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + +/*! \brief Main function of SPH density calculation. + * + * This function computes the local density for each active SPH particle and + * the number of weighted neighbors in the current smoothing radius. If a + * particle with its smoothing region is fully inside the local domain, it is + * not exported to the other processors. The function also detects particles + * that have a number of neighbors outside the allowed tolerance range. For + * these particles, the smoothing length is adjusted accordingly, and the + * computation is called again. 
*
 * \return void
 */
void density(void)
{
  MyFloat *Left, *Right; /* per-particle lower / upper bracket of the smoothing length (0 = not yet set) */
  int idx, i, npleft, iter = 0;
  long long ntot;
  double desnumngb, t0, t1;

  CPU_Step[CPU_MISC] += measure_time();

  /* work arrays, sized for all local particles */
  NumNgb             = (MyFloat *)mymalloc("NumNgb", NumPart * sizeof(MyFloat));
  DhsmlDensityFactor = (MyFloat *)mymalloc("DhsmlDensityFactor", NumPart * sizeof(MyFloat));
  Left               = (MyFloat *)mymalloc("Left", NumPart * sizeof(MyFloat));
  Right              = (MyFloat *)mymalloc("Right", NumPart * sizeof(MyFloat));

#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
  MinDist = (MyFloat *)mymalloc("MinDist", NumPart * sizeof(MyFloat));
#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */

  /* start without any bracket for the particles that take part */
  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      if(density_isactive(i))
        {
          Left[i] = Right[i] = 0;
        }
    }

  generic_set_MaxNexport();

  desnumngb = All.DesNumNgb;

  /* we will repeat the whole thing for those particles where we didn't find enough neighbours */
  do
    {
      t0 = second();

      generic_comm_pattern(TimeBinsHydro.NActiveParticles, kernel_local, kernel_imported);

      /* do final operations on results */
      for(idx = 0, npleft = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
        {
          i = TimeBinsHydro.ActiveParticleList[idx];
          if(i < 0)
            continue;

          if(density_isactive(i))
            {
              if(P[i].Type == 0)
                {
                  if(SphP[i].Density > 0)
                    {
                      DhsmlDensityFactor[i] *= SphP[i].Hsml / (NUMDIMS * SphP[i].Density);
                      if(DhsmlDensityFactor[i] > -0.9) /* note: this would be -1 if only a single particle at zero lag is found */
                        DhsmlDensityFactor[i] = 1 / (1 + DhsmlDensityFactor[i]);
                      else
                        DhsmlDensityFactor[i] = 1;
                    }
                }

              /* neighbour number outside [desnumngb - dev, desnumngb + dev]? */
              if(NumNgb[i] < (desnumngb - All.MaxNumNgbDeviation) || NumNgb[i] > (desnumngb + All.MaxNumNgbDeviation))
                {
                  /* need to redo this particle */
                  npleft++;

                  /* bracket already narrower than a relative width of 1e-3: accept the value */
                  if(Left[i] > 0 && Right[i] > 0)
                    if((Right[i] - Left[i]) < 1.0e-3 * Left[i])
                      {
                        /* this one should be ok */
                        npleft--;
                        P[i].TimeBinHydro = -P[i].TimeBinHydro - 1; /* Mark as inactive */
                        continue;
                      }

                  /* too few neighbours: raise the lower bound; too many: lower the upper bound */
                  if(NumNgb[i] < (desnumngb - All.MaxNumNgbDeviation))
                    Left[i] = dmax(SphP[i].Hsml, Left[i]);
                  else
                    {
                      if(Right[i] != 0)
                        {
                          if(SphP[i].Hsml < Right[i])
                            Right[i] = SphP[i].Hsml;
                        }
                      else
                        Right[i] = SphP[i].Hsml;
                    }

                  /* diagnostics for particles that are still not converged shortly before the iteration limit */
                  if(iter >= MAXITER - 10)
                    {
                      printf("i=%d task=%d ID=%d Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i, ThisTask,
                             (int)P[i].ID, SphP[i].Hsml, Left[i], Right[i], (float)NumNgb[i], Right[i] - Left[i], P[i].Pos[0],
                             P[i].Pos[1], P[i].Pos[2]);
                      myflush(stdout);
                    }

                  /* both bounds known: bisect in Hsml^3; otherwise scale Hsml by a fixed factor
                   * (1.26^3 is approximately 2) */
                  if(Right[i] > 0 && Left[i] > 0)
                    SphP[i].Hsml = pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3);
                  else
                    {
                      if(Right[i] == 0 && Left[i] == 0)
                        terminate("should not occur");

                      if(Right[i] == 0 && Left[i] > 0)
                        {
                          SphP[i].Hsml *= 1.26;
                        }

                      if(Right[i] > 0 && Left[i] == 0)
                        {
                          SphP[i].Hsml /= 1.26;
                        }
                    }
                }
              else
                P[i].TimeBinHydro = -P[i].TimeBinHydro - 1; /* Mark as inactive */
            }
        }

      /* global number of particles that still need another pass */
      sumup_large_ints(1, &npleft, &ntot);

      t1 = second();

      if(ntot > 0)
        {
          iter++;

          /* NOTE: iter has just been incremented from a non-negative value, so this condition always holds */
          if(iter > 0)
            mpi_printf("DENSITY: ngb iteration %3d: need to repeat for %12lld particles. (took %g sec)\n", iter, ntot,
                       timediff(t0, t1));

          if(iter > MAXITER)
            terminate("failed to converge in neighbour iteration in density()\n");
        }
    }
  while(ntot > 0);

#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES

#if defined(REFLECTIVE_X) && defined(REFLECTIVE_Y) && defined(REFLECTIVE_Z)

  int count2    = 0;
  int countall2 = 0;

  for(i = 0; i < NumGas; i++)
    {
      /*
       * If the distance to the border of a particle is too small,
       * then the ghost particle will be too close to this particle.
       * Therefore we shift the particle in this case into the direction of the box center.
       */
      if(distance_to_border(i) < 0.5 * 0.001 * SphP[i].Hsml)
        {
          count2++;

          double dir[3];

          dir[0] = boxSize_X * 0.5 - P[i].Pos[0];
          dir[1] = boxSize_Y * 0.5 - P[i].Pos[1];
          dir[2] = boxSize_Z * 0.5 - P[i].Pos[2];

          double n = sqrt(dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]);
          // note: it's not possible that the operand of sqrt is zero here.

          dir[0] /= n;
          dir[1] /= n;
          dir[2] /= n;

          /* shift by 5 percent of the smoothing length along the unit vector */
          P[i].Pos[0] += 0.05 * SphP[i].Hsml * dir[0];
          P[i].Pos[1] += 0.05 * SphP[i].Hsml * dir[1];
          P[i].Pos[2] += 0.05 * SphP[i].Hsml * dir[2];
        }
    }

  MPI_Allreduce(&count2, &countall2, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  mpi_printf("\nFOUND %d particles extremely close to the reflective boundary. Fixing this. \n\n", countall2);
#endif /* #if defined(REFLECTIVE_X) && defined(REFLECTIVE_Y) && defined(REFLECTIVE_Z) */

  int count = 0, countall;

  /* count gas particles whose closest neighbour is nearer than 0.1 percent of Hsml */
  for(i = 0; i < NumGas; i++)
    if(MinDist[i] < 0.001 * SphP[i].Hsml)
      count++;

  MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  if(countall)
    {
      mpi_printf("\nFOUND %d SPH particles with an extremely close neighbor. Fixing this. \n\n", countall);

      /* displace each affected particle by 0.1 Hsml in a random direction */
      for(i = 0; i < NumGas; i++)
        if(MinDist[i] < 0.001 * SphP[i].Hsml)
          {
            double theta = acos(2 * get_random_number() - 1);
            double phi   = 2 * M_PI * get_random_number();

            P[i].Pos[0] += 0.1 * SphP[i].Hsml * sin(theta) * cos(phi);
            P[i].Pos[1] += 0.1 * SphP[i].Hsml * sin(theta) * sin(phi);
            P[i].Pos[2] += 0.1 * SphP[i].Hsml * cos(theta);
          }
    }
#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */

  /* release the work arrays in reverse order of their allocation */
#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
  myfree(MinDist);
#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
  myfree(Right);
  myfree(Left);
  myfree(DhsmlDensityFactor);
  myfree(NumNgb);

  /* mark as active again */
  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      /* undo the "-bin - 1" encoding that was used as the inactive marker above */
      if(P[i].TimeBinHydro < 0)
        P[i].TimeBinHydro = -P[i].TimeBinHydro - 1;
    }

  /* collect some timing information */
  CPU_Step[CPU_INIT] += measure_time();
}

/*! \brief Inner function of the SPH density calculation
 *
 * This function represents the core of the SPH density computation. The
 * target particle may either be local, or reside in the communication
 * buffer.
 *
 * \param[in] target Index of particle in local data/import buffer.
 * \param[in] mode Mode in which function is called (local or imported data).
 * \param[in] threadid ID of local thread.
+ *
+ * \return 0
+ */
+static int density_evaluate(int target, int mode, int threadid)
+{
+  int j, n;
+  int numngb, numnodes, *firstnode; /* numngb is counted below but its value is never read in this function */
+  double h, h2, hinv, hinv3, hinv4;
+  MyFloat rho;
+  double wk, dwk;
+  double dx, dy, dz, r, r2, u, mass_j;
+  MyFloat weighted_numngb;
+  MyFloat dhsmlrho;
+  MyDouble *pos;
+
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  MyFloat mindist = MAX_REAL_NUMBER;
+  MyIDType ID;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+  data_in local, *target_data;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      /* local particle: pack its input data into the stack variable 'local' */
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      numnodes = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      /* imported particle: input data is read from DataGet[target] */
+      target_data = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos = target_data->Pos;
+  h = target_data->Hsml;
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  ID = target_data->ID;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+
+  h2 = h * h;
+  hinv = 1.0 / h;
+#ifndef TWODIMS
+  hinv3 = hinv * hinv * hinv;
+#else /* #ifndef TWODIMS */
+  hinv3 = hinv * hinv / boxSize_Z; /* in 2d builds the third factor is 1/boxSize_Z instead of 1/h */
+#endif /* #ifndef TWODIMS #else */
+  hinv4 = hinv3 * hinv;
+
+  numngb = 0;
+  rho = weighted_numngb = dhsmlrho = 0;
+
+  int nfound = ngb_treefind_variable_threads(pos, h, target, mode, threadid, numnodes, firstnode);
+
+  for(n = 0; n < nfound; n++)
+    {
+      j = Thread[threadid].Ngblist[n];
+
+      dx = pos[0] - P[j].Pos[0];
+      dy = pos[1] - P[j].Pos[1];
+      dz = pos[2] - P[j].Pos[2];
+
+/* now find the closest image in the given box size */
+#ifndef REFLECTIVE_X
+      if(dx > boxHalf_X)
+        dx -= boxSize_X;
+      if(dx < -boxHalf_X)
+        dx += boxSize_X;
+#endif /* #ifndef REFLECTIVE_X */
+
+#ifndef REFLECTIVE_Y
+      if(dy > boxHalf_Y)
+        dy -= boxSize_Y;
+      if(dy < -boxHalf_Y)
+        dy += boxSize_Y;
+#endif /* #ifndef REFLECTIVE_Y */
+
+#ifndef REFLECTIVE_Z
+      if(dz > boxHalf_Z)
+        dz -= boxSize_Z;
+      if(dz < -boxHalf_Z)
+        dz += boxSize_Z;
+#endif /* #ifndef REFLECTIVE_Z */
+      r2 = dx * dx + dy * dy + dz * dz;
+
+      if(r2 < h2) /* candidates from the tree search are filtered by true distance here */
+        {
+          numngb++;
+
+          r = sqrt(r2);
+
+          u = r * hinv;
+
+          /* kernel value wk and derivative term dwk: two polynomial branches in
+           * u = r/h, split at u = 0.5 (KERNEL_COEFF_* are defined elsewhere) */
+          if(u < 0.5)
+            {
+              wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
+              dwk = hinv4 * u * (KERNEL_COEFF_3 * u - KERNEL_COEFF_4);
+            }
+          else
+            {
+              wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
+              dwk = hinv4 * KERNEL_COEFF_6 * (1.0 - u) * (1.0 - u);
+            }
+
+          mass_j = P[j].Mass;
+
+          rho += FLT(mass_j * wk);
+
+          weighted_numngb += FLT(NORM_COEFF * wk / hinv3); /* 4.0/3 * PI = 4.188790204786 */
+
+          dhsmlrho += FLT(-mass_j * (NUMDIMS * hinv * wk + u * dwk));
+
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+          if(ID != P[j].ID && mindist > r) /* distance to the closest neighbour carrying a different ID */
+            mindist = r;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+        }
+    }
+
+  out.Rho = rho;
+  out.Ngb = weighted_numngb;
+  out.DhsmlDensity = dhsmlrho;
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  out.MinDist = mindist;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+/*! \brief Determines if a cell is active in current timestep.
+ *
+ * If the cell is not active in a timestep, its value in TimeBinHydro is
+ * negative.
+ *
+ * \param[in] n Index of cell in P and SphP arrays.
+ *
+ * \return 1: cell active; 0: cell not active or not a cell.
+ */
+int density_isactive(int n)
+{
+  if(P[n].TimeBinHydro < 0) /* negative time bin = flagged inactive */
+    return 0;
+
+  if(P[n].Type == 0) /* only type 0 (gas cells) counts as active here */
+    return 1;
+
+  return 0;
+}
diff --git a/src/amuse/community/arepo/src/init/init.c b/src/amuse/community/arepo/src/init/init.c
new file mode 100644
index 0000000000..934fef29da
--- /dev/null
+++ b/src/amuse/community/arepo/src/init/init.c
@@ -0,0 +1,835 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <https://www.gnu.org/licenses/>.
+ *
+ * \file src/init/init.c
+ * \date 05/2018
+ * \brief Initialization of a simulation from initial conditions.
+ * \details contains functions:
+ * int init(void)
+ * void check_omega(void)
+ * void setup_smoothinglengths(void)
+ * void test_id_uniqueness(void)
+ * void calculate_maxid(void)
+ * int compare_IDs(const void *a, const void *b)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../mesh/voronoi/voronoi.h"
+
+/*! \brief Prepares the loaded initial conditions for the run.
+ *
+ * It is only called if RestartFlag != 1. Various counters and variables are
+ * initialized. Entries of the particle data structures not read from initial
+ * conditions are initialized or converted and an initial domain decomposition
+ * is performed. If gas cells are present, the initial SPH smoothing lengths
+ * are determined.
+ *
+ * \return status code: <0 if finished without errors and run can start,
+ *         0 code ends after calling init(), >0 an error occurred, terminate.
+ */
+int init(void)
+{
+  int i, j;
+  double mass;
+
+  assert(RestartFlag != 1);
+
+  if(All.ComovingIntegrationOn)
+    if(All.PeriodicBoundariesOn == 1)
+      {
+        if(RestartFlag < 3)
+          /* can't do this check when not all particles are loaded */
+          check_omega();
+        else
+          mpi_printf("INIT: Skipping Omega check since we are not doing a dynamical evolution (not all particles may be loaded)\n");
+      }
+
+#if defined(COOLING)
+  IonizeParams();
+#endif /* #if defined(COOLING) */
+
+  /* integer timeline: logarithmic in the scale factor for comoving runs, linear in time otherwise */
+  if(All.ComovingIntegrationOn)
+    {
+      All.Timebase_interval = (log(All.TimeMax) - log(All.TimeBegin)) / TIMEBASE;
+      All.Ti_Current = 0;
+    }
+  else
+    {
+      All.Timebase_interval = (All.TimeMax - All.TimeBegin) / TIMEBASE;
+      All.Ti_Current = 0;
+    }
+
+  set_cosmo_factors_for_current_time();
+
+  for(j = 0; j < 3; j++)
+    All.GlobalDisplacementVector[j] = 0;
+
+  All.NumCurrentTiStep = 0; /* setup some counters */
+  All.SnapshotFileCount = 0;
+
+  if(RestartFlag == 2)
+    {
+      /* without an explicit snapshot number, the last three characters of the IC file name are parsed as the number */
+      if(RestartSnapNum < 0)
+        All.SnapshotFileCount = atoi(All.InitCondFile + strlen(All.InitCondFile) - 3) + 1;
+      else
+        All.SnapshotFileCount = RestartSnapNum + 1;
+    }
+
+  All.TotNumOfForces = 0;
+  All.TopNodeAllocFactor = 0.08;
+  All.TreeAllocFactor = 0.7;
+  All.NgbTreeAllocFactor = 0.7;
+
+  if(NumPart < 1000)
+    All.TreeAllocFactor = 10.0;
+
+  DeRefMesh.Indi.AllocFacNdp = MIN_ALLOC_NUMBER;
+  DeRefMesh.Indi.AllocFacNdt = MIN_ALLOC_NUMBER;
+
+  Mesh.Indi.AllocFacNdp = 1.2 * NumGas + MIN_ALLOC_NUMBER;
+  Mesh.Indi.AllocFacNdt = 8.0 * NumGas + MIN_ALLOC_NUMBER;
+  Mesh.Indi.AllocFacNvf = 8.0 * NumGas + MIN_ALLOC_NUMBER;
+
+  Mesh.Indi.AllocFacNvc = 16.0 * NumGas + MIN_ALLOC_NUMBER;
+  Nvc = 0;
+
+  Mesh.Indi.AllocFacNinlist = 1.2 * NumGas + MIN_ALLOC_NUMBER;
+  Mesh.Indi.AllocFacN_DP_Buffer = 0.2 * NumGas + MIN_ALLOC_NUMBER;
+  Mesh.Indi.AllocFacNflux = 0.01 * NumGas + MIN_ALLOC_NUMBER;
+  Mesh.Indi.AllocFacNradinflux = 0.01 * NumGas + MIN_ALLOC_NUMBER;
+
+#ifdef MHD_POWELL
+  for(j = 0; j < 3; j++)
+    {
+      All.Powell_Momentum[j] = 0;
+      All.Powell_Angular_Momentum[j] = 0;
+    }
+  All.Powell_Energy = 0;
+#endif /* #ifdef MHD_POWELL */
+
+  All.TimeLastStatistics = All.TimeBegin - All.TimeBetStatistics;
+
+  set_softenings();
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  mpi_printf("INIT: Adaptive hydro softening, minimum gravitational softening for cells: %g\n", All.MinimumComovingHydroSoftening);
+  mpi_printf("INIT: Adaptive hydro softening, maximum gravitational softening for cells: %g\n",
+             All.MinimumComovingHydroSoftening * pow(All.AdaptiveHydroSofteningSpacing, NSOFTTYPES_HYDRO - 1));
+  mpi_printf("INIT: Adaptive hydro softening, number of softening values: %d\n", NSOFTTYPES_HYDRO);
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+
+#ifdef INDIVIDUAL_GRAVITY_SOFTENING
+  init_individual_softenings();
+#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */
+
+#ifdef SHIFT_BY_HALF_BOX
+  for(i = 0; i < NumPart; i++)
+    for(j = 0; j < 3; j++)
+      P[i].Pos[j] += 0.5 * All.BoxSize;
+#endif /* #ifdef SHIFT_BY_HALF_BOX */
+
+  for(i = 0; i < GRAVCOSTLEVELS; i++)
+    All.LevelToTimeBin[i] = -1;
+
+  for(i = 0; i < NumPart; i++)
+    for(j = 0; j < GRAVCOSTLEVELS; j++)
+      P[i].GravCost[j] = 0;
+
+  /* set unused coordinate values in 1d and 2d simulations to zero; this is needed for correct interfaces */
+  int nonzero_vel = 0;
+#ifdef ONEDIMS
+  for(i = 0; i < NumPart; i++)
+    {
+      P[i].Pos[1] = 0.0;
+      P[i].Pos[2] = 0.0;
+
+      if(P[i].Vel[1] != 0.0 || P[i].Vel[2] != 0.0)
+        {
+          nonzero_vel = 1;
+        }
+    }
+  if(nonzero_vel > 0)
+    {
+      warn("Initial y or z velocity nonzero in 1d simulation! Make sure you really want this!");
+    }
+#endif /* #ifdef ONEDIMS */
+
+#ifdef TWODIMS
+  for(i = 0; i < NumPart; i++)
+    {
+      P[i].Pos[2] = 0;
+
+      if(P[i].Vel[2] != 0.0)
+        {
+          nonzero_vel = 1;
+        }
+    }
+  if(nonzero_vel > 0)
+    {
+      warn("Initial z velocity nonzero in 2d simulation! Make sure you really want this!");
+    }
+#endif /* #ifdef TWODIMS */
+
+  if(All.ComovingIntegrationOn) /* change to new velocity variable */
+    {
+      for(i = 0; i < NumPart; i++)
+        {
+          for(j = 0; j < 3; j++)
+            P[i].Vel[j] *= sqrt(All.Time) * All.Time; /* for dm/gas particles, p = a^2 xdot */
+        }
+    }
+
+  /* measure mean cell mass */
+  int num = 0;
+  long long glob_num;
+  double glob_mass;
+  mass = 0;
+
+  for(i = 0; i < NumGas; i++)
+#ifdef REFINEMENT_HIGH_RES_GAS
+    if(SphP[i].AllowRefinement != 0)
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+      {
+        num += 1;
+        mass += P[i].Mass;
+      }
+
+  sumup_large_ints(1, &num, &glob_num);
+  MPI_Allreduce(&mass, &glob_mass, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+#ifndef REFINEMENT_HIGH_RES_GAS
+  if(glob_num != All.TotNumGas)
+    terminate("glob_num(=%lld) != All.TotNumGas(=%lld)", glob_num, All.TotNumGas);
+#endif /* #ifndef REFINEMENT_HIGH_RES_GAS */
+
+  if(All.TotNumGas > 0 && (glob_num == 0 || glob_mass == 0))
+    terminate("All.TotNumGas(=%lld) > 0 && (glob_num(=%lld) == 0 || glob_mass(=%g) == 0)", All.TotNumGas, glob_num, glob_mass);
+
+  /* assign global variables that depend on the mean cell mass */
+#if defined(REFINEMENT)
+  if(All.ReferenceGasPartMass == 0)
+    {
+      if(!All.ComovingIntegrationOn)
+        terminate("In non-comoving runs, ReferenceGasPartMass must be set to a non-zero value");
+
+      All.ReferenceGasPartMass = glob_mass / glob_num;
+
+      mpi_printf("REFINEMENT: The mean cell mass, which is used as a reference, is %g\n", All.ReferenceGasPartMass);
+    }
+  else
+    mpi_printf("REFINEMENT: The given reference cell mass is %g\n", All.ReferenceGasPartMass);
+  All.TargetGasMass = All.TargetGasMassFactor * All.ReferenceGasPartMass;
+  mpi_printf("REFINEMENT: setting All.TargetGasMass=%g\n", All.TargetGasMass);
+#endif /* #if defined(REFINEMENT) */
+
+  for(i = 0; i < TIMEBINS; i++)
+    All.Ti_begstep[i] = 0;
+
+  for(i = 0; i < NumPart; i++) /* start-up initialization */
+    {
+      for(j = 0; j < 3; j++)
+        P[i].GravAccel[j] = 0;
+
+#ifdef PMGRID
+      for(j = 0; j < 3; j++)
+        P[i].GravPM[j] = 0;
+#endif /* #ifdef PMGRID */
+      P[i].TimeBinHydro = 0;
+      P[i].TimeBinGrav = 0;
+      /* NOTE(review): the comment below contradicts the assignment it is attached to - confirm which one is intended */
+      P[i].OldAcc = 0; /* Do not zero as masses are stored here */
+
+#ifdef SELFGRAVITY
+#ifdef EVALPOTENTIAL
+      if(RestartFlag == 0)
+        P[i].Potential = 0;
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef SELFGRAVITY */
+
+#ifdef USE_SFR
+      if(RestartFlag == 0 && P[i].Type == 0)
+        SphP[i].Sfr = 0;
+#endif /* #ifdef USE_SFR */
+    }
+
+  for(i = 0; i < TIMEBINS; i++)
+    TimeBinSynchronized[i] = 1;
+
+  reconstruct_timebins();
+
+#ifdef PMGRID
+  All.PM_Ti_endstep = All.PM_Ti_begstep = 0;
+#endif /* #ifdef PMGRID */
+
+  for(i = 0; i < NumGas; i++) /* initialize sph_properties */
+    {
+      if(RestartFlag == 2 || RestartFlag == 3)
+        for(j = 0; j < 3; j++)
+          SphP[i].Center[j] = P[i].Pos[j];
+
+#if defined(CELL_CENTER_GRAVITY) && !defined(OUTPUT_CENTER_OF_MASS)
+      if(RestartFlag == 17 || RestartFlag == 18)
+        for(j = 0; j < 3; j++)
+          SphP[i].Center[j] = P[i].Pos[j];
+#endif /* #if defined(CELL_CENTER_GRAVITY) && !defined(OUTPUT_CENTER_OF_MASS) */
+
+      if(RestartFlag == 0)
+        {
+          for(j = 0; j < 3; j++)
+            SphP[i].Center[j] = P[i].Pos[j];
+
+          SphP[i].Hsml = 0;
+#if defined(COOLING)
+          SphP[i].Ne = 1.0;
+#endif /* #if defined(COOLING) */
+        }
+    }
+
+#ifndef NODEREFINE_BACKGROUND_GRID
+  /* mean volume per gas cell: box length/area/volume (1d/2d/3d) divided by the global gas count */
+  double mvol = 0;
+  if(All.TotNumGas)
+    {
+#ifdef TWODIMS
+      mvol = boxSize_X * boxSize_Y / All.TotNumGas;
+#else /* #ifdef TWODIMS */
+#ifdef ONEDIMS
+      mvol = boxSize_X / All.TotNumGas;
+#else /* #ifdef ONEDIMS */
+      mvol = boxSize_X * boxSize_Y * boxSize_Z / All.TotNumGas;
+#endif /* #ifdef ONEDIMS #else */
+#endif /* #ifdef TWODIMS #else */
+    }
+
+  All.MeanVolume = mvol;
+#endif /* #ifndef NODEREFINE_BACKGROUND_GRID */
+
+  mpi_printf("INIT: MeanVolume=%g\n", All.MeanVolume);
+
+#ifndef NO_ID_UNIQUE_CHECK
+  test_id_uniqueness();
+#endif /* #ifndef NO_ID_UNIQUE_CHECK */
+
+#ifdef REFINEMENT_MERGE_CELLS
+  for(i = 0; i < NumPart; i++)
+    if(P[i].Type == 0 && P[i].ID == 0)
+      terminate("INIT: Cannot use ID==0 for gas in ICs with derefinement enabled.");
+#endif /* #ifdef REFINEMENT_MERGE_CELLS */
+
+  voronoi_init_connectivity(&Mesh);
+
+#ifdef ADDBACKGROUNDGRID
+  prepare_domain_backgroundgrid();
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+  domain_Decomposition(); /* do initial domain decomposition (gives equal numbers of particles) */
+
+  if(RestartFlag == 18) /* recalculation of potential */
+    {
+      mark_active_timebins();
+      open_logfiles();
+#if defined(USE_SFR)
+      sfr_init();
+#endif /* #if defined(USE_SFR) */
+      set_non_standard_physics_for_current_time();
+
+#ifdef PMGRID
+      long_range_init_regionsize();
+#endif /* #ifdef PMGRID */
+
+      compute_grav_accelerations(All.HighestActiveTimeBin, FLAG_FULL_TREE);
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) && defined(FOF)
+      PS = (struct subfind_data *)mymalloc_movable(&PS, "PS", All.MaxPart * sizeof(struct subfind_data));
+      fof_prepare_output_order(); /* sort by type and Fileorder */
+      fof_subfind_exchange(MPI_COMM_WORLD);
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) && defined(FOF) */
+
+      /* Append the suffix in place. The former sprintf() call passed the destination
+       * buffer as its own "%s" argument, i.e. an overlapping copy, which is undefined
+       * behaviour. As before there is no bounds check: the buffer must have room
+       * for the suffix. */
+      strcat(All.SnapshotFileBase, "_potupdated");
+      mpi_printf("Start writing file %s\nRestartSnapNum %d\n", All.SnapshotFileBase, RestartSnapNum);
+      savepositions(RestartSnapNum, 0);
+
+      endrun();
+    }
+
+  /* will build tree */
+  ngb_treeallocate();
+  ngb_treebuild(NumGas);
+
+  if(RestartFlag == 3)
+    {
+#ifdef FOF
+      fof_fof(RestartSnapNum);
+      DumpFlag = 1;
+      savepositions(RestartSnapNum, 0);
+#endif /* #ifdef FOF */
+      return (0);
+    }
+
+  All.Ti_Current = 0;
+
+  if(RestartFlag == 0 || RestartFlag == 2 || RestartFlag == 14 || RestartFlag == 17)
+    setup_smoothinglengths();
+
+#ifdef ADDBACKGROUNDGRID
+  // This return more clearly shows that this function terminates the run
+  return add_backgroundgrid();
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+  create_mesh();
+  mesh_setup_exchange();
+
+  if(RestartFlag == 14)
+    {
+      char tess_name[1024];
+      /* bounded write: an over-long output directory truncates the name instead of overflowing the buffer */
+      snprintf(tess_name, sizeof(tess_name), "%s/tess_%03d", All.OutputDir, RestartSnapNum);
+      write_voronoi_mesh(&Mesh, tess_name, 0, NTask - 1);
+      return 0;
+    }
+
+  /* derive conserved quantities (mass, momentum, energy, B-field) from the primitive ones for every cell */
+  for(i = 0, mass = 0; i < NumGas; i++)
+    {
+      if(RestartFlag == 0)
+        {
+#ifdef READ_MASS_AS_DENSITY_IN_INPUT
+          P[i].Mass *= SphP[i].Volume;
+#endif /* #ifdef READ_MASS_AS_DENSITY_IN_INPUT */
+        }
+
+      SphP[i].Density = P[i].Mass / SphP[i].Volume;
+
+      if(SphP[i].Density < All.MinimumDensityOnStartUp)
+        {
+          SphP[i].Density = All.MinimumDensityOnStartUp;
+
+          P[i].Mass = SphP[i].Volume * SphP[i].Density;
+        }
+
+      SphP[i].Momentum[0] = P[i].Mass * P[i].Vel[0];
+      SphP[i].Momentum[1] = P[i].Mass * P[i].Vel[1];
+      SphP[i].Momentum[2] = P[i].Mass * P[i].Vel[2];
+
+#ifdef MHD
+#ifdef MHD_SEEDFIELD
+      if(RestartFlag == 0)
+        {
+          if(i == 0)
+            {
+              mpi_printf("MHD Seed field=%g, direction=%d\n", All.B_value, All.B_dir);
+            }
+
+          int k;
+          double bfac = 1. / (sqrt(All.UnitMass_in_g / All.UnitLength_in_cm) / (All.UnitTime_in_s / All.HubbleParam));
+
+          double B_value = All.B_value;
+
+          /* All.B_dir is a bit mask: bit k set means the seed field has a component along axis k */
+          for(k = 0; k < 3; k++)
+            if(All.B_dir & (1 << k))
+              {
+                SphP[i].BConserved[k] = B_value * SphP[i].Volume * bfac;
+                SphP[i].B[k] = SphP[i].BConserved[k] / SphP[i].Volume;
+              }
+            else
+              {
+                SphP[i].BConserved[k] = 0;
+                SphP[i].B[k] = SphP[i].BConserved[k] / SphP[i].Volume;
+              }
+
+          if(i == 0)
+            {
+              mpi_printf("BConserved[0] = %g|%g|%g\n", SphP[i].BConserved[0], SphP[i].BConserved[1], SphP[i].BConserved[2]);
+              mpi_printf("Volume[0] %g bfac %g\n", SphP[i].Volume, bfac);
+            }
+          /* convert Gauss-cgs to Heaviside-Lorentz */
+          {
+            int kk;
+            for(kk = 0; kk < 3; kk++)
+              {
+                SphP[i].BConserved[kk] /= sqrt(4. * M_PI);
+                SphP[i].B[kk] /= sqrt(4. * M_PI);
+              }
+          }
+        }
+      else
+        {
+          SphP[i].BConserved[0] = SphP[i].B[0] * SphP[i].Volume;
+          SphP[i].BConserved[1] = SphP[i].B[1] * SphP[i].Volume;
+          SphP[i].BConserved[2] = SphP[i].B[2] * SphP[i].Volume;
+        }
+#else /* #ifdef MHD_SEEDFIELD */
+      SphP[i].BConserved[0] = SphP[i].B[0] * SphP[i].Volume;
+      SphP[i].BConserved[1] = SphP[i].B[1] * SphP[i].Volume;
+      SphP[i].BConserved[2] = SphP[i].B[2] * SphP[i].Volume;
+
+#endif /* #ifdef MHD_SEEDFIELD #else */
+#endif /* #ifdef MHD */
+
+      /* utherm has been loaded from IC file */
+#ifdef MESHRELAX
+      SphP[i].Energy = P[i].Mass * SphP[i].Utherm;
+#else /* #ifdef MESHRELAX */
+      SphP[i].Energy = P[i].Mass * All.cf_atime * All.cf_atime * SphP[i].Utherm +
+                       0.5 * P[i].Mass * (P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]);
+#endif /* #ifdef MESHRELAX #else */
+
+#ifdef MHD
+      SphP[i].Energy += 0.5 * (SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2]) *
+                        SphP[i].Volume * All.cf_atime;
+#endif /* #ifdef MHD */
+
+      for(j = 0; j < 3; j++)
+        SphP[i].VelVertex[j] = P[i].Vel[j];
+
+      mass += P[i].Mass;
+    }
+
+#ifdef PASSIVE_SCALARS
+  for(i = 0; i < NumGas; i++)
+    {
+      for(j = 0; j < PASSIVE_SCALARS; j++)
+        SphP[i].PConservedScalars[j] = SphP[i].PScalars[j] * P[i].Mass;
+    }
+
+#endif /* #ifdef PASSIVE_SCALARS */
+
+  if(RestartFlag == 17)
+    {
+      update_primitive_variables();
+      exchange_primitive_variables();
+      calculate_gradients();
+      exchange_primitive_variables_and_gradients();
+      DumpFlag = 1;
+      savepositions(RestartSnapNum + 1, 0);
+      return (0);
+    }
+
+  update_primitive_variables();
+
+#ifdef TREE_BASED_TIMESTEPS
+  tree_based_timesteps_setsoundspeeds();
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  /* initialize star formation rate */
+#if defined(USE_SFR)
+  sfr_init();
+#endif /* #if defined(USE_SFR) */
+
+#if defined(USE_SFR)
+  for(i = 0; i < NumGas; i++)
+    SphP[i].Sfr = get_starformation_rate(i);
+#endif /* #if defined(USE_SFR) */
+
+  update_primitive_variables();
+
+  exchange_primitive_variables();
+
+  calculate_gradients();
+
+  exchange_primitive_variables_and_gradients();
+
+  free_mesh();
+
+  return -1; // return -1 means we ran to completion, i.e. not an endrun code
+}
+
+/*! \brief This routine computes the mass content of the box and compares it
+ * to the specified value of Omega-matter.
+ *
+ * Total and baryonic (type 0, plus type 4 with USE_SFR) masses are summed over
+ * all tasks and converted to density parameters using All.BoxSize, All.Hubble
+ * and All.G. A relative deviation above 10% terminates the run (except in
+ * TWODIMS builds); a deviation above 0.1% produces a warning on task 0. The
+ * decision whether baryons are present is made globally, so that every task
+ * takes the same branch.
+ *
+ * \return void
+ */
+void check_omega(void)
+{
+  double mass = 0, masstot, omega;
+  double mass_b = 0, masstot_b, omega_b;
+  int i, n_b = 0;
+  int have_b_local, have_b_global;
+
+  for(i = 0; i < NumPart; i++)
+    {
+      mass += P[i].Mass;
+      if(P[i].Type == 0)
+        {
+          mass_b += P[i].Mass;
+          n_b += 1;
+        }
+#ifdef USE_SFR
+      if(P[i].Type == 4)
+        {
+          mass_b += P[i].Mass;
+          n_b += 1;
+        }
+#endif /* #ifdef USE_SFR */
+    }
+  MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  MPI_Allreduce(&mass_b, &masstot_b, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  /* n_b only counts the baryons held by this task. Branching on it directly would
+   * let tasks without local gas take the "no baryons" path (and task 0 print a
+   * bogus warning) although omega_b is a global quantity. Reduce a 0/1 flag with
+   * MPI_MAX instead of summing the counts, which could overflow an int. */
+  have_b_local = (n_b > 0);
+  MPI_Allreduce(&have_b_local, &have_b_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  omega = masstot / (All.BoxSize * All.BoxSize * All.BoxSize) / (3 * All.Hubble * All.Hubble / (8 * M_PI * All.G));
+  omega_b = masstot_b / (All.BoxSize * All.BoxSize * All.BoxSize) / (3 * All.Hubble * All.Hubble / (8 * M_PI * All.G));
+
+  if(have_b_global)
+    {
+      if(fabs((omega - All.Omega0) / omega) > 1.0e-1 || fabs((omega_b - All.OmegaBaryon) / omega_b) > 1.0e-1)
+        {
+#ifndef TWODIMS
+          mpi_terminate(
+              "\n\nI've found something odd!\nThe mass content accounts for Omega=%g and OmegaBaryon=%g,\nbut you specified Omega=%g "
+              "and OmegaBaryon=%g in the parameterfile.\n\nI better stop.\n",
+              omega, omega_b, All.Omega0, All.OmegaBaryon);
+#endif /* #ifndef TWODIMS */
+        }
+
+      if(fabs((omega - All.Omega0) / omega) > 1.0e-3 || fabs((omega_b - All.OmegaBaryon) / omega_b) > 1.0e-3)
+        if(ThisTask == 0)
+          warn(
+              "I've found something odd! The mass content accounts for Omega=%g and OmegaBaryon=%g, but you specified Omega=%g and "
+              "OmegaBaryon=%g in the parameterfile.",
+              omega, omega_b, All.Omega0, All.OmegaBaryon);
+    }
+  else
+    {
+      if(All.OmegaBaryon != 0)
+        if(ThisTask == 0)
+          warn(
+              "We are running with no baryons, even though you have specified OmegaBaryon=%g in the parameterfile. Please make sure "
+              "you really want this.\n\n",
+              All.OmegaBaryon);
+
+      if(fabs((omega - All.Omega0) / omega) > 1.0e-1)
+        {
+#ifndef TWODIMS
+          mpi_terminate(
+              "\n\nI've found something odd!\nThe mass content accounts for Omega=%g and OmegaBaryon=%g,\nbut you specified Omega=%g "
+              "and OmegaBaryon=%g in the parameterfile.\n\nI better stop.\n",
+              omega, omega_b, All.Omega0, All.OmegaBaryon);
+#endif /* #ifndef TWODIMS */
+        }
+
+      if(fabs((omega - All.Omega0) / omega) > 1.0e-3)
+        if(ThisTask == 0)
+          warn(
+              "I've found something odd! The mass content accounts for Omega=%g and OmegaBaryon=%g, but you specified Omega=%g and "
+              "OmegaBaryon=%g in the parameterfile.",
+              omega, omega_b, All.Omega0, All.OmegaBaryon);
+    }
+}
+
+/*! \brief This function is used to find an initial SPH smoothing length for
+ * each cell.
+ *
+ * It guarantees that the number of neighbours will be between
+ * desired_ngb-MAXDEV and desired_ngb+MAXDEV. For simplicity, a first guess
+ * of the smoothing length is provided to the function density(), which will
+ * then iterate if needed to find the right smoothing length.
+ *
+ * \return void
+ */
+void setup_smoothinglengths(void)
+{
+  int cell, node, parent;
+  double *save_masses = mymalloc("save_masses", NumGas * sizeof(double));
+
+  /* stash the real masses and give every gas cell unit mass for the tree build below */
+  for(cell = 0; cell < NumGas; cell++)
+    {
+#ifdef NO_GAS_SELFGRAVITY
+      /* This is needed otherwise the force tree will not be constructed for gas particles */
+      P[cell].Type = -1;
+#endif /* #ifdef NO_GAS_SELFGRAVITY */
+      save_masses[cell] = P[cell].Mass;
+      P[cell].Mass = 1.0;
+    }
+
+#ifdef HIERARCHICAL_GRAVITY
+  /* every gas cell is put on the list of gravity-active particles */
+  for(cell = 0; cell < NumGas; cell++)
+    TimeBinsGravity.ActiveParticleList[cell] = cell;
+  TimeBinsGravity.NActiveParticles = NumGas;
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+
+  construct_forcetree(1, 1, 0, 0); /* build force tree with gas particles only */
+
+  for(cell = 0; cell < NumGas; cell++)
+    {
+      node = Father[cell];
+
+      if(node < 0)
+        terminate("i=%d no=%d\n", cell, node);
+
+      /* climb towards the root until the node mass reaches 10 * DesNumNgb cell
+       * masses, or until there is no parent left */
+      while(10 * All.DesNumNgb * P[cell].Mass > Nodes[node].u.d.mass && (parent = Nodes[node].u.d.father) >= 0)
+        node = parent;
+
+      /* first guess: scale the node size by the fraction of the node mass that
+       * DesNumNgb cells represent (cube root in 3d, square root in 2d) */
+#ifndef TWODIMS
+      SphP[cell].Hsml = pow(3.0 / (4 * M_PI) * All.DesNumNgb * P[cell].Mass / Nodes[node].u.d.mass, 1.0 / 3) * Nodes[node].len;
+#else /* #ifndef TWODIMS */
+      SphP[cell].Hsml = pow(1.0 / (M_PI)*All.DesNumNgb * P[cell].Mass / Nodes[node].u.d.mass, 1.0 / 2) * Nodes[node].len;
+#endif /* #ifndef TWODIMS #else */
+#ifdef NO_GAS_SELFGRAVITY
+      /* Reset the original particle type */
+      P[cell].Type = 0;
+#endif /* #ifdef NO_GAS_SELFGRAVITY */
+    }
+
+  /* the force tree is no longer needed once the guesses are in place */
+  myfree(Father);
+  myfree(Nextnode);
+
+  myfree(Tree_Points);
+  force_treefree();
+
+  /* iterate the guesses to the final smoothing lengths (still with unit masses) */
+  density();
+
+  /* put the real masses back and seed the Delaunay search radius with Hsml */
+  for(cell = 0; cell < NumGas; cell++)
+    {
+      P[cell].Mass = save_masses[cell];
+      SphP[cell].MaxDelaunayRadius = SphP[cell].Hsml;
+    }
+
+  myfree(save_masses);
+
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  /* density() may have moved particles in this configuration, so the domain
+   * decomposition and the neighbour tree are rebuilt */
+  ngb_treefree();
+  domain_free();
+  domain_Decomposition();
+  ngb_treeallocate();
+  ngb_treebuild(NumGas);
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+}
+
+/*! \brief This function checks for unique particle IDs.
+ *
+ * The particle IDs are copied to an array and then sorted among all tasks.
+ * The sorted array is then checked for duplicates, both within each task and
+ * across the boundary to the next task. If a duplicate is found, the code
+ * terminates.
+ *
+ * \return void
+ */
+void test_id_uniqueness(void)
+{
+  int k;
+  double tstart, tend;
+  MyIDType *sorted_ids, *first_id_of_task;
+
+  mpi_printf("INIT: Testing ID uniqueness...\n");
+
+  if(NumPart == 0)
+    terminate("need at least one particle per cpu\n");
+
+  tstart = second();
+
+  sorted_ids = (MyIDType *)mymalloc("ids", NumPart * sizeof(MyIDType));
+  first_id_of_task = (MyIDType *)mymalloc("ids_first", NTask * sizeof(MyIDType));
+
+  for(k = 0; k < NumPart; k++)
+    sorted_ids[k] = P[k].ID;
+
+  parallel_sort(sorted_ids, NumPart, sizeof(MyIDType), compare_IDs);
+
+  /* after sorting, equal IDs held by the same task are adjacent */
+  for(k = 0; k + 1 < NumPart; k++)
+    if(sorted_ids[k + 1] == sorted_ids[k])
+      terminate("non-unique ID=%lld found on task=%d (i=%d NumPart=%d)\n", (long long)sorted_ids[k + 1], ThisTask, k + 1, NumPart);
+
+  /* every task publishes its smallest ID, so that each task can compare its
+   * largest ID against the first one of its successor */
+  MPI_Allgather(sorted_ids, sizeof(MyIDType), MPI_BYTE, first_id_of_task, sizeof(MyIDType), MPI_BYTE, MPI_COMM_WORLD);
+
+  if(ThisTask < NTask - 1 && sorted_ids[NumPart - 1] == first_id_of_task[ThisTask + 1])
+    terminate("non-unique ID=%lld found on task=%d\n", (long long)sorted_ids[NumPart - 1], ThisTask);
+
+  myfree(first_id_of_task);
+  myfree(sorted_ids);
+
+  tend = second();
+
+  mpi_printf("INIT: success. took=%g sec\n", timediff(tstart, tend));
+}
+
+/*! \brief Calculates global maximum of the IDs of all particles.
+ *
+ * This is needed for REFINEMENT_SPLIT_CELLS.
+ *
+ * The result is stored in All.MaxID, but only in builds where
+ * REFINEMENT_SPLIT_CELLS or USE_SFR is defined; otherwise it is discarded.
+ *
+ * \return void
+ */
+void calculate_maxid(void)
+{
+  /* determine maximum ID */
+  MyIDType maxid, *tmp;
+  int i;
+
+  /* local maximum first (0 if this task holds no particles) */
+  for(i = 0, maxid = 0; i < NumPart; i++)
+    if(P[i].ID > maxid)
+      {
+        maxid = P[i].ID;
+      }
+
+  tmp = (MyIDType *)mymalloc("tmp", NTask * sizeof(MyIDType));
+
+  /* IDs are exchanged as raw bytes and the maximum is taken by hand */
+  MPI_Allgather(&maxid, sizeof(MyIDType), MPI_BYTE, tmp, sizeof(MyIDType), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(i = 0; i < NTask; i++)
+    if(tmp[i] > maxid)
+      maxid = tmp[i];
+
+#if defined(REFINEMENT_SPLIT_CELLS) || defined(USE_SFR)
+  All.MaxID = maxid;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) || defined(USE_SFR) */
+
+  myfree(tmp);
+}
+
+/*! \brief Comparison function for two MyIDType objects.
+ *
+ * Used as sorting-kernel for id_uniqueness check.
+ *
+ * \param[in] a Pointer to the first MyIDType value.
+ * \param[in] b Pointer to the second MyIDType value.
+ *
+ * \return (-1,0,1): -1 if a < b, 0 if a == b, +1 if a > b
+ */
+int compare_IDs(const void *a, const void *b)
+{
+  /* read through const-qualified pointers; the arguments are never modified */
+  const MyIDType id_a = *(const MyIDType *)a;
+  const MyIDType id_b = *(const MyIDType *)b;
+
+  if(id_a < id_b)
+    return -1;
+
+  if(id_a > id_b)
+    return +1;
+
+  return 0;
+}
diff --git a/src/amuse/community/arepo/src/io/global.c b/src/amuse/community/arepo/src/io/global.c
new file mode 100644
index 0000000000..e32ace4300
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/global.c
@@ -0,0 +1,257 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <https://www.gnu.org/licenses/>.
+ *
+ * \file src/io/global.c
+ * \date 05/2018
+ * \brief Routines to compute statistics of the global state of the
+ * code.
+ * \details contains functions:
+ * void compute_statistics(void)
+ * void energy_statistics(void)
+ * void compute_global_quantities_of_system(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Writes a new line of global energy statistics when one is due.
+ *
+ * Statistics are produced (via energy_statistics()) only if at least
+ * All.TimeBetStatistics has passed since All.TimeLastStatistics and the
+ * highest active time bin equals the highest occupied one. The time of the
+ * last statistics is then advanced by exactly one interval.
+ *
+ * \return void
+ */
+void compute_statistics(void)
+{
+  /* check whether we want a full energy statistics */
+  int interval_elapsed = (All.Time - All.TimeLastStatistics) >= All.TimeBetStatistics;
+  int top_level_sync = All.HighestActiveTimeBin == All.HighestOccupiedTimeBin; /* allow only top-level synchronization points */
+
+  if(!interval_elapsed || !top_level_sync)
+    return;
+
+  TIMER_START(CPU_LOGS);
+
+  energy_statistics(); /* compute and output energy statistics */
+
+  All.TimeLastStatistics += All.TimeBetStatistics;
+
+  TIMER_STOP(CPU_LOGS);
+}
+
+/*! \brief Compute global statistics of the system.
+ *
+ * This function first calls a computation of various global
+ * quantities of the particle distribution
+ * (compute_global_quantities_of_system() ), and then writes some statistics
+ * about the energies of the various particle types to the file FdEnergy
+ * (energy.txt).
+ * + * \return void + */ +void energy_statistics(void) +{ + double egyinj_tot; /* EgyInjection summed over all tasks; the reduction only delivers it to task 0 */ + + compute_global_quantities_of_system(); + + MPI_Reduce(&EgyInjection, &egyinj_tot, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + fprintf(FdEnergy, "%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g\n", All.Time, /* columns: time; total int/pot/kin energy; int/pot/kin energy for each type 0-5; mass for each type 0-5; injected energy */ + SysState.EnergyInt, SysState.EnergyPot, SysState.EnergyKin, SysState.EnergyIntComp[0], SysState.EnergyPotComp[0], + SysState.EnergyKinComp[0], SysState.EnergyIntComp[1], SysState.EnergyPotComp[1], SysState.EnergyKinComp[1], + SysState.EnergyIntComp[2], SysState.EnergyPotComp[2], SysState.EnergyKinComp[2], SysState.EnergyIntComp[3], + SysState.EnergyPotComp[3], SysState.EnergyKinComp[3], SysState.EnergyIntComp[4], SysState.EnergyPotComp[4], + SysState.EnergyKinComp[4], SysState.EnergyIntComp[5], SysState.EnergyPotComp[5], SysState.EnergyKinComp[5], + SysState.MassComp[0], SysState.MassComp[1], SysState.MassComp[2], SysState.MassComp[3], SysState.MassComp[4], + SysState.MassComp[5], egyinj_tot); + + myflush(FdEnergy); + } +} + +/*! \brief This routine computes various global properties of the particle + * distribution and stores the result in the struct `SysState'. + * + * Currently, not all the information that's computed here is + * actually used (e.g. momentum is not really used anywhere), + * just the energies are written to a log-file every once in a while.
+ * + * \return void + */ +void compute_global_quantities_of_system(void) +{ + int i, j, n; + struct state_of_system sys; + double egyspec, vel[3]; + + for(n = 0; n < NTYPES; n++) + { + sys.MassComp[n] = sys.EnergyKinComp[n] = sys.EnergyPotComp[n] = sys.EnergyIntComp[n] = 0; + + for(j = 0; j < 4; j++) /* components 0-2 are x,y,z; component 3 receives the vector norm on task 0 further down */ + sys.CenterOfMassComp[n][j] = sys.MomentumComp[n][j] = sys.AngMomentumComp[n][j] = 0; + } + + for(i = 0; i < NumPart; i++) + { + sys.MassComp[P[i].Type] += P[i].Mass; + +#if defined(SELFGRAVITY) +#ifdef EVALPOTENTIAL +#ifndef EXACT_GRAVITY_FOR_PARTICLE_TYPE + sys.EnergyPotComp[P[i].Type] += + 0.5 * P[i].Mass * (P[i].Potential + All.G * P[i].Mass / (All.ForceSoftening[P[i].SofteningType] / 2.8)) / All.cf_atime; +#else /* #ifndef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + /* ignore self-contribution from gravity if exact gravity is used */ + if(P[i].Type == EXACT_GRAVITY_FOR_PARTICLE_TYPE) + sys.EnergyPotComp[P[i].Type] += 0.5 * P[i].Mass * P[i].Potential / All.cf_atime; + else + sys.EnergyPotComp[P[i].Type] += + 0.5 * P[i].Mass * (P[i].Potential + All.G * P[i].Mass / (All.ForceSoftening[P[i].SofteningType] / 2.8)) / All.cf_atime; +#endif /* #ifndef EXACT_GRAVITY_FOR_PARTICLE_TYPE #else */ +#endif /* #ifdef EVALPOTENTIAL */ +#endif /* #if defined(SELFGRAVITY) */ + +#if defined(EXTERNALGRAVITY) +#if defined(SELFGRAVITY) + sys.EnergyPotComp[P[i].Type] += 0.5 * P[i].Mass * P[i].ExtPotential; /* note: ExtPotential already included on P[].p.Potential, + that's why only 0.5 is needed here to recover the rest */ +#else /* #if defined(SELFGRAVITY) */ + sys.EnergyPotComp[P[i].Type] += 1.0 * P[i].Mass * P[i].ExtPotential; +#endif /* #if defined(SELFGRAVITY) #else */ +#endif /* #if defined(EXTERNALGRAVITY) */ + + if(P[i].Type == 0) + { + for(j = 0; j < 3; j++) + { + vel[j] = P[i].Vel[j]; + } + + sys.EnergyKinComp[0] += 0.5 * P[i].Mass * (vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]); /* NOTE(review): unlike the non-gas branch, no All.cf_a2inv factor is applied here -- confirm this is intended */ + + egyspec = SphP[i].Utherm; + + sys.EnergyIntComp[0] += P[i].Mass * egyspec; + } + else + {
for(j = 0; j < 3; j++) + { + vel[j] = P[i].Vel[j]; + } + sys.EnergyKinComp[P[i].Type] += 0.5 * P[i].Mass * (vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]) * All.cf_a2inv; + } + + for(j = 0; j < 3; j++) + { + sys.MomentumComp[P[i].Type][j] += P[i].Mass * vel[j]; + sys.CenterOfMassComp[P[i].Type][j] += P[i].Mass * P[i].Pos[j]; + } + + sys.AngMomentumComp[P[i].Type][0] += P[i].Mass * (P[i].Pos[1] * vel[2] - P[i].Pos[2] * vel[1]); + sys.AngMomentumComp[P[i].Type][1] += P[i].Mass * (P[i].Pos[2] * vel[0] - P[i].Pos[0] * vel[2]); + sys.AngMomentumComp[P[i].Type][2] += P[i].Mass * (P[i].Pos[0] * vel[1] - P[i].Pos[1] * vel[0]); + } + + /* sum the stuff over all processors */ + MPI_Reduce(&sys.MassComp[0], &SysState.MassComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&sys.EnergyPotComp[0], &SysState.EnergyPotComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&sys.EnergyIntComp[0], &SysState.EnergyIntComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&sys.EnergyKinComp[0], &SysState.EnergyKinComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&sys.MomentumComp[0][0], &SysState.MomentumComp[0][0], NTYPES * 4, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&sys.AngMomentumComp[0][0], &SysState.AngMomentumComp[0][0], NTYPES * 4, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&sys.CenterOfMassComp[0][0], &SysState.CenterOfMassComp[0][0], NTYPES * 4, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + for(i = 0; i < NTYPES; i++) + SysState.EnergyTotComp[i] = SysState.EnergyKinComp[i] + SysState.EnergyPotComp[i] + SysState.EnergyIntComp[i]; + + SysState.Mass = SysState.EnergyKin = SysState.EnergyPot = SysState.EnergyInt = SysState.EnergyTot = 0; + + for(j = 0; j < 3; j++) + SysState.Momentum[j] = SysState.AngMomentum[j] = SysState.CenterOfMass[j] = 0; + + for(i = 0; i < NTYPES; i++) + { + SysState.Mass += SysState.MassComp[i]; + SysState.EnergyKin +=
SysState.EnergyKinComp[i]; + SysState.EnergyPot += SysState.EnergyPotComp[i]; + SysState.EnergyInt += SysState.EnergyIntComp[i]; + SysState.EnergyTot += SysState.EnergyTotComp[i]; + + for(j = 0; j < 3; j++) + { + SysState.Momentum[j] += SysState.MomentumComp[i][j]; + SysState.AngMomentum[j] += SysState.AngMomentumComp[i][j]; + SysState.CenterOfMass[j] += SysState.CenterOfMassComp[i][j]; + } + } + + for(i = 0; i < NTYPES; i++) + for(j = 0; j < 3; j++) + if(SysState.MassComp[i] > 0) + SysState.CenterOfMassComp[i][j] /= SysState.MassComp[i]; + + for(j = 0; j < 3; j++) + if(SysState.Mass > 0) + SysState.CenterOfMass[j] /= SysState.Mass; + + for(i = 0; i < NTYPES; i++) + { + SysState.CenterOfMassComp[i][3] = SysState.MomentumComp[i][3] = SysState.AngMomentumComp[i][3] = 0; + for(j = 0; j < 3; j++) + { + SysState.CenterOfMassComp[i][3] += SysState.CenterOfMassComp[i][j] * SysState.CenterOfMassComp[i][j]; + SysState.MomentumComp[i][3] += SysState.MomentumComp[i][j] * SysState.MomentumComp[i][j]; + SysState.AngMomentumComp[i][3] += SysState.AngMomentumComp[i][j] * SysState.AngMomentumComp[i][j]; + } + SysState.CenterOfMassComp[i][3] = sqrt(SysState.CenterOfMassComp[i][3]); + SysState.MomentumComp[i][3] = sqrt(SysState.MomentumComp[i][3]); + SysState.AngMomentumComp[i][3] = sqrt(SysState.AngMomentumComp[i][3]); + } + + SysState.CenterOfMass[3] = SysState.Momentum[3] = SysState.AngMomentum[3] = 0; + + for(j = 0; j < 3; j++) + { + SysState.CenterOfMass[3] += SysState.CenterOfMass[j] * SysState.CenterOfMass[j]; + SysState.Momentum[3] += SysState.Momentum[j] * SysState.Momentum[j]; + SysState.AngMomentum[3] += SysState.AngMomentum[j] * SysState.AngMomentum[j]; + } + + SysState.CenterOfMass[3] = sqrt(SysState.CenterOfMass[3]); + SysState.Momentum[3] = sqrt(SysState.Momentum[3]); + SysState.AngMomentum[3] = sqrt(SysState.AngMomentum[3]); + } + + /* give everyone the result, maybe they want to do something with it */ + MPI_Bcast(&SysState, sizeof(struct state_of_system), MPI_BYTE,
0, MPI_COMM_WORLD); +} diff --git a/src/amuse/community/arepo/src/io/hdf5_util.c b/src/amuse/community/arepo/src/io/hdf5_util.c new file mode 100644 index 0000000000..a613a36bdc --- /dev/null +++ b/src/amuse/community/arepo/src/io/hdf5_util.c @@ -0,0 +1,881 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/hdf5_util.c + * \date 05/2018 + * \brief Contains the wrapper functions to the HDF5 library functions. + * \details The wrapper functions explicitly check for error conditions + * and terminate the run if such conditions occur. The HDF5 error + * handler is disabled in case of termination not to repeat the + * error message of the handler again at the program exit. + * + * \par Major modifications and contributions: + * - 07.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef HAVE_HDF5 +#ifndef HDF5UTIL_H +#define HDF5UTIL_H +#include + +/*! \brief Wraps creating a file to give a nice error message. + * + * Calls H5Fcreate. + * + * \param[in] fname File name. + * \param[in] flags Flags handed to H5Fcreate.
+ * \param[in] fcpl_id File creation property list identifier, used when + * modifying default file meta-data. Use H5P_DEFAULT to specify + * default file creation properties. + * \param[in] fapl_id File access property list identifier. If parallel file + * access is desired, this is a collective call according to the + * communicator stored in the fapl_id. Use H5P_DEFAULT for default + * file access properties. + * + * \return File identifier. + */ +hid_t my_H5Fcreate(const char *fname, unsigned int flags, hid_t fcpl_id, hid_t fapl_id) +{ + hid_t file_id = H5Fcreate(fname, flags, fcpl_id, fapl_id); + +#ifndef TOLERATE_WRITE_ERROR + if(file_id < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to create file %s\n", ThisTask, fname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return file_id; +} + +/*! \brief Wraps creating a group to give a nice error message. + * + * Calls H5Gcreate. + * + * \param[in] loc_id File or group identifier. + * \param[in] groupname Absolute or relative name of the new group. + * \param[in] size_hint Optional parameter indicating the number of bytes to + * reserve for the names that will appear in the group. A + * conservative estimate could result in multiple system-level + * I/O requests to read the group name heap; a liberal estimate + * could result in a single large I/O request even when the group + * has just a few names. HDF5 stores each name with a null + * terminator. + * + * \return Group identifier. + */ +hid_t my_H5Gcreate(hid_t loc_id, const char *groupname, size_t size_hint) +{ + hid_t group_id = H5Gcreate(loc_id, groupname, size_hint); + +#ifndef TOLERATE_WRITE_ERROR + if(group_id < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to create group %s\n", ThisTask, groupname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return group_id; +} + +/*! \brief Wraps creating a dataset to give a nice error message.
+ * + * Calls H5Dcreate. + * + * \param[in] loc_id Identifier of the file or group within which to create + * the dataset. + * \param[in] datasetname The name of the dataset to create. + * \param[in] type_id Identifier of the datatype to use when creating the + * dataset. + * \param[in] space_id Identifier of the dataspace to use when creating the + * dataset. + * \param[in] dcpl_id Dataset creation property list identifier. + * + * \return Dataset identifier. + */ +hid_t my_H5Dcreate(hid_t loc_id, const char *datasetname, hid_t type_id, hid_t space_id, hid_t dcpl_id) +{ + hid_t dataset_id = H5Dcreate(loc_id, datasetname, type_id, space_id, dcpl_id); + +#ifndef TOLERATE_WRITE_ERROR + if(dataset_id < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, Error detected in HDF5: unable to create dataset %s\n", ThisTask, datasetname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return dataset_id; +} + +/*! \brief Wraps writing a dataset to give a nice error message. + * + * Calls H5Dwrite. + * + * \param[in] dataset_id Identifier of the dataset to write to. + * \param[in] mem_type_id Identifier of the memory datatype. + * \param[in] mem_space_id Identifier of the memory dataspace. + * \param[in] file_space_id Identifier of the dataset's dataspace in the file. + * \param[in] xfer_plist_id Identifier of a transfer property list for this + * I/O operation. + * \param[in] buf Buffer with data to be written to the file. + * \param[in] datasetname Name of dataset (for error message only) + * + * \return Status of write operation (0 without writing anything if TOLERATE_WRITE_ERROR is defined and WriteErrorFlag is set).
+ */ +herr_t my_H5Dwrite(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, const void *buf, + const char *datasetname) +{ +#ifdef TOLERATE_WRITE_ERROR + if(WriteErrorFlag) + return 0; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + + herr_t status = H5Dwrite(dataset_id, mem_type_id, mem_space_id, file_space_id, xfer_plist_id, buf); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to write dataset %s\n", ThisTask, datasetname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return status; +} + +/*! \brief Wraps creating an attribute to give a nice error message. + * + * \param[in] loc_id Identifier for the object to which the attribute is to be + * attached. May be any HDF5 object identifier (group, dataset, or + * committed datatype) or an HDF5 file identifier; if loc_id is a + * file identifier, the attribute will be attached to that file's + * root group. + * \param[in] attr_name Name of attribute to create. + * \param[in] type_id Identifier of datatype for attribute. + * \param[in] space_id Identifier of dataspace for attribute. + * \param[in] acpl_id Identifier of creation property list (specify + * H5P_DEFAULT). + * + * \return Attribute identifier. + */ +hid_t my_H5Acreate(hid_t loc_id, const char *attr_name, hid_t type_id, hid_t space_id, hid_t acpl_id) +{ + hid_t attribute_id = H5Acreate(loc_id, attr_name, type_id, space_id, acpl_id); + +#ifndef TOLERATE_WRITE_ERROR + if(attribute_id < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to create attribute %s\n", ThisTask, attr_name); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return attribute_id; +} + +/*! \brief Wraps writing an attribute to give a nice error message. + * + * \param[in] attr_id Identifier of an attribute to write. + * \param[in] mem_type_id Identifier of the attribute datatype (in memory).
+ * \param[in] buf Data to be written. + * \param[in] attr_name Name of attribute (for error message only). + * + * \return status (non-negative if successful). + */ +herr_t my_H5Awrite(hid_t attr_id, hid_t mem_type_id, const void *buf, const char *attr_name) +{ +#ifdef TOLERATE_WRITE_ERROR + if(WriteErrorFlag) + return 0; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + + herr_t status = H5Awrite(attr_id, mem_type_id, buf); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to write attribute %s\n", ThisTask, attr_name); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return status; +} + +/*! \brief Wraps creating a dataspace to give a nice error message. + * + * \param[in] type Type of dataspace to be created. + * + * \return Dataspace identifier if successful (the H5Screate result is returned unchecked if TOLERATE_WRITE_ERROR is defined). + */ +hid_t my_H5Screate(H5S_class_t type) +{ + hid_t dataspace_id = H5Screate(type); + +#ifndef TOLERATE_WRITE_ERROR + if(dataspace_id < 0) + { + H5Eset_auto(NULL, NULL); + switch(type) + { + case H5S_SCALAR: + terminate("On Task %d, error detected in HDF5: unable to create a scalar dataspace\n", ThisTask); + break; + case H5S_SIMPLE: + terminate("On Task %d, error detected in HDF5: unable to create a simple dataspace\n", ThisTask); + break; + default: + terminate("On Task %d, error detected in HDF5: unknown dataspace type\n", ThisTask); + break; + } + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return dataspace_id; +} + +/*! \brief Wraps creating a simple dataspace to give a nice error message. + * + * \param[in] rank Number of dimensions of dataspace. + * \param[in] current_dims Array specifying the size of each dimension. + * \param[in] maximum_dims Array specifying the maximum size of each + * dimension. + * + * \return Dataspace identifier if successful.
+ */ +hid_t my_H5Screate_simple(int rank, const hsize_t *current_dims, const hsize_t *maximum_dims) +{ + hid_t dataspace_id = H5Screate_simple(rank, current_dims, maximum_dims); + +#ifndef TOLERATE_WRITE_ERROR + if(dataspace_id < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to create a simple dataspace\n", ThisTask); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return dataspace_id; +} + +/*! \brief Wraps opening a file to give a nice error message. + * + * \param[in] fname Name of the file to be opened. + * \param[in] flags File access flags. Allowable values are: + * H5F_ACC_RDWR -- Allow read and write access to file. + * H5F_ACC_RDONLY -- Allow read-only access to file. + * \param[in] fapl_id Identifier for the file access properties list. If + * parallel file access is desired, this is a collective call + * according to the communicator stored in the fapl_id. Use + * H5P_DEFAULT for default file access properties. + * + * \return File identifier if successful (the failure check here is not guarded by TOLERATE_WRITE_ERROR). + */ +hid_t my_H5Fopen(const char *fname, unsigned int flags, hid_t fapl_id) +{ + hid_t file_id = H5Fopen(fname, flags, fapl_id); + + if(file_id < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to open file %s\n", ThisTask, fname); + } + + return file_id; +} + +/*! \brief Wraps opening a group to give a nice error message. + * + * \param[in] loc_id File or group identifier within which the group is to be + * opened. + * \param[in] groupname Name of group. + * + * \return Valid group identifier if successful. + */ +hid_t my_H5Gopen(hid_t loc_id, const char *groupname) +{ + hid_t group = H5Gopen(loc_id, groupname); + +#ifndef TOLERATE_WRITE_ERROR + if(group < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to open group %s\n", ThisTask, groupname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return group; +} + +/*!
\brief Wraps opening a dataset to give a nice error message. + * + * \param[in] file_id Identifier of the file or group within which the + * dataset to be accessed will be found. + * \param[in] datasetname Name of the dataset to access. + * + * \return Dataset identifier if successful. + */ +hid_t my_H5Dopen(hid_t file_id, const char *datasetname) +{ + hid_t dataset = H5Dopen(file_id, datasetname); + +#ifndef TOLERATE_WRITE_ERROR + if(dataset < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to open dataset %s\n", ThisTask, datasetname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return dataset; +} + +/*! \brief Wraps opening a dataset. + * + * In contrast to my_H5Dopen(), if the dataset does not exist it does not + * terminate the run. This is useful while reading an ICs file + * because in that case a non-existing dataset is put to zero (see also + * read_ic.c). + * + * \param[in] file_id Identifier of the file or group within which the + * dataset to be accessed will be found. + * \param[in] datasetname Name of the dataset to access. + * + * \return Dataset identifier if successful; otherwise negative value. + */ +hid_t my_H5Dopen_if_existing(hid_t file_id, const char *datasetname) +{ + /* save error handler and disable it */ + H5E_auto_t errfunc; + void *client_data; + H5Eget_auto(&errfunc, &client_data); + H5Eset_auto(NULL, NULL); + + hid_t dataset = H5Dopen(file_id, datasetname); + + /* reset error handler */ + H5Eset_auto(errfunc, client_data); + + return dataset; +} + +/*! \brief Wraps opening an attribute to give a nice error message. + * + * \param[in] loc_id Identifier of a group, dataset, or named datatype that + * attribute is attached to. + * \param[in] attr_name Attribute name. + * + * \return Returns attribute identifier if successful.
+ */ +hid_t my_H5Aopen_name(hid_t loc_id, const char *attr_name) +{ + hid_t attribute_id = H5Aopen_name(loc_id, attr_name); + +#ifndef TOLERATE_WRITE_ERROR + if(attribute_id < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to open attribute %s\n", ThisTask, attr_name); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return attribute_id; +} + +/*! \brief Wraps reading a dataset to give a nice error message. + * + * \param[in] dataset_id Identifier of the dataset read from. + * \param[in] mem_type_id Identifier of the memory datatype. + * \param[in] mem_space_id Identifier of the memory dataspace. + * \param[in] file_space_id Identifier of the dataset's dataspace in the file. + * \param[in] xfer_plist_id Identifier of a transfer property list for this + * I/O operation. + * \param[out] buf Buffer to receive data read from file. + * \param[in] datasetname Name of dataset (only for error message). + * + * \return Returns a non-negative value if successful. + */ +herr_t my_H5Dread(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, void *buf, + const char *datasetname) +{ + herr_t status = H5Dread(dataset_id, mem_type_id, mem_space_id, file_space_id, xfer_plist_id, buf); + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to read dataset %s\n", ThisTask, datasetname); + } + return status; +} + +/*! \brief Wraps making a copy of the dataspace to give a nice error message. + * + * \param[in] dataset_id Identifier of the dataset to query. + * \param[in] datasetname Name of the dataset (for error message only). + * + * \return Dataspace identifier if successful.
+ */ +hid_t my_H5Dget_space(hid_t dataset_id, const char *datasetname) +{ + hid_t status = H5Dget_space(dataset_id); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to determine space for dataset %s\n", ThisTask, datasetname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return status; +} + +/*! \brief Wraps reading an attribute to give a nice error message + * + * \param[in] attr_id Identifier of an attribute to read. + * \param[in] mem_type_id Identifier of the attribute datatype (in memory). + * \param[out] buf Buffer for data to be read. + * \param[in] attr_name Name of the attribute. + * \param[in] size Expected number of elements in the attribute; a mismatch with the element count of the attribute's dataspace is reported as an error before anything is read. + * + * \return Non-negative value if successful. + */ +herr_t my_H5Aread(hid_t attr_id, hid_t mem_type_id, void *buf, const char *attr_name, hssize_t size) +{ + hid_t hdf5_space = H5Aget_space(attr_id); + hssize_t attr_size = H5Sget_simple_extent_npoints(hdf5_space); + H5Sclose(hdf5_space); + + if(attr_size != size) + { + H5Eset_auto(NULL, NULL); + terminate( + "On Task %d, error detected in HDF5: mismatch in size for attribute %s, expected size = %lld, actual attribute size = " + "%lld\n", + ThisTask, attr_name, size, attr_size); + } + + herr_t status = H5Aread(attr_id, mem_type_id, buf); + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to read attribute %s\n", ThisTask, attr_name); + } + return status; +} + +/*! \brief Wraps resetting the size of an existing dataspace to give a nice + * error message. + * + * \param[in] space_id Dataspace identifier. + * \param[in] rank Rank, or dimensionality, of the dataspace. + * \param[in] current_size Array containing current size of dataspace. + * \param[in] maximum_size Array containing maximum size of dataspace. + * \param[in] attr_name Name of attribute (only for error message). + * + * \return Non-negative value if successful.
+ */ +herr_t my_H5Sset_extent_simple(hid_t space_id, int rank, const hsize_t *current_size, const hsize_t *maximum_size, + const char *attr_name) +{ + herr_t status = H5Sset_extent_simple(space_id, rank, current_size, maximum_size); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to set extent for attribute %s\n", ThisTask, attr_name); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return status; +} + +/*! \brief Wraps closing an attribute to give a nice error message. + * + * \param[in] attr_id Attribute to release access to. + * \param[in] attr_name Name of the attribute (for error message only). + * + * \return Non-negative value if successful (the H5Aclose status is returned unchecked if TOLERATE_WRITE_ERROR is defined). + */ +herr_t my_H5Aclose(hid_t attr_id, const char *attr_name) +{ + herr_t status = H5Aclose(attr_id); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to close attribute %s\n", ThisTask, attr_name); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return status; +} + +/*! \brief Wraps closing a dataset to give a nice error message. + * + * \param[in] dataset_id Identifier of the dataset to close access to. + * \param[in] datasetname Name of the dataset (for error message only). + * + * \return Non-negative value if successful. + */ +herr_t my_H5Dclose(hid_t dataset_id, const char *datasetname) +{ + herr_t status = H5Dclose(dataset_id); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to close dataset %s\n", ThisTask, datasetname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return status; +} + +/*! \brief Wraps closing a group to give a nice error message. + * + * \param[in] group_id Group identifier to release. + * \param[in] groupname Name of the group (for error message only). + * + * \return Non-negative value if successful.
+ */ +herr_t my_H5Gclose(hid_t group_id, const char *groupname) +{ + herr_t status = H5Gclose(group_id); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to close group %s\n", ThisTask, groupname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return status; +} + +/*! \brief Wraps closing a file to give a nice error message. + * + * \param[in] file_id Identifier of a file to terminate access to. + * \param[in] fname File name (for error message only). + * + * \return Non-negative value if successful. + */ +herr_t my_H5Fclose(hid_t file_id, const char *fname) +{ + herr_t status = H5Fclose(file_id); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to close file %s\n", ThisTask, fname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + return status; +} + +/*! \brief Wraps releasing and terminating access to a dataspace to give a nice + * error message. + * + * \param[in] dataspace_id Identifier of dataspace to release. + * \param[in] type Type of dataspace (simple, scalar, ...); only used to select the error message. + * + * \return Non-negative value if successful. + */ +herr_t my_H5Sclose(hid_t dataspace_id, H5S_class_t type) +{ + herr_t status = H5Sclose(dataspace_id); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + switch(type) + { + case H5S_SCALAR: + terminate("On Task %d, error detected in HDF5: unable to close a scalar dataspace\n", ThisTask); + break; + case H5S_SIMPLE: + terminate("On Task %d, error detected in HDF5: unable to close a simple dataspace\n", ThisTask); + break; + default: + terminate("On Task %d, error detected in HDF5: unknown dataspace type\n", ThisTask); + break; + } + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return status; +} + +/*! \brief Wraps copying an existing datatype to give a nice error message. + * + * \param[in] type_id Identifier of datatype to copy.
Can be a datatype + * identifier, a predefined datatype (defined in H5Tpublic.h), or + * a dataset identifier. + * + * \return Datatype identifier if successful. + */ +hid_t my_H5Tcopy(hid_t type_id) +{ + hid_t datatype_id = H5Tcopy(type_id); +#ifndef TOLERATE_WRITE_ERROR + if(datatype_id < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not properly copy datatype\n", ThisTask); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + return datatype_id; +} + +/*! \brief Wraps closing a datatype to give a nice error message. + * + * \param[in] type_id Identifier of datatype to release. + * + * \return Non-negative value if successful (the H5Tclose status is returned unchecked if TOLERATE_WRITE_ERROR is defined). + */ +herr_t my_H5Tclose(hid_t type_id) +{ + herr_t status = H5Tclose(type_id); +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not properly close datatype\n", ThisTask); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + return status; +} + +/*! \brief Wraps selecting a hyperslab to give a nice error message. + * + * \param[in] space_id Identifier of dataspace selection to modify. + * \param[in] op Operation to perform on current selection. + * \param[in] start Offset of start of hyperslab. + * \param[in] stride Hyperslab stride. + * \param[in] count Number of blocks included in hyperslab. + * \param[in] block Size of block in hyperslab. + * + * \return Non-negative value if successful. + */ +herr_t my_H5Sselect_hyperslab(hid_t space_id, H5S_seloper_t op, const hsize_t *start, const hsize_t *stride, const hsize_t *count, + const hsize_t *block) +{ + herr_t status = H5Sselect_hyperslab(space_id, op, start, stride, count, block); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not properly select the chosen hyperslab\n", ThisTask); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + return status; +} + +/*!
\brief Wraps returning the size in bytes of a given datatype to give a nice + * error message. + * + * \param[in] datatype_id Identifier of datatype to query. + * + * \return The size of the datatype in bytes. + */ +size_t my_H5Tget_size(hid_t datatype_id) +{ + size_t size = H5Tget_size(datatype_id); + +#ifndef TOLERATE_WRITE_ERROR + if(size == 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to determine the size of the given datatype\n", ThisTask); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + return size; +} + +/*! \brief Wraps setting the size in bytes of a given datatype to give a nice + * error message. + * + * \param[in] datatype_id Identifier of datatype for which the size is being + * changed. + * \param[in] size New datatype size in bytes or H5T_VARIABLE. + * + * \return Non-negative value if successful. + */ +herr_t my_H5Tset_size(hid_t datatype_id, size_t size) +{ + herr_t status = H5Tset_size(datatype_id, size); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not properly set the size of the given datatype\n", ThisTask); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return status; +} + +#ifdef HDF5_FILTERS +/*! \brief Wraps checking if all hdf5 filters selected for plist_id are + * available to give a nice error message. + * + * \param[in] plist_id Dataset or group creation property list identifier. + * + * \return Positive value if all filters are available; + * 0 if at least one filter is not currently available. + */ +htri_t my_H5Pall_filters_avail(hid_t plist_id) +{ + htri_t status = H5Pall_filters_avail(plist_id); + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not properly verify the availability of all filters\n", ThisTask); + } + return status; +} + +/*! 
\brief Wraps creating the property list of the given property class
+ *         identified by class_id to give a nice error message.
+ *
+ *  A negative identifier returned by H5Pcreate() is reported through
+ *  terminate().
+ *
+ *  \param[in] class_id The class of the property list to create.
+ *
+ *  \return Property list identifier if successful.
+ */
+hid_t my_H5Pcreate(hid_t class_id)
+{
+  const hid_t new_plist = H5Pcreate(class_id);
+
+  if(new_plist < 0)
+    {
+      /* NOTE(review): presumably silences HDF5's own error printing so that
+       * only the message below is shown - confirm for the HDF5 API in use */
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: could not create the property list associated to the given property class\n",
+                ThisTask);
+    }
+
+  return new_plist;
+}
+
+/*! \brief Wraps closing a property list to give a nice error message.
+ *
+ *  A negative status from H5Pclose() is reported through terminate().
+ *
+ *  \param[in] plist Identifier of the property list to terminate access to.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Pclose(hid_t plist)
+{
+  const herr_t rc = H5Pclose(plist);
+
+  if(rc < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: could not close the input property list\n", ThisTask);
+    }
+
+  return rc;
+}
+
+/*! \brief Wraps setting the size of the chunks of a chunked dataset to give a
+ *         nice error message.
+ *
+ *  A negative status from H5Pset_chunk() is reported through terminate().
+ *
+ *  \param[in] plist Dataset creation property list identifier.
+ *  \param[in] ndims The number of dimensions of each chunk.
+ *  \param[in] dim An array defining the size, in dataset elements, of each
+ *             chunk.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Pset_chunk(hid_t plist, int ndims, const hsize_t *dim)
+{
+  const herr_t rc = H5Pset_chunk(plist, ndims, dim);
+
+  if(rc < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: could not set chunk size for the dataset\n", ThisTask);
+    }
+
+  return rc;
+}
+
+/*! \brief Wraps setting the use of the shuffle filter to give a nice error
+ *         message.
+ *
+ *  \param[in] plist_id Dataset creation property list identifier.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Pset_shuffle(hid_t plist_id)
+{
+  herr_t status = H5Pset_shuffle(plist_id);
+  if(status < 0)
+    {
+      /* NOTE(review): presumably silences HDF5's own error printing so that
+       * only the message below is shown - confirm for the HDF5 API in use */
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: could not set the shuffle filter in the properties list\n", ThisTask);
+    }
+  return status;
+}
+
+/*! \brief Wraps setting the use of the deflate compression (gzip) to give a
+ *         nice error message.
+ *
+ *  A negative status from H5Pset_deflate() is reported through terminate().
+ *
+ *  \param[in] plist_id Dataset or group creation property list identifier.
+ *  \param[in] level Compression level. Declared as unsigned int (the
+ *             standard spelling of the non-standard `uint` typedef) so the
+ *             definition does not depend on platform-specific typedefs.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Pset_deflate(hid_t plist_id, unsigned int level)
+{
+  herr_t status = H5Pset_deflate(plist_id, level);
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: could not set the deflate compression in the properties list\n", ThisTask);
+    }
+  return status;
+}
+
+/*! \brief Wraps setting the use of the Fletcher32 checksum to give a nice
+ *         error message.
+ *
+ *  A negative status from H5Pset_fletcher32() is reported through
+ *  terminate().
+ *
+ *  \param[in] plist_id Dataset or group creation property list identifier.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Pset_fletcher32(hid_t plist_id)
+{
+  herr_t status = H5Pset_fletcher32(plist_id);
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: could not set the Fletcher32 checksum in the properties list\n", ThisTask);
+    }
+  return status;
+}
+#endif /* #ifdef HDF5_FILTERS */
+
+#endif /* #ifndef HDF5UTIL_H */
+#endif /* #ifdef HAVE_HDF5 */
diff --git a/src/amuse/community/arepo/src/io/io.c b/src/amuse/community/arepo/src/io/io.c
new file mode 100644
index 0000000000..f5d9a0c73f
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/io.c
@@ -0,0 +1,2226 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/io/io.c
+ * \date        05/2018
+ * \brief       Routines for input and output of snapshot files to disk.
+ * \details     contains functions:
+ *                void init_field
+ *                void init_units
+ *                void init_snapshot_type
+ *                void write_error
+ *                void create_snapshot_if_desired(void)
+ *                void produce_dump(void)
+ *                void savepositions(int num, int subbox_flag)
+ *                void fill_write_buffer
+ *                int get_bytes_per_blockelement
+ *                int get_datatype_in_block(enum iofields blocknr, int mode)
+ *                int get_values_per_blockelement(enum iofields blocknr)
+ *                int get_particles_in_block(enum iofields blocknr, int
+ *                  *typelist)
+ *                int blockpresent(enum iofields blocknr, int write)
+ *                void get_Tab_IO_Label(enum iofields blocknr, char *label)
+ *                void get_dataset_name(enum iofields blocknr, char *buf)
+ *                void write_file(char *fname, int writeTask, int lastTask,
+ *                  int subbox_flag)
+ *                void write_header_attributes_in_hdf5(hid_t handle)
+ *                void write_parameters_attributes_in_hdf5(hid_t handle)
+ *                herr_t my_hdf5_error_handler(void *unused)
+ *                void write_dataset_attributes(hid_t hdf5_dataset, enum
+ *                  iofields blocknr)
+ *                void write_xdmf(char *fname)
+ *                size_t my_fwrite(void *ptr, size_t size, size_t nmemb,
+ *                  FILE * stream)
+ *                size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *
+ *                  stream)
+ *                void mpi_printf(const char *fmt, ...)
+ * void mpi_fprintf(FILE * stream, const char *fmt, ...) + * void mpi_printf_each(const char *fmt, ...) + * FILE *open_file(char *fnam) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 07.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/* needs to be included after allvars.h */ +#ifdef OUTPUT_XDMF +#include /* for basename() function */ +#endif /* #ifdef OUTPUT_XDMF */ + +#include "../fof/fof.h" +#include "../gitversion/version.h" +#include "../mesh/voronoi/voronoi.h" + +#ifdef HAVE_HDF5 +#include +void write_header_attributes_in_hdf5(hid_t handle); +void write_parameters_attributes_in_hdf5(hid_t handle); +void write_compile_time_options_in_hdf5(hid_t handle); +void write_dataset_attributes(hid_t hdf5_dataset, enum iofields blocknr); +#endif /* #ifdef HAVE_HDF5 */ + +#ifdef TOLERATE_WRITE_ERROR +static char alternative_fname[MAXLEN_PATH]; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + +#ifdef OUTPUT_XDMF +static void write_xdmf(char *fname); +#endif /* #ifdef OUTPUT_XDMF */ + +static int n_type[NTYPES]; /**< contains the local (for a single task) number of particles of each type in the snapshot file */ +static long long ntot_type_all[NTYPES]; /**< contains the global number of particles of each type in the snapshot file */ +static int subbox_dump = 0; + +/*! \brief Function for registering an output field. + * + * Don't forget to add the new IO_FLAG to allvars.h. + * + * \param[in] field Specifies the field as an enumeration type iofields + * (allvars.h), e.g. IO_POS. Don't forget to insert new fields + * also in allvars.h. + * \param[in] label The label of the dataset (4 characters). + * \param[in] datasetname The name of the hdf5 dataset (maximum 256 + * characters). 
+ * \param[in] type_in_memory The type of the field in the memory (use + * MEM_NONE if specifying io_func). + * \param[in] type_in_file_output The output type in the hdf5 file. + * \param[in] type_in_file_input The input type in the hdf5 file (use + * FILE_MY_OUTPUT_TYPE for MyInputType, input is disabled with + * FILE_NONE). + * \param[in] values_per_block The number of values per field, e.g. 1 for + * mass, 3 for velocities. + * \param[in] array The array in which the value is stored. For an io_func + * this influences the particle index, the default (A_NONE) is an + * index into P/SphP, can be changed if required. + * \param[in] pointer_to_field A Pointer to the field in one of the global + * arrays, e.g. &SphP[0].Density, or &P[0].Vel[0]. + * \param[in] io_func Alternatively, if the value to output/input is not a + * simple field, you can define a function which handles i/o. + * \param[in] typelist_bitmask Specifies for which particle type the field is + * present, e.g. 1+2+8 => field present for particle types 0,1,3 + * (or use ALL_TYPES, GAS_ONLY,...). 
+ *
+ *  The label is stored as a 4-byte tag without a terminating NUL
+ *  (get_Tab_IO_Label() copies exactly 4 bytes back out). The dataset name is
+ *  truncated to 255 characters and always NUL-terminated, because
+ *  get_dataset_name() reads it back with strcpy().
+ *
+ *  \return void
+ */
+void init_field(enum iofields field, const char *label, const char *datasetname, enum types_in_memory type_in_memory,
+                enum types_in_file type_in_file_output, enum types_in_file type_in_file_input, int values_per_block, enum arrays array,
+                void *pointer_to_field, void (*io_func)(int, int, void *, int), int typelist_bitmask)
+{
+  const int alloc_step = 5; /* the field table grows in steps of this many entries */
+
+  if(Max_IO_Fields == 0)
+    {
+      IO_Fields     = (IO_Field *)mymalloc("IO_Fields", alloc_step * sizeof(IO_Field));
+      Max_IO_Fields = alloc_step;
+    }
+  else if(Max_IO_Fields == N_IO_Fields)
+    {
+      Max_IO_Fields = ((Max_IO_Fields / alloc_step) + 1) * alloc_step;
+      IO_Fields     = (IO_Field *)myrealloc(IO_Fields, Max_IO_Fields * sizeof(IO_Field));
+    }
+
+  /* take the pointer only now: the table may just have been moved by myrealloc() */
+  IO_Field *entry = &IO_Fields[N_IO_Fields];
+
+  entry->field = field;
+
+  /* 4-byte tag; strncpy() pads shorter labels with NULs and adds no terminator */
+  strncpy(entry->label, label, 4);
+
+  /* strncpy() leaves the destination unterminated when the source has at least
+   * as many characters as the limit, so copy one byte less than the 256-byte
+   * field and terminate explicitly */
+  strncpy(entry->datasetname, datasetname, 255);
+  entry->datasetname[255] = '\0';
+
+  entry->type_in_memory      = type_in_memory;
+  entry->type_in_file_output = type_in_file_output;
+  entry->type_in_file_input  = type_in_file_input;
+  entry->values_per_block    = values_per_block;
+  entry->snap_type           = SN_FULL;
+  entry->typelist            = typelist_bitmask;
+
+  entry->array = array;
+
+  /* store the byte offset of the field inside one element of its global array */
+  if(array == A_NONE)
+    {
+      entry->offset = 0;
+    }
+  else if(array == A_SPHP)
+    {
+      entry->offset = (size_t)pointer_to_field - (size_t)SphP;
+    }
+  else if(array == A_P)
+    {
+      entry->offset = (size_t)pointer_to_field - (size_t)P;
+    }
+  else if(array == A_PS)
+    {
+      entry->offset = (size_t)pointer_to_field - (size_t)PS;
+    }
+  else
+    {
+      /* the table memory is not known to be zeroed: never leave the offset
+       * indeterminate for an array type that is not handled above */
+      entry->offset = 0;
+    }
+
+  entry->io_func = io_func;
+
+  // validate types
+  /* integer data in memory may only be written as FILE_INT and read as
+   * FILE_INT (or not read at all) */
+  if(type_in_memory == MEM_INT &&
+     ((type_in_file_input != FILE_NONE && type_in_file_input != FILE_INT) || type_in_file_output != FILE_INT))
+    {
+      terminate("combination of datatypes not supported (field %s)", datasetname);
+    }
+
+  /* same rule for particle IDs with FILE_MY_ID_TYPE */
+  if(type_in_memory == MEM_MY_ID_TYPE &&
+     ((type_in_file_input != FILE_NONE && type_in_file_input != FILE_MY_ID_TYPE) || type_in_file_output != FILE_MY_ID_TYPE))
+    {
+      terminate("combination of datatypes not supported (field %s)", datasetname);
+    }
+
+  /* these floating-point memory types must not be mapped to integer/ID file types */
+  if((type_in_memory == MEM_FLOAT || type_in_memory == MEM_MY_SINGLE || type_in_memory == MEM_DOUBLE) &&
+     ((type_in_file_input != FILE_NONE && (type_in_file_input == FILE_MY_ID_TYPE || type_in_file_input == FILE_INT)) ||
+      type_in_file_output == FILE_INT || type_in_file_output == FILE_MY_ID_TYPE))
+    {
+      terminate("combination of datatypes not supported (field %s)", datasetname);
+    }
+
+  /* no unit information until init_units() is called for this field */
+  entry->a       = 0.;
+  entry->h       = 0.;
+  entry->L       = 0.;
+  entry->M       = 0.;
+  entry->V       = 0.;
+  entry->c       = 0.;
+  entry->hasunit = 0;
+
+  N_IO_Fields++;
+}
+
+/*! \brief Function for adding units to output field.
+ *
+ *  This only works for fields registered with init_field. Only the first
+ *  registered entry matching the field is updated; if no entry matches,
+ *  nothing happens.
+ *
+ *  \param[in] field Specifies the field as an enumeration type iofields
+ *             (allvars.h), e.g. IO_POS.
+ *  \param[in] a the exponent of the cosmological a factor.
+ *  \param[in] h the exponent of the hubble parameter.
+ *  \param[in] L the length unit scaling.
+ *  \param[in] M the mass unit scaling.
+ *  \param[in] V the velocity unit scaling.
+ *  \param[in] c conversion factor to cgs units (zero indicates dimensionless
+ *             quantity, integer count, etc).
+ *
+ *  \return void
+ */
+void init_units(enum iofields field, double a, double h, double L, double M, double V, double c)
+{
+  for(int i = 0; i < N_IO_Fields; i++)
+    {
+      if(IO_Fields[i].field != field)
+        continue;
+
+      IO_Fields[i].hasunit = 1;
+      IO_Fields[i].a       = a;
+      IO_Fields[i].h       = h;
+      IO_Fields[i].L       = L;
+      IO_Fields[i].M       = M;
+      IO_Fields[i].V       = V;
+      IO_Fields[i].c       = c;
+      break;
+    }
+}
+
+/*! \brief Function for determining whether a field is dumped in snapshot.
+ *
+ *  This only works for fields registered with init_field.
+ *  The member snap_type is initialized to SN_FULL in init_field.
+ *  Every registered entry matching the field is updated (the search does
+ *  not stop at the first match).
+ *
+ *  \param[in] field Specifies the field as an enumeration type iofields
+ *             (allvars.h), e.g. IO_POS.
+ *  \param[in] type In which snapshot types this field should be present
+ *             (e.g. SN_FULL).
+ *
+ *  \return void
+ */
+void init_snapshot_type(enum iofields field, enum sn_type type)
+{
+  for(int idx = 0; idx < N_IO_Fields; idx++)
+    {
+      if(IO_Fields[idx].field != field)
+        continue;
+
+      IO_Fields[idx].snap_type = type;
+    }
+}
+
+#ifdef TOLERATE_WRITE_ERROR
+/*! \brief Print information about a write error.
+ *
+ *  If a write error occurs, this function prints some useful debug information
+ *  and sets to 1 the variable WriteErrorFlag so that the write operation that
+ *  caused the error can be performed again. While WriteErrorFlag is already
+ *  set, further calls do nothing, so only the first error is reported.
+ *
+ *  \param[in] check Flag that indicates where the function was called [0 and 1
+ *             in my_fwrite(), 2 in my_hdf5_error_handler(), 3 in
+ *             hdf5_header_error_handler()].
+ *  \param[in] nwritten Number of elements actually written.
+ *  \param[in] nmemb Number of elements that should be written.
+ *
+ *  \return void
+ */
+void write_error(int check, size_t nwritten, size_t nmemb)
+{
+  /* an error has already been recorded: stay silent */
+  if(WriteErrorFlag)
+    return;
+
+  char node[MPI_MAX_PROCESSOR_NAME];
+  int node_len;
+  MPI_Get_processor_name(node, &node_len);
+
+  printf("TOLERATE_WRITE_ERROR: write failed node=%s nwritten=%lld nmemb=%lld errno=%s task=%d check=%d\n", node,
+         (long long)nwritten, (long long)nmemb, strerror(errno), ThisTask, check);
+  myflush(stdout);
+
+  WriteErrorFlag = 1;
+}
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+/*! \brief Checks if a snapshot should be saved.
+ *
+ *  This function checks whether a snapshot file or other kinds of output
+ *  files, such as a projection, should be saved at the current time-step.
+ *  If that is the case, the appropriate functions to produce the desired
+ *  file are called and the parameter controlling the output are updated
+ *  accordingly.
+ *
+ *  \return void
+ */
+void create_snapshot_if_desired(void)
+{
+#ifdef OUTPUT_EVERY_STEP
+  /* force the output condition below to hold at the current time */
+  All.Ti_nextoutput = All.Ti_Current;
+#endif /* #ifdef OUTPUT_EVERY_STEP */
+
+  if(All.HighestActiveTimeBin == All.HighestOccupiedTimeBin) /* allow only top-level synchronization points */
+    if(All.Ti_Current >= All.Ti_nextoutput && All.Ti_nextoutput >= 0) /* a negative Ti_nextoutput never triggers a dump */
+      {
+        DumpFlag = DumpFlagNextSnap;
+        produce_dump();
+
+        /* schedule the next output strictly after the current integer time */
+        All.Ti_nextoutput = find_next_outputtime(All.Ti_Current + 1);
+      }
+}
+
+/*! \brief A wrapper function used to create a snapshot.
+ *
+ *  This function wraps together savepositions(), the function that
+ *  saves the snapshot file to the disk, with functions used for
+ *  special output needs. The snapshot counter All.SnapshotFileCount is
+ *  post-incremented, so each call uses a new snapshot number.
+ *
+ *  \return void
+ */
+void produce_dump(void)
+{
+#ifdef UPDATE_GRADIENTS_FOR_OUTPUT
+  exchange_primitive_variables();
+  calculate_gradients();
+#endif /* #ifdef UPDATE_GRADIENTS_FOR_OUTPUT */
+
+  savepositions(All.SnapshotFileCount++, 0); /* write snapshot file */
+}
+
+/*! \brief Saves snapshot to disk.
+ *
+ *  This function writes a snapshot of the particle distribution to one or
+ *  several files. If NumFilesPerSnapshot>1, the snapshot is distributed
+ *  into several files, which are written simultaneously. Each file contains
+ *  data from a group of processors of size roughly NTask/NumFilesPerSnapshot.
+ *
+ *  Nothing is done when DumpFlag is 0. With DumpFlag == 4 no snapshot file is
+ *  written, but the surrounding bookkeeping is still executed. The file
+ *  groups are written one after another, with an MPI barrier after each
+ *  group, so all tasks must call this function.
+ *
+ *  \param[in] num The snapshot number.
+ *  \param[in] subbox_flag If greater than 0 instructs the code to output only
+ *             a subset of the whole domain.
+ *
+ *  \return void
+ */
+void savepositions(int num, int subbox_flag)
+{
+  /* NOTE(review): file names are built with unbounded sprintf() into this
+   * 500-byte buffer; long OutputDir/SnapshotFileBase values could overflow it */
+  char buf[500];
+  int n, filenr, gr, ngroups, masterTask, lastTask;
+  double t0, t1;
+
+  t0 = second();
+  CPU_Step[CPU_MISC] += measure_time();
+
+  if(DumpFlag)
+    {
+      /* file-scope flag read by blockpresent() to select the subbox field set */
+      subbox_dump = 0;
+
+      if(subbox_flag > 0)
+        {
+          mpi_printf("\nwriting small subbox #%d snapshot file #%d @ time %g ... \n", subbox_flag - 1, num, All.Time);
+          subbox_dump = 1;
+        }
+      else
+        mpi_printf("\nwriting snapshot file #%d @ time %g ... (DumpFlag=%d)\n", num, All.Time, DumpFlag);
+
+#ifdef FOF
+      /* NOTE(review): this condition lacks the RestartFlag != 6 test that the
+       * matching clean-up block near the end of the function has - confirm
+       * that the asymmetry is intended */
+      if(RestartFlag != 3 && RestartFlag != 18 && subbox_flag == 0 && DumpFlag != 2)
+        {
+          {
+            mpi_printf("\nWe shall first compute a group catalogue for this snapshot file\n");
+
+            fof_fof(num);
+          }
+        }
+#endif /* #ifdef FOF */
+
+      if(DumpFlag != 4)
+        {
+          CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE);
+
+          if(NTask < All.NumFilesPerSnapshot)
+            {
+              warn(
+                  "Number of processors must be larger or equal than All.NumFilesPerSnapshot! Reducing All.NumFilesPerSnapshot "
+                  "accordingly.\n");
+              All.NumFilesPerSnapshot = NTask;
+            }
+
+          if(All.SnapFormat < 1 || All.SnapFormat > 3)
+            terminate("Unsupported File-Format. All.SnapFormat=%d\n", All.SnapFormat);
+
+#ifndef HAVE_HDF5
+          if(All.SnapFormat == 3)
+            {
+              mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n");
+            }
+#endif /* #ifndef HAVE_HDF5 */
+
+          /* determine global and local particle numbers */
+          for(n = 0; n < NTYPES; n++)
+            n_type[n] = 0;
+
+          for(n = 0; n < NumPart; n++)
+            {
+              n_type[P[n].Type]++;
+            }
+
+          sumup_large_ints(NTYPES, n_type, ntot_type_all);
+
+          /* assign processors to output files */
+          distribute_file(All.NumFilesPerSnapshot, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask);
+
+          if(All.NumFilesPerSnapshot > 1)
+            {
+              /* multi-file snapshots live in their own directory, created by task 0 only */
+              if(ThisTask == 0)
+                {
+                  sprintf(buf, "%s/snapdir_%03d", All.OutputDir, num);
+                  mkdir(buf, 02755); /* NOTE(review): return value ignored, so a failed mkdir only shows up when files are opened */
+
+#ifdef TOLERATE_WRITE_ERROR
+                  sprintf(alternative_fname, "%s/snapdir_%03d", AlternativeOutputDir, num);
+                  mkdir(alternative_fname, 02755);
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+                }
+
+              /* nobody proceeds before task 0 has attempted to create the directory */
+              MPI_Barrier(MPI_COMM_WORLD);
+            }
+
+          /* default file name; may be replaced by the special cases below */
+          if(All.NumFilesPerSnapshot > 1)
+            sprintf(buf, "%s/snapdir_%03d/%s_%03d.%d", All.OutputDir, num, All.SnapshotFileBase, num, filenr);
+          else
+            sprintf(buf, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num);
+
+#ifdef TOLERATE_WRITE_ERROR
+          if(All.NumFilesPerSnapshot > 1)
+            sprintf(alternative_fname, "%s/snapdir_%03d/%s_%03d.%d", AlternativeOutputDir, num, All.SnapshotFileBase, num, filenr);
+          else
+            sprintf(alternative_fname, "%s%s_%03d", AlternativeOutputDir, All.SnapshotFileBase, num);
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+          /* RestartFlag == 3 writes to a differently named file (suffix chosen at compile time) */
+          if(RestartFlag == 3)
+            {
+#ifndef FOF_STOREIDS
+              if(All.NumFilesPerSnapshot > 1)
+                sprintf(buf, "%s/snapdir_%03d/%s-groupordered_%03d.%d", All.OutputDir, num, All.SnapshotFileBase, num, filenr);
+              else
+                sprintf(buf, "%s%s-groupordered_%03d", All.OutputDir, All.SnapshotFileBase, num);
+#else  /* #ifndef FOF_STOREIDS */
+              if(All.NumFilesPerSnapshot > 1)
+                sprintf(buf, "%s/snapdir_%03d/%s-storeids_%03d.%d", All.OutputDir, num, All.SnapshotFileBase, num, filenr);
+              else
+                sprintf(buf, "%s%s-storeids_%03d", All.OutputDir, All.SnapshotFileBase, num);
+#endif /* #ifndef FOF_STOREIDS #else */
+            }
+
+#ifdef ADDBACKGROUNDGRID
+          /* unconditionally overrides any name chosen above */
+          if(All.NumFilesPerSnapshot > 1)
+            sprintf(buf, "%s-with-grid.%d", All.InitCondFile, filenr);
+          else
+            sprintf(buf, "%s-with-grid", All.InitCondFile);
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+          /* number of write rounds = ceil(NumFilesPerSnapshot / NumFilesWrittenInParallel);
+           * NOTE(review): divides by All.NumFilesWrittenInParallel without a zero
+           * check - presumably validated when the parameters are read; confirm */
+          ngroups = All.NumFilesPerSnapshot / All.NumFilesWrittenInParallel;
+          if((All.NumFilesPerSnapshot % All.NumFilesWrittenInParallel))
+            ngroups++;
+
+          for(gr = 0; gr < ngroups; gr++)
+            {
+              if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */
+                {
+                  /* only the master task of the first file in each round prints the progress line */
+                  if(ThisTask == masterTask && (filenr % All.NumFilesWrittenInParallel) == 0)
+                    printf("writing snapshot files group %d out of %d - files %d-%d (total of %d files): '%s'\n", gr + 1, ngroups,
+                           filenr, filenr + All.NumFilesWrittenInParallel - 1, All.NumFilesPerSnapshot, buf);
+                  write_file(buf, masterTask, lastTask, subbox_flag);
+#ifdef OUTPUT_XDMF
+                  if(All.SnapFormat == 3)
+                    {
+                      write_xdmf(buf);
+                    }
+#endif /* #ifdef OUTPUT_XDMF */
+                }
+              /* every task takes part in every round's barrier, whether it wrote or not */
+              MPI_Barrier(MPI_COMM_WORLD);
+            }
+
+          myfree(CommBuffer);
+
+          t1 = second();
+          CPU_Step[CPU_SNAPSHOT] += measure_time();
+
+          mpi_printf("done with writing snapshot (took %g sec).\n", timediff(t0, t1));
+        }
+      else
+        {
+          mpi_printf("done with writing files: no dump of snapshot (DumpFlag = %d).\n", DumpFlag);
+        }  // if(DumpFlag !=4)
+
+#ifdef FOF
+      if(RestartFlag != 3 && RestartFlag != 6 && RestartFlag != 18 && subbox_flag == 0 && DumpFlag != 2)
+        {
+          {
+#ifndef FOF_STOREIDS
+            /* now revert from output order to the original order */
+            for(n = 0; n < NumPart; n++)
+              {
+                PS[n].TargetTask  = PS[n].OriginTask;
+                PS[n].TargetIndex = PS[n].OriginIndex;
+              }
+
+            fof_subfind_exchange(MPI_COMM_WORLD);
+
+            myfree(PS);
+
+            /* do resize because subfind may have increased these limits */
+            if(All.MaxPart != fof_OldMaxPart)
+              {
+                All.MaxPart = fof_OldMaxPart;
+                reallocate_memory_maxpart();
+              }
+            if(All.MaxPartSph != fof_OldMaxPartSph)
+              {
+                All.MaxPartSph = fof_OldMaxPartSph;
+                reallocate_memory_maxpartsph();
+              }
+
+            CPU_Step[CPU_FOF] += measure_time();
+#endif /* #ifndef FOF_STOREIDS */
+
+            /* recreate the mesh that we had freed to reduce peak memory usage */
+            create_mesh();
+            mesh_setup_exchange();
+          }
+        }
+#endif /* #ifdef FOF */
+
+      All.Ti_lastoutput = All.Ti_Current;
+
+      CPU_Step[CPU_SNAPSHOT] += measure_time();
+    }
+}
+
+/*! \brief This function fills the write buffer with particle data.
+ *
+ *  \param[out] buffer Buffer to be filled.
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[in, out] startindex On entry, the index into P[] at which the scan
+ *             for particles of the requested type starts; on return, the
+ *             index at which the scan stopped.
+ *  \param[in] pc Number of particles to be put in the buffer.
+ *  \param[in] type Particle type.
+ *  \param[in] subbox_flag If greater than 0 instructs the code to output
+ *             only a subset of the whole domain.
+ *
+ *  \return void
+ */
+void fill_write_buffer(void *buffer, enum iofields blocknr, int *startindex, int pc, int type, int subbox_flag)
+{
+  /* subbox_flag is not referenced anywhere in this function body */
+  int n, k, pindex, f;
+  MyOutputFloat *fp;
+  MyIDType *ip;
+  int *intp;
+
+  /* determine which field we are working on */
+  int field = -1;
+
+  for(f = 0; f < N_IO_Fields; f++)
+    {
+      if(IO_Fields[f].field == blocknr)
+        {
+          field = f;
+          break;
+        }
+    }
+
+  if(field < 0)
+    terminate("IO field=%d not registered with init_field()", (int)blocknr);
+
+  set_cosmo_factors_for_current_time();
+
+  /* typed cursors that all start at the beginning of the same buffer; for a
+   * given field only the cursor matching its file type is advanced */
+  fp              = (MyOutputFloat *)buffer;
+  ip              = (MyIDType *)buffer;
+  intp            = (int *)buffer;
+  double *doublep = (double *)buffer;
+  float *floatp   = (float *)buffer;
+
+  pindex = *startindex;
+
+  /* n counts particles written, pindex walks P[]; NOTE(review): pindex is not
+   * bounded by NumPart here, so the caller's pc must not exceed the number of
+   * particles of this type that remain from *startindex on */
+  for(n = 0; n < pc; pindex++)
+    {
+      /* SUBBOX_SNAPSHOTS specialized output */
+
+      /* normal particle output */
+      if(P[pindex].Type == type)
+        {
+          if(IO_Fields[field].io_func)
+            {
+              /* field is produced by a user-supplied function instead of a plain memory copy */
+              int particle;
+              switch(IO_Fields[field].array)
+                {
+                  case A_NONE:
+                  case A_SPHP:
+                  case A_P:
+                    particle = pindex;
+                    break;
+                  case A_PS:
+                    terminate("Not good, trying to read into PS[]?\n");
+                    break;
+                  default:
+                    terminate("ERROR in fill_write_buffer: Array not found!\n");
+                    break;
+                }
+
+              /* the last argument of io_func is always 0 here; presumably it
+               * selects output mode - TODO confirm against the io_func implementations */
+              switch(IO_Fields[field].type_in_file_output)
+                {
+                  case FILE_NONE:
+                    terminate("error");
+                    break;
+                  case FILE_INT:
+                    IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, intp, 0);
+                    intp += IO_Fields[field].values_per_block;
+                    n++;
+                    break;
+                  case FILE_MY_ID_TYPE:
+                    IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, ip, 0);
+                    ip += IO_Fields[field].values_per_block;
+                    n++;
+                    break;
+                  case FILE_MY_IO_FLOAT:
+                    IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, fp, 0);
+                    fp += IO_Fields[field].values_per_block;
+                    n++;
+                    break;
+                  case FILE_DOUBLE:
+                    IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, doublep, 0);
+                    doublep += IO_Fields[field].values_per_block;
+                    n++;
+                    break;
+                  case FILE_FLOAT:
+                    IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, floatp, 0);
+                    floatp += IO_Fields[field].values_per_block;
+                    n++;
+                    break;
+                }
+            }
+          else
+            {
+              /* plain field: read it at (array element address + offset stored by init_field) */
+              void *array_pos;
+
+              switch(IO_Fields[field].array)
+                {
+                  case A_NONE:
+                    array_pos = 0;
+                    break;
+
+                  case A_SPHP:
+                    array_pos = SphP + pindex;
+                    break;
+
+                  case A_P:
+                    array_pos = P + pindex;
+                    break;
+                  case A_PS:
+                    array_pos = PS + pindex;
+                    break;
+
+                  default:
+                    terminate("ERROR in fill_write_buffer: Array not found!\n");
+                    break;
+                }
+
+              for(k = 0; k < IO_Fields[field].values_per_block; k++)
+                {
+                  double value = 0.;
+
+                  /* step 1: fetch component k; integer and ID types are written
+                   * straight to the buffer, floating-point types are first
+                   * widened to double in 'value' */
+                  switch(IO_Fields[field].type_in_memory)
+                    {
+                      case MEM_INT:
+                        *intp = *((int *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(int)));
+                        intp++;
+                        break;
+
+                      case MEM_MY_ID_TYPE:
+                        *ip = *((MyIDType *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyIDType)));
+                        ip++;
+                        break;
+
+                      case MEM_FLOAT:
+                        value = *((float *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(float)));
+                        break;
+
+                      case MEM_DOUBLE:
+                        value = *((double *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(double)));
+                        break;
+
+                      case MEM_MY_SINGLE:
+                        value = *((MySingle *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MySingle)));
+                        break;
+
+                      case MEM_MY_FLOAT:
+                        value = *((MyFloat *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyFloat)));
+                        break;
+
+                      case MEM_MY_DOUBLE:
+                        value = *((MyDouble *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyDouble)));
+                        break;
+
+                      case MEM_NONE:
+                        terminate("ERROR in fill_write_buffer: reached MEM_NONE with no io_func specified!\n");
+                        break;
+
+                      default:
+                        terminate("ERROR in fill_write_buffer: Type not found!\n");
+                        break;
+                    }
+
+                  /* step 2: store 'value' in the floating-point file type; the
+                   * integer/ID file types were already handled in step 1 and
+                   * fall through to the empty default */
+                  switch(IO_Fields[field].type_in_file_output)
+                    {
+                      case FILE_MY_IO_FLOAT:
+                        *fp = value;
+                        fp++;
+                        break;
+
+                      case FILE_DOUBLE:
+                        *doublep = value;
+                        doublep++;
+                        break;
+
+                      case FILE_FLOAT:
+                        *floatp = value;
+                        floatp++;
+                        break;
+
+                      default:
+                        break;
+                    }
+                }
+
+              n++;
+            }  // end io_func/not
+        }      // end type if
+    }          // end particle loop
+
+  /* report back where the scan stopped so the next call can resume from there */
+  *startindex = pindex;
+}
+
+/*! \brief This function tells the size in bytes of one data entry in each of
+ *         the blocks defined for the output file.
+ *
+ *  The size is values_per_block times the size of the field's file type;
+ *  for FILE_MY_IO_FLOAT that is MyInputFloat in input mode and MyOutputFloat
+ *  in output mode. If the block is not registered, 0 is returned.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[in] mode Used to distinguish whether the function is called in input
+ *             mode (mode > 0) or in output mode (mode = 0). The size of one
+ *             data entry may vary depending on the mode.
+ *
+ *  \return Size of the data entry in bytes.
+ */
+int get_bytes_per_blockelement(enum iofields blocknr, int mode)
+{
+  int bytes_per_blockelement = 0;
+  int f;
+
+  for(f = 0; f < N_IO_Fields; f++)
+    {
+      if(IO_Fields[f].field == blocknr)
+        {
+          if(mode)
+            {
+              /* input: element size follows the type expected in the file being read */
+              switch(IO_Fields[f].type_in_file_input)
+                {
+                  case FILE_NONE:
+                    terminate("error");
+                    break;
+                  case FILE_INT:
+                    bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(int);
+                    break;
+                  case FILE_MY_ID_TYPE:
+                    bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(MyIDType);
+                    break;
+                  case FILE_MY_IO_FLOAT:
+                    bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(MyInputFloat);
+                    break;
+                  case FILE_DOUBLE:
+                    bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(double);
+                    break;
+                  case FILE_FLOAT:
+                    bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(float);
+                    break;
+                }
+            }
+          else
+            {
+              /* output: element size follows the type written to the snapshot */
+              switch(IO_Fields[f].type_in_file_output)
+                {
+                  case FILE_NONE:
+                    terminate("error");
+                    break;
+                  case FILE_INT:
+                    bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(int);
+                    break;
+                  case FILE_MY_ID_TYPE:
+                    bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(MyIDType);
+                    break;
+                  case FILE_MY_IO_FLOAT:
+                    bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(MyOutputFloat);
+                    break;
+                  case FILE_DOUBLE:
+                    bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(double);
+                    break;
+                  case FILE_FLOAT:
+                    bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(float);
+                    break;
+                }
+            }
+          
break;
+        }
+    }
+
+  return bytes_per_blockelement;
+}
+
+/*! \brief This function determines the type of one data entry in each of the
+ *         blocks defined for the output file.
+ *
+ *  Used only if output in HDF5 format is enabled. If the block has not been
+ *  registered with init_field(), terminate() is called.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[in] mode For input mode > 0, for output mode = 0.
+ *
+ *  \return typekey, a flag that indicates the type of the data entry.
+ */
+int get_datatype_in_block(enum iofields blocknr, int mode)
+{
+  int f;
+
+  for(f = 0; f < N_IO_Fields; f++)
+    {
+      if(IO_Fields[f].field == blocknr)
+        return mode ? IO_Fields[f].type_in_file_input : IO_Fields[f].type_in_file_output;
+    }
+
+  terminate("error invalid field");
+
+  /* only reached if terminate() returns; return a defined value instead of
+   * an uninitialized local */
+  return -1;
+}
+
+/*! \brief This function determines the number of elements composing one data
+ *         entry in each of the blocks defined for the output file.
+ *
+ *  Used only if output in HDF5 format is enabled. If the block has not been
+ *  registered with init_field(), terminate() is called.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *
+ *  \return Number of elements of one data entry.
+ */
+int get_values_per_blockelement(enum iofields blocknr)
+{
+  int f;
+
+  for(f = 0; f < N_IO_Fields; f++)
+    {
+      if(IO_Fields[f].field == blocknr)
+        return IO_Fields[f].values_per_block;
+    }
+
+  /* same message as the other lookup helpers; there is no switch in this function */
+  terminate("error invalid field");
+  return 0;
+}
+
+/*! \brief Gets particle number in an output block.
+ *
+ *  This function determines how many particles there are in a given block,
+ *  based on the information in the header-structure. It also flags particle
+ *  types that are present in the block in the typelist array.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[out] typelist Array with NTYPES entries; entry i is set to 1 if
+ *              particle type i is present in the block and to 0 otherwise.
+ *
+ *  \return The total number of particles in the block.
+ */
+int get_particles_in_block(enum iofields blocknr, int *typelist)
+{
+  int t, f;
+  int total = 0;
+
+  switch(blocknr)
+    {
+      case IO_MASS:
+        /* a type contributes to the mass block only if its MassTable entry is
+         * zero and the header lists particles of that type */
+        for(t = 0; t < NTYPES; t++)
+          {
+            const int present = (All.MassTable[t] == 0 && header.npart[t] > 0);
+
+            typelist[t] = present;
+            if(present)
+              total += header.npart[t];
+          }
+        return total; /* with masses */
+        break;
+
+      case IO_LASTENTRY:
+        terminate("reached last entry in switch - strange.");
+        break;
+
+      default:
+        for(f = 0; f < N_IO_Fields; f++)
+          {
+            if(IO_Fields[f].field != blocknr)
+              continue;
+
+            /* bit t of the field's typelist mask enables particle type t */
+            for(t = 0; t < NTYPES; t++)
+              {
+                const int present = ((IO_Fields[f].typelist & (1 << t)) && header.npart[t] > 0);
+
+                typelist[t] = present;
+                if(present)
+                  total += header.npart[t];
+              }
+
+            return total;
+          }
+        break;
+
+    }  // end switch
+
+  /* reached for IO_LASTENTRY and for blocks that were never registered */
+  terminate("reached end of function - this should not happen");
+  return 0;
+}
+
+/*! \brief Checks if a block is expected for file input or output.
+ *
+ *  This function tells whether a block in the input/output file is requested
+ *  or not. Because the blocks processed in the two cases are different, the
+ *  mode is indicated with the flag write (1=write, 0=read).
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[in] write If 0 the function is in read mode, if 1 the function is
+ *             in write mode.
+ *
+ *  \return 0 if the block is not present, 1 otherwise.
+ */
+int blockpresent(enum iofields blocknr, int write)
+{
+  int f;
+
+  if(!write)
+    {
+#ifdef PASSIVE_SCALARS
+      if(RestartFlag == 0 && blocknr == IO_PASS)
+        return 1;
+#endif /* #ifdef PASSIVE_SCALARS */
+      /* The preprocessor picks exactly one outer and one inner condition
+       * below; together they form `if(outer) if(inner) return 0;`, so the
+       * block is rejected only when both selected conditions hold. */
+#if defined(MHD) && !defined(MHD_SEEDFIELD)
+      if(All.ICFormat != 3 && RestartFlag == 0 && (blocknr > IO_U && blocknr != IO_BFLD))
+#else  /* #if defined(MHD) && !defined(MHD_SEEDFIELD) */
+      if(All.ICFormat != 3 && RestartFlag == 0 && blocknr > IO_U)
+#endif /* #if defined(MHD) && !defined(MHD_SEEDFIELD) #else */
+#ifdef READ_LEGACY_ICS
+        if(RestartFlag == 0 && blocknr > IO_U && blocknr != IO_BFLD)
+#else  /* #ifdef READ_LEGACY_ICS */
+        if(RestartFlag == 0)
+#endif /* #ifdef READ_LEGACY_ICS #else */
+          return 0; /* ignore all other blocks in non-HDF5 initial conditions */
+    }
+
+  /* look the block up in the table filled by init_field() */
+  for(f = 0; f < N_IO_Fields; f++)
+    {
+      if(IO_Fields[f].field == blocknr)
+        {
+          if(!write)
+            {
+              /* reading: present unless input was disabled with FILE_NONE */
+              if(IO_Fields[f].type_in_file_input != FILE_NONE)
+                {
+                  return 1;
+                }
+            }
+          else
+            {
+              /* writing: a field without an output type is never written */
+              if(IO_Fields[f].type_in_file_output == FILE_NONE)
+                return 0;
+
+              /* subboxes: write all fields except those marked by SN_NO_SUBBOX or SN_MINI_ONLY
+                 (must come first to ignore DumpFlag) */
+              if(subbox_dump)
+                {
+                  if(IO_Fields[f].snap_type == SN_NO_SUBBOX || IO_Fields[f].snap_type == SN_MINI_ONLY)
+                    return 0;
+
+                  return 1;
+                }
+
+              /* normal full snapshot (with or without groupcat): only skip fields marked by SN_MINI_ONLY */
+              if(DumpFlag == 1 || DumpFlag == 2)
+                {
+                  if(IO_Fields[f].snap_type == SN_MINI_ONLY)
+                    return 0;
+
+                  return 1;
+                }
+
+              /* mini-snaps: write only those fields marked by either SN_MINI or SN_MINI_ONLY */
+              if(DumpFlag == 3)
+                {
+                  if(IO_Fields[f].snap_type == SN_MINI || IO_Fields[f].snap_type == SN_MINI_ONLY)
+                    return 1;
+
+                  if(IO_Fields[f].typelist == BHS_ONLY)
+                    return 1;  // temporarily hard-coded that all BH fields are included in mini-snaps
+
+                  return 0;  // specifically do not include any other fields in mini-snaps
+                }
+            }
+          /* reached for a registered field when reading with input disabled,
+           * or when writing with a DumpFlag other than 1, 2 or 3 */
+          return 0;
+        }
+    }
+
+  return 0; /* default: not present */
+}
+
+/*! \brief This function associates a short 4-character block name with each + * block number. + * + * This is stored in front of each block for snapshot FileFormat=2. + * + * \param[in] blocknr ID of the output block (i.e. position, velocities...). + * \param[in] label string containing the dataset name. + * + * \return void + */ +void get_Tab_IO_Label(enum iofields blocknr, char *label) +{ + int f; + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + strncpy(label, IO_Fields[f].label, 4); + return; + } + } + + terminate("error invalid field"); +} + +/*! \brief This function associates a dataset name with each block number. + * + * This is needed to name the dataset if the output is written in HDF5 + * format. + * + * \param[in] blocknr ID of the output block (i.e. position, velocities...). + * \param[in] buf String containing the dataset name. + * + * \return void + */ +void get_dataset_name(enum iofields blocknr, char *buf) +{ + int f; + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + strcpy(buf, IO_Fields[f].datasetname); + return; + } + } + + terminate("error invalid field"); +} + +/*! \brief Actually write the snapshot file to the disk. + * + * This function writes a snapshot file containing the data from processors + * 'writeTask' to 'lastTask'. 'writeTask' is the one that actually writes. + * Each snapshot file contains a header and cell/particle details. The + * output fields for each particle type depend on included physics + * and compile-time flags. + * + * \param[in] fname String containing the file name. + * \param[in] writeTask The rank of the task in a writing group that which + * is responsible for the output operations. + * \param[in] lastTask The rank of the last task in a writing group. + * \param[in] subbox_flag If greater than 0 instructs the code to output + * only a subset of the whole domain. 
 *
 *  \return void
 */
void write_file(char *fname, int writeTask, int lastTask, int subbox_flag)
{
  int type, bytes_per_blockelement, npart, nextblock, typelist[NTYPES];
  int n_for_this_task, n, p, pc, offset = 0, task;
  int blockmaxlen, ntot_type[NTYPES], nn[NTYPES];
  enum iofields blocknr;
  char label[8];
  int bnr;
  int blksize;
  MPI_Status status;
  FILE *fd = 0;
  int pcsum = 0; /* elements of the current type already written to the file */

#ifdef HAVE_HDF5
  hid_t hdf5_file = 0, hdf5_grp[NTYPES], hdf5_headergrp = 0, hdf5_dataspace_memory;
  hid_t hdf5_datatype = 0, hdf5_dataspace_in_file = 0, hdf5_dataset = 0;
  hsize_t dims[2], count[2], start[2];
  int rank = 0;
  char buf[500];
#ifdef HDF5_FILTERS
  hid_t hdf5_properties;
#endif /* #ifdef HDF5_FILTERS */
  hid_t hdf5_paramsgrp = 0;
  hid_t hdf5_configgrp = 0;
#endif /* #ifdef HAVE_HDF5 */

/* SKIP writes the current value of blksize to fd; it is used in pairs around
 * each block of the binary formats (SnapFormat 1 and 2). */
#define SKIP                                 \
  {                                          \
    my_fwrite(&blksize, sizeof(int), 1, fd); \
  }

#ifdef TOLERATE_WRITE_ERROR
  /* at most two attempts: first with fname, then with alternative_fname */
  for(int try_io = 0; try_io < 2; try_io++)
  {
    WriteErrorFlag = 0;
#ifdef HAVE_HDF5
    H5Eget_current_stack(); /* clears current error stack */
#endif /* #ifdef HAVE_HDF5 */
#endif /* #ifdef TOLERATE_WRITE_ERROR */

    /* determine particle numbers of each type in file */
    if(ThisTask == writeTask)
    {
      for(n = 0; n < NTYPES; n++)
        ntot_type[n] = n_type[n];

      /* collect the local counts of all other tasks in the writing group */
      for(task = writeTask + 1; task <= lastTask; task++)
      {
        MPI_Recv(&nn[0], NTYPES, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status);
        for(n = 0; n < NTYPES; n++)
          ntot_type[n] += nn[n];
      }

      /* send the per-file totals back to them */
      for(task = writeTask + 1; task <= lastTask; task++)
        MPI_Send(&ntot_type[0], NTYPES, MPI_INT, task, TAG_N, MPI_COMM_WORLD);
    }
    else
    {
      MPI_Send(&n_type[0], NTYPES, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD);
      MPI_Recv(&ntot_type[0], NTYPES, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status);
    }

    /* fill file header */
    for(n = 0; n < NTYPES; n++)
    {
      header.npart[n] = ntot_type[n];
      /* the global count is split into a low and a high 32-bit word */
      header.npartTotal[n] = (unsigned int)ntot_type_all[n];
      header.npartTotalHighWord[n] = (unsigned int)(ntot_type_all[n] >> 32);
    }

    for(n = 0; n < NTYPES; n++)
      header.mass[n] = All.MassTable[n];

    header.time = All.Time;

    if(All.ComovingIntegrationOn)
      header.redshift = 1.0 / All.Time - 1;
    else
      header.redshift = 0;

    header.flag_sfr = 0;
    header.flag_feedback = 0;
    header.flag_cooling = 0;
    header.flag_stellarage = 0;
    header.flag_metals = 0;

    header.flag_tracer_field = 0;

#ifdef COOLING
    header.flag_cooling = 1;
#endif /* #ifdef COOLING */

#ifdef USE_SFR
    header.flag_sfr = 1;
    header.flag_feedback = 1;
#endif /* #ifdef USE_SFR */

    header.num_files = All.NumFilesPerSnapshot;
    header.BoxSize = All.BoxSize;
    header.Omega0 = All.Omega0;
    header.OmegaLambda = All.OmegaLambda;
    header.HubbleParam = All.HubbleParam;

#ifdef OUTPUT_IN_DOUBLEPRECISION
    header.flag_doubleprecision = 1;
#else  /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
    header.flag_doubleprecision = 0;
#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */

    /* open file and write header */

    if(ThisTask == writeTask)
    {
      if(All.SnapFormat == 3)
      {
#ifdef HAVE_HDF5
        sprintf(buf, "%s.hdf5", fname);
        hdf5_file = my_H5Fcreate(buf, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

        hdf5_headergrp = my_H5Gcreate(hdf5_file, "/Header", 0);

        /* one group per particle type that is present in this file */
        for(type = 0; type < NTYPES; type++)
        {
          if(header.npart[type] > 0)
          {
            sprintf(buf, "/PartType%d", type);
            hdf5_grp[type] = my_H5Gcreate(hdf5_file, buf, 0);
          }
        }

        write_header_attributes_in_hdf5(hdf5_headergrp);

        hdf5_paramsgrp = my_H5Gcreate(hdf5_file, "/Parameters", 0);
        write_parameters_attributes_in_hdf5(hdf5_paramsgrp);

        hdf5_configgrp = my_H5Gcreate(hdf5_file, "/Config", 0);
        write_compile_time_options_in_hdf5(hdf5_configgrp);
#endif /* #ifdef HAVE_HDF5 */
      }
      else
      {
        if(!(fd = fopen(fname, "w")))
        {
          printf("can't open file `%s' for writing snapshot.\n", fname);
          terminate("file open error");
        }

        if(All.SnapFormat == 2)
        {
          /* format 2 prefixes the header with a "HEAD" label block */
          blksize = sizeof(int) + 4 * sizeof(char);
          SKIP;
          my_fwrite((void *)"HEAD", sizeof(char), 4, fd);
          nextblock = sizeof(header) + 2 * sizeof(int);
          my_fwrite(&nextblock, sizeof(int), 1, fd);
          SKIP;
        }

        blksize = sizeof(header);
        SKIP;
        my_fwrite(&header, sizeof(header), 1, fd);
        SKIP;
      }
    }

    /* loop over all block numbers up to IO_LASTENTRY (1000 is only a safety bound) */
    for(bnr = 0; bnr < 1000; bnr++)
    {
      blocknr = (enum iofields)bnr;

      if(blocknr == IO_LASTENTRY)
        break;

      if(blockpresent(blocknr, 1))
      {
        bytes_per_blockelement = get_bytes_per_blockelement(blocknr, 0);

        /* number of elements of this block that fit into CommBuffer */
        blockmaxlen = (int)(COMMBUFFERSIZE / bytes_per_blockelement);

        npart = get_particles_in_block(blocknr, &typelist[0]);

        if(npart > 0)
        {
          if(ThisTask == 0)
          {
            /* local buffer; the function-level buf exists only with HAVE_HDF5 */
            char buf[1000];

            get_dataset_name(blocknr, buf);
            if(subbox_flag == 0)
              printf("writing block %d (%s)...\n", blocknr, buf);
          }

          if(ThisTask == writeTask)
          {
            if(All.SnapFormat == 1 || All.SnapFormat == 2)
            {
              if(All.SnapFormat == 2)
              {
                /* format 2: 4-character label block in front of the data block */
                blksize = sizeof(int) + 4 * sizeof(char);
                SKIP;
                get_Tab_IO_Label(blocknr, label);
                my_fwrite(label, sizeof(char), 4, fd);
                nextblock = npart * bytes_per_blockelement + 2 * sizeof(int);
                my_fwrite(&nextblock, sizeof(int), 1, fd);
                SKIP;
              }

              /* leading size marker of the data block; the trailing one is
               * written after all types have been processed */
              blksize = npart * bytes_per_blockelement;
              SKIP;
            }
          }

          for(type = 0; type < NTYPES; type++)
          {
            if(typelist[type])
            {
#ifdef HAVE_HDF5
              if(ThisTask == writeTask && All.SnapFormat == 3 && header.npart[type] > 0)
              {
                /* map the block's file datatype onto an HDF5 native type */
                switch(get_datatype_in_block(blocknr, 0))
                {
                  case FILE_INT:
                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT);
                    break;
                  case FILE_MY_IO_FLOAT:
#ifdef OUTPUT_IN_DOUBLEPRECISION
                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE);
#else  /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT);
#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */
                    break;
                  case FILE_MY_ID_TYPE:
#ifdef LONGIDS
                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT64);
#else  /* #ifdef LONGIDS */
                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT32);
#endif /* #ifdef LONGIDS #else */
                    break;
                  case FILE_DOUBLE:
                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE);
                    break;
                  case FILE_FLOAT:
                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT);
                    break;
                }

                /* dataset shape: npart x values-per-element (1D if a single value) */
                dims[0] = header.npart[type];
                dims[1] = get_values_per_blockelement(blocknr);
                if(dims[1] == 1)
                  rank = 1;
                else
                  rank = 2;

                get_dataset_name(blocknr, buf);

                hdf5_dataspace_in_file = my_H5Screate_simple(rank, dims, NULL);
#ifdef HDF5_FILTERS
                hdf5_properties = my_H5Pcreate(H5P_DATASET_CREATE);
                my_H5Pset_chunk(hdf5_properties, rank, dims); /* set chunk size */
                my_H5Pset_shuffle(hdf5_properties);           /* reshuffle bytes to get better compression ratio */
                my_H5Pset_deflate(hdf5_properties, 9);        /* gzip compression level 9 */
                my_H5Pset_fletcher32(hdf5_properties);        /* Fletcher32 checksum on dataset */

                if(my_H5Pall_filters_avail(hdf5_properties))
                  hdf5_dataset =
                      my_H5Dcreate(hdf5_grp[type], buf, hdf5_datatype, hdf5_dataspace_in_file, hdf5_properties);
                else
                {
                  printf("HDF5_FILTERS: Warning selected filters not available! Writing data without filters! \n");
                  myflush(stdout);
                  hdf5_dataset = my_H5Dcreate(hdf5_grp[type], buf, hdf5_datatype, hdf5_dataspace_in_file, H5P_DEFAULT);
                }
#else  /* #ifdef HDF5_FILTERS */
                hdf5_dataset = my_H5Dcreate(hdf5_grp[type], buf, hdf5_datatype, hdf5_dataspace_in_file, H5P_DEFAULT);
#endif /* #ifdef HDF5_FILTERS #else */
                write_dataset_attributes(hdf5_dataset, blocknr);
              }
#endif /* #ifdef HAVE_HDF5 */

              pcsum = 0;
              int remaining_space = blockmaxlen; /* free element slots left in CommBuffer */
              int bufferstart = 0;               /* elements currently staged in CommBuffer */

              /* visit the tasks of the writing group in rank order; every task
               * in the group walks through this loop in lock-step */
              for(task = writeTask, offset = 0; task <= lastTask; task++)
              {
                /* the task whose turn it is announces its element count to all others */
                if(task == ThisTask)
                {
                  n_for_this_task = n_type[type];

                  for(p = writeTask; p <= lastTask; p++)
                    if(p != ThisTask)
                      MPI_Send(&n_for_this_task, 1, MPI_INT, p, TAG_NFORTHISTASK, MPI_COMM_WORLD);
                }
                else
                  MPI_Recv(&n_for_this_task, 1, MPI_INT, task, TAG_NFORTHISTASK, MPI_COMM_WORLD, &status);

                while(n_for_this_task > 0)
                {
                  /* chunk size: limited by buffer capacity and by the space still free */
                  pc = n_for_this_task;

                  if(pc > blockmaxlen)
                    pc = blockmaxlen;

                  if(pc > remaining_space)
                    pc = remaining_space;

                  /* append behind the elements already staged in CommBuffer */
                  void *buffer = (void *)((char *)CommBuffer + bufferstart * bytes_per_blockelement);

                  if(ThisTask == task)
                    fill_write_buffer(buffer, blocknr, &offset, pc, type, subbox_flag);

                  if(ThisTask == writeTask && task != writeTask)
                    MPI_Recv(buffer, bytes_per_blockelement * pc, MPI_BYTE, task, TAG_PDATA, MPI_COMM_WORLD, &status);

                  if(ThisTask != writeTask && task == ThisTask)
                    MPI_Ssend(buffer, bytes_per_blockelement * pc, MPI_BYTE, writeTask, TAG_PDATA, MPI_COMM_WORLD);

                  remaining_space -= pc;
                  bufferstart += pc;

                  if(remaining_space == 0)
                  {
                    /* write stuff (number of elements equal to bufferstart) */
                    if(ThisTask == writeTask)
                    {
                      if(All.SnapFormat == 3)
                      {
#ifdef HAVE_HDF5
                        /* select rows [pcsum, pcsum + bufferstart) of the file dataset */
                        start[0] = pcsum;
                        start[1] = 0;

                        count[0] = bufferstart;
                        count[1] = get_values_per_blockelement(blocknr);

                        my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL);

                        dims[0] = bufferstart;
                        dims[1] = get_values_per_blockelement(blocknr);
                        hdf5_dataspace_memory = my_H5Screate_simple(rank, dims, NULL);

                        my_H5Dwrite(hdf5_dataset, hdf5_datatype, hdf5_dataspace_memory, hdf5_dataspace_in_file,
                                    H5P_DEFAULT, CommBuffer, buf);

                        my_H5Sclose(hdf5_dataspace_memory, H5S_SIMPLE);
#endif /* #ifdef HAVE_HDF5 */
                      }
                      else
                      {
                        my_fwrite(CommBuffer, bytes_per_blockelement, bufferstart, fd);
                      }
                    }

                    /* buffer flushed: start filling it from the beginning again */
                    pcsum += bufferstart;
                    remaining_space = blockmaxlen;
                    bufferstart = 0;
                  }

                  n_for_this_task -= pc;
                }
              }

              if(bufferstart > 0)
              {
                /* write remaining stuff (number of elements equal to bufferstart) */
                if(ThisTask == writeTask)
                {
                  if(All.SnapFormat == 3)
                  {
#ifdef HAVE_HDF5
                    start[0] = pcsum;
                    start[1] = 0;

                    count[0] = bufferstart;
                    count[1] = get_values_per_blockelement(blocknr);

                    my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL);

                    dims[0] = bufferstart;
                    dims[1] = get_values_per_blockelement(blocknr);
                    hdf5_dataspace_memory = my_H5Screate_simple(rank, dims, NULL);

                    my_H5Dwrite(hdf5_dataset, hdf5_datatype, hdf5_dataspace_memory, hdf5_dataspace_in_file,
                                H5P_DEFAULT, CommBuffer, buf);

                    my_H5Sclose(hdf5_dataspace_memory, H5S_SIMPLE);
#endif /* #ifdef HAVE_HDF5 */
                  }
                  else
                  {
                    my_fwrite(CommBuffer, bytes_per_blockelement, bufferstart, fd);
                  }
                }

                pcsum += bufferstart;
                remaining_space = blockmaxlen;
                bufferstart = 0;
              }

#ifdef HAVE_HDF5
              /* release the per-dataset HDF5 handles created above for this type */
              if(ThisTask == writeTask && All.SnapFormat == 3 && header.npart[type] > 0)
              {
                if(All.SnapFormat == 3)
                {
                  my_H5Dclose(hdf5_dataset, buf);
#ifdef HDF5_FILTERS
                  my_H5Pclose(hdf5_properties);
#endif /* #ifdef HDF5_FILTERS */
                  my_H5Sclose(hdf5_dataspace_in_file, H5S_SIMPLE);
                  my_H5Tclose(hdf5_datatype);
                }
              }
#endif /* #ifdef HAVE_HDF5 */
            }
          }

          /* trailing size marker of the data block (binary formats only) */
          if(ThisTask == writeTask)
          {
            if(All.SnapFormat == 1 || All.SnapFormat == 2)
              SKIP;
          }
        }

#ifdef TOLERATE_WRITE_ERROR
        /* propagate the writer's error state to the whole writing group */
        if(ThisTask == writeTask)
        {
          for(int p = writeTask; p <= lastTask; p++)
            if(p != ThisTask)
              MPI_Send(&WriteErrorFlag, 1, MPI_INT, p, TAG_KEY, MPI_COMM_WORLD);
        }
        else
          MPI_Recv(&WriteErrorFlag, 1, MPI_INT, writeTask, TAG_KEY, MPI_COMM_WORLD, &status);
#endif /* #ifdef TOLERATE_WRITE_ERROR */
      }

#ifdef TOLERATE_WRITE_ERROR
      if(WriteErrorFlag) /* don't write further blocks in this case */
        break;
#endif /* #ifdef TOLERATE_WRITE_ERROR */
    }

    if(ThisTask == writeTask)
    {
      if(All.SnapFormat == 3)
      {
#ifdef HAVE_HDF5
        /* NOTE(review): buf still holds whatever was written to it last (a
         * dataset or group name), not necessarily this group's name; it is
         * presumably only used as a label by my_H5Gclose - confirm. */
        for(type = NTYPES - 1; type >= 0; type--)
          if(header.npart[type] > 0)
            my_H5Gclose(hdf5_grp[type], buf);
        my_H5Gclose(hdf5_headergrp, "/Header");
        my_H5Gclose(hdf5_paramsgrp, "/Parameters");
        my_H5Gclose(hdf5_configgrp, "/Config");

        sprintf(buf, "%s.hdf5", fname);
        my_H5Fclose(hdf5_file, buf);
#endif /* #ifdef HAVE_HDF5 */
      }
      else
        fclose(fd);
    }

#ifdef TOLERATE_WRITE_ERROR
    /* success: leave the retry loop */
    if(WriteErrorFlag == 0)
      break;

    if(try_io == 0)
    {
      if(ThisTask == writeTask)
      {
        printf(
            "TOLERATE_WRITE_ERROR: Try to write to alternative file: masterTask=%d lastTask=%d try_io=%d "
            "alternative-filename='%s'\n",
            writeTask, lastTask, try_io, alternative_fname);
        myflush(stdout);
      }
      fname = alternative_fname; /* try on a different output directory */
    }
    else
    {
      terminate("TOLERATE_WRITE_ERROR: Second try with alternative file failed too.\n");
    }
  }
#endif /* #ifdef TOLERATE_WRITE_ERROR */
}

#ifdef HAVE_HDF5
/*! \brief Write the fields contained in the header group of the HDF5 snapshot
 *  file.
 *
 *  This function stores the fields of the structure io_header as attributes
 *  belonging to the header group of the HDF5 file.
 *
 *  \param[in] handle A handle for the header group.
+ * + * \return void + */ +void write_header_attributes_in_hdf5(hid_t handle) +{ + hsize_t adim[1] = {NTYPES}; + hid_t hdf5_dataspace, hdf5_attribute; + + hdf5_dataspace = my_H5Screate(H5S_SIMPLE); + my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "NumPart_ThisFile"); + hdf5_attribute = my_H5Acreate(handle, "NumPart_ThisFile", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, header.npart, "NumPart_ThisFile"); + my_H5Aclose(hdf5_attribute, "NumPart_ThisFile"); + my_H5Sclose(hdf5_dataspace, H5S_SIMPLE); + + hdf5_dataspace = my_H5Screate(H5S_SIMPLE); + my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "NumPart_Total"); + hdf5_attribute = my_H5Acreate(handle, "NumPart_Total", H5T_NATIVE_UINT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_UINT, header.npartTotal, "NumPart_Total"); + my_H5Aclose(hdf5_attribute, "NumPart_Total"); + my_H5Sclose(hdf5_dataspace, H5S_SIMPLE); + + hdf5_dataspace = my_H5Screate(H5S_SIMPLE); + my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "NumPart_Total_HighWord"); + hdf5_attribute = my_H5Acreate(handle, "NumPart_Total_HighWord", H5T_NATIVE_UINT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_UINT, header.npartTotalHighWord, "NumPart_Total_HighWord"); + my_H5Aclose(hdf5_attribute, "NumPart_Total_HighWord"); + my_H5Sclose(hdf5_dataspace, H5S_SIMPLE); + + hdf5_dataspace = my_H5Screate(H5S_SIMPLE); + my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "MassTable"); + hdf5_attribute = my_H5Acreate(handle, "MassTable", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, header.mass, "MassTable"); + my_H5Aclose(hdf5_attribute, "MassTable"); + my_H5Sclose(hdf5_dataspace, H5S_SIMPLE); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Time", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.time, 
"Time"); + my_H5Aclose(hdf5_attribute, "Time"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Redshift", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.redshift, "Redshift"); + my_H5Aclose(hdf5_attribute, "Redshift"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "BoxSize", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.BoxSize, "BoxSize"); + my_H5Aclose(hdf5_attribute, "BoxSize"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "NumFilesPerSnapshot", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.num_files, "NumFilesPerSnapshot"); + my_H5Aclose(hdf5_attribute, "NumFilesPerSnapshot"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Omega0", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.Omega0, "Omega0"); + my_H5Aclose(hdf5_attribute, "Omega0"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "OmegaLambda", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.OmegaLambda, "OmegaLambda"); + my_H5Aclose(hdf5_attribute, "OmegaLambda"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "OmegaBaryon", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.OmegaBaryon, "OmegaBaryon"); + my_H5Aclose(hdf5_attribute, "OmegaBaryon"); + my_H5Sclose(hdf5_dataspace, 
H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "HubbleParam", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.HubbleParam, "HubbleParam"); + my_H5Aclose(hdf5_attribute, "HubbleParam"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_Sfr", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_sfr, "Flag_Sfr"); + my_H5Aclose(hdf5_attribute, "Flag_Sfr"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_Cooling", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_cooling, "Flag_Cooling"); + my_H5Aclose(hdf5_attribute, "Flag_Cooling"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_StellarAge", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_stellarage, "Flag_StellarAge"); + my_H5Aclose(hdf5_attribute, "Flag_StellarAge"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_Metals", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_metals, "Flag_Metals"); + my_H5Aclose(hdf5_attribute, "Flag_Metals"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_Feedback", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_feedback, "Flag_Feedback"); + my_H5Aclose(hdf5_attribute, "Flag_Feedback"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = 
my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_DoublePrecision", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_doubleprecision, "Flag_DoublePrecision"); + my_H5Aclose(hdf5_attribute, "Flag_DoublePrecision"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Composition_vector_length", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.composition_vector_length, "Composition_vector_length"); + my_H5Aclose(hdf5_attribute, "Composition_vector_length"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hid_t atype = my_H5Tcopy(H5T_C_S1); + + my_H5Tset_size(atype, strlen(GIT_COMMIT)); + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Git_commit", atype, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, atype, GIT_COMMIT, "Git_commit"); + my_H5Aclose(hdf5_attribute, "Git_commit"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + my_H5Tset_size(atype, strlen(GIT_DATE)); + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Git_date", atype, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, atype, GIT_DATE, "Git_date"); + my_H5Aclose(hdf5_attribute, "Git_date"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "UnitLength_in_cm", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.UnitLength_in_cm, "UnitLength_in_cm"); + my_H5Aclose(hdf5_attribute, "UnitLength_in_cm"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "UnitMass_in_g", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.UnitMass_in_g, "UnitMass_in_g"); + 
my_H5Aclose(hdf5_attribute, "UnitMass_in_g"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "UnitVelocity_in_cm_per_s", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.UnitVelocity_in_cm_per_s, "UnitVelocity_in_cm_per_s"); + my_H5Aclose(hdf5_attribute, "UnitVelocity_in_cm_per_s"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); +} + +/*! \brief Write the parameters read from the parameter file in the HDF5 + * snapshot file. + * + * This function stores the parameter io_header as attributes belonging + * to the parameter group of the HDF5 file. + * + * \param[in] handle A handle for the parameter group. + * + * \return void + */ +void write_parameters_attributes_in_hdf5(hid_t handle) +{ + hid_t hdf5_dataspace, hdf5_attribute, atype = my_H5Tcopy(H5T_C_S1); + int i = 0; + + my_H5Tset_size(atype, MAXLEN_PARAM_VALUE); + + for(i = 0; i < All.NParameters; i++) + { + switch(ParametersType[i]) + { + case 1: // REAL + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, Parameters[i], H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, ParametersValue[i], Parameters[i]); + my_H5Aclose(hdf5_attribute, Parameters[i]); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + break; + case 2: // STRING + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, Parameters[i], atype, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, atype, ParametersValue[i], Parameters[i]); + my_H5Aclose(hdf5_attribute, Parameters[i]); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + break; + case 3: // INT + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, Parameters[i], H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, ParametersValue[i], Parameters[i]); + my_H5Aclose(hdf5_attribute, 
Parameters[i]); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + break; + } + } + + my_H5Tclose(atype); +} + +/*! \brief A simple error handler for HDF5. + * + * This function terminates the run or if write errors are tolerated, calls + * the write_error() function to print information about the error and returns + * a positive integer to allow the repetition of the write operation + * (see also the HDF5 documentation). + * + * \param[in] unused The parameter is not used, but it is necessary for + * compatibility with the HDF5 library. + * + * \return 1 if the write error is tolerated, otherwise the run is terminated. + */ +herr_t my_hdf5_error_handler(void *unused) +{ +#ifdef TOLERATE_WRITE_ERROR + if(FlagNyt == 0) + write_error(2, 0, 0); + return 1; +#else + return 0; +#endif +} + +/*! \brief Write attributes to dataset, scaling with a and h (cosmological) + * and units. + * + * Only for hdf5 output. + * + * \param[in] hdf5_dataset Dataset identifier. + * \param[in] blocknumber Number of field which is written. 
+ * + * \return void + */ +void write_dataset_attributes(hid_t hdf5_dataset, enum iofields blocknr) +{ + int ind = -1; + + for(int f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + ind = f; + break; + } + } + + if(ind < 0) + { + return; + } + + if(IO_Fields[ind].hasunit == 0) + return; + + if(All.ComovingIntegrationOn) + { + hid_t hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hid_t hdf5_attribute = my_H5Acreate(hdf5_dataset, "a_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].a, "a_scaling"); + my_H5Aclose(hdf5_attribute, "a_scaling"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(hdf5_dataset, "h_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].h, "h_scaling"); + my_H5Aclose(hdf5_attribute, "h_scaling"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + } + else + { + double zero = 0; + hid_t hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hid_t hdf5_attribute = my_H5Acreate(hdf5_dataset, "a_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &zero, "a_scaling"); + my_H5Aclose(hdf5_attribute, "a_scaling"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(hdf5_dataset, "h_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &zero, "h_scaling"); + my_H5Aclose(hdf5_attribute, "h_scaling"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + } + + hid_t hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hid_t hdf5_attribute = my_H5Acreate(hdf5_dataset, "length_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].L, "length_scaling"); + my_H5Aclose(hdf5_attribute, "length_scaling"); + 
my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(hdf5_dataset, "mass_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].M, "mass_scaling"); + my_H5Aclose(hdf5_attribute, "mass_scaling"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(hdf5_dataset, "velocity_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].V, "velocity_scaling"); + my_H5Aclose(hdf5_attribute, "velocity_scaling"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(hdf5_dataset, "to_cgs", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].c, "to_cgs"); + my_H5Aclose(hdf5_attribute, "to_cgs"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); +} +#endif /* #ifdef HAVE_HDF5 */ + +#ifdef OUTPUT_XDMF +/*! \brief Outputs a xdmf file corresponding to this snapshot. + * + * This xdmf file can be used to load the snapshot into programs like visit. + * This option only works with output format 3 (hdf5). + * + * \param[in] fname Name of the snapshot. + * + * \return void + */ +static void write_xdmf(char *fname) +{ + FILE *f; + char buf[256], buf2[256]; + int i; + int npresent[NTYPES]; + + for(i = 0; i < NTYPES; i++) + npresent[i] = 0; + +#ifdef OUTPUT_IN_DOUBLEPRECISION + int prec = 8; +#else /* #ifdef OUTPUT_IN_DOUBLEPRECISION */ + int prec = 4; +#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION */ + + sprintf(buf, "%s.xmf", fname); + f = fopen(buf, "w"); + + fprintf(f, "\n"); + fprintf(f, "\n"); + fprintf(f, "\n"); + fprintf(f, " "); + + /* hdf5 file path relative to xmf file, uses basename function of libgen.h, + * i.e. 
POSIX version of basename() */ + sprintf(buf, "./%s.hdf5", basename(fname)); + int type = 0; + for(; type < NTYPES; type++) + { + int bnr; + + for(bnr = 0; bnr < 1000; bnr++) + { + enum iofields i = (enum iofields)bnr; + + if(i == IO_LASTENTRY) + break; + + if(blockpresent(i, 1)) + { + // get_particles_in_block(i, ntypes); + + if(header.npart[type] > 0) + { + if(i == IO_POS) + { + fprintf(f, " \n", type); + fprintf(f, " \n", header.npart[type]); + fprintf(f, " \n"); + fprintf(f, " \n", + header.npart[type], prec); + fprintf(f, " %s:/PartType0/Coordinates\n", buf); + fprintf(f, " \n"); + fprintf(f, " \n"); + + npresent[type] = 1; + } + else + { + int dim = get_values_per_blockelement(i); + int dtype = get_datatype_in_block(i, 0); + get_dataset_name(i, buf2); + + if(dim == 1 || dim == 3) + { + if(dtype == 1) + { + if(dim == 1) + { + fprintf(f, " \n", buf2); + fprintf(f, " \n", + header.npart[type], prec); + } + else + { + fprintf(f, " \n", buf2); + fprintf(f, + " \n", + header.npart[type], prec); + } + + fprintf(f, " %s:/PartType%d/%s\n", buf, type, buf2); + fprintf(f, " \n"); + fprintf(f, " \n"); + } + } + } + } + } + } + if(npresent[type] == 1) + { + fprintf(f, " \n"); + } + } + + fprintf(f, " \n"); + fprintf(f, ""); + + fclose(f); +} +#endif /* #ifdef OUTPUT_XDMF */ + +/*! \brief A wrapper for the fwrite() function. + * + * This catches I/O errors occuring for fwrite(). In this case we + * better stop. If stream is null, no attempt at writing is done. + * + * \param[in] ptr Pointer to the beginning of data to write. + * \param[in] size Size in bytes of a single data element. + * \param[in] nmemb Number of elements to be written. + * \param[in] stream Pointer to the output stream. + * + * \return Number of elements written to stream. 
+ */ +size_t my_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + size_t nwritten; + +#ifdef TOLERATE_WRITE_ERROR + if(WriteErrorFlag) + return 0; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + + if(!stream) + return 0; + + if(size * nmemb > 0) + { + if((nwritten = fwrite(ptr, size, nmemb, stream)) != nmemb) + { +#ifdef TOLERATE_WRITE_ERROR + write_error(0, nwritten, nmemb); +#else /* #ifdef TOLERATE_WRITE_ERROR */ + printf("I/O error (fwrite) on task=%d has occured: %s\n", ThisTask, strerror(errno)); + myflush(stdout); + terminate("write error"); +#endif /* #ifdef TOLERATE_WRITE_ERROR #else */ + } + } + else + nwritten = 0; + +#ifdef TOLERATE_WRITE_ERROR + if(ferror(stream)) + write_error(1, nwritten, nmemb); +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + + return nwritten; +} + +/*! \brief A wrapper for the fread() function. + * + * This catches I/O errors occuring for fread(). In this case we + * better stop. If stream is null, no attempt at readingis done. + * + * \param[out] ptr Pointer to the beginning of memory location where to + * store data. + * \param[in] size Size in bytes of a single data element. + * \param[in] nmemb Number of elements to be read. + * \param[in] stream Pointer to the input stream. + * + * \return Number of elements read from stream. + */ +size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + size_t nread; + + if(!stream) + return 0; + + if(size * nmemb > 0) + { + if((nread = fread(ptr, size, nmemb, stream)) != nmemb) + { + if(feof(stream)) + printf("I/O error (fread) on task=%d has occured: end of file\n", ThisTask); + else + printf("I/O error (fread) on task=%d has occured: %s\n", ThisTask, strerror(errno)); + myflush(stdout); + terminate("read error"); + } + } + else + nread = 0; + + return nread; +} + +/*! \brief A wrapper for the printf() function. + * + * This function has the same functionalities of the standard printf() + * function. 
However, data is written to the standard output only for + * the task with rank 0. + * + * \param[in] fmt String that contains format arguments. + * + * \return void + */ +void mpi_printf(const char *fmt, ...) +{ + if(ThisTask == 0) + { + va_list l; + va_start(l, fmt); + vprintf(fmt, l); + myflush(stdout); + va_end(l); + } +} + +/*! \brief A wrapper for the fprintf() function. + * + * This function has the same functionalities of the standard fprintf() + * function. However, data is written to the standard output only for + * the task with rank 0. + * + * \param[in] fmt String that contains format arguments. + * + * \return void + */ +void mpi_fprintf(FILE *stream, const char *fmt, ...) +{ + if(ThisTask == 0) + { + va_list l; + va_start(l, fmt); + vfprintf(stream, fmt, l); + myflush(stream); + va_end(l); + } +} + +/*! \brief A function for printing debug information in parallel. + * + * This function works like printf, however it takes care + * that the output is contigous in the stdout from task 0 to task NTask-1. + * Run this debug function only in code parts which all tasks reach. + * + * + * \param[in] fmt String that contains format arguments. + * + * \return void + */ +void mpi_printf_each(const char *fmt, ...) +{ + char buffer[2048]; + + va_list l; + va_start(l, fmt); + vsprintf(buffer, fmt, l); + va_end(l); + + if(ThisTask == 0) + { + // print own message + printf("%s", buffer); + + // print message from other tasks + unsigned int i; + + for(i = 1; i < NTask; i++) + { + MPI_Recv(buffer, 2048, MPI_CHAR, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + printf("%s", buffer); + } + } + + else + { + MPI_Send(buffer, strlen(buffer) + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD); + } +} + +/*! \brief Opens the requested file name and returns the file descriptor. + * + * If opening fails, an error is printed and the file descriptor is + * null. + * + * \param[in] fnam The file name. + * + * \return A file descriptor to the file. 
/*! \brief Opens the requested file for writing and returns its stream.
 *
 *  The file is opened with mode "w". If opening fails, a message is printed
 *  to stdout and a null pointer is returned; the caller has to check for it.
 *
 *  \param[in] fnam The file name.
 *
 *  \return Stream handle of the opened file, or NULL on failure.
 */
FILE *open_file(char *fnam)
{
  FILE *stream = fopen(fnam, "w");

  if(stream == NULL)
    {
      printf("can't open file `%s' for writing.\n", fnam);
    }

  return stream;
}
+ * + * \file src/io/io_fields.c + * \date 05/2018 + * \brief User defined functions for output; needed for all + * quantities that are not stored in a global array + * \details contains functions: + * static void io_func_task(int particle, int components, + * void *out_buffer, int mode) + * static void io_func_timebin_hydro(int particle, int + * components, void *out_buffer, int mode) + * static void io_func_timestep(int particle, int components, + * void *out_buffer, int mode) + * static void io_func_softenings(int particle, int components, + * void *out_buffer, int mode) + * void io_func_pos(int particle, int components, void *buffer, + * int mode) + * static void io_func_vel(int particle, int components, void + * *buffer, int mode) + * static void io_func_coolrate(int particle, int components, + * void *buffer, int mode) + * static void io_func_ne(int particle, int components, void + * *buffer, int mode) + * static void io_func_nh(int particle, int components, void + * *buffer, int mode) + * static void io_func_curlvel(int particle, int components, + * void *out_buffer, int mode) + * static void io_func_vorticity(int particle, int components, + * void *out_buffer, int mode) + * static void io_func_cell_spin(int particle, int components, + * void *out_buffer, int mode) + * static void io_func_bfield(int particle, int components, + * void *out_buffer, int mode) + * void init_io_fields() + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 07.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef OUTPUT_TASK +/*! \brief Output of the task the particles are at. + * + * \param[in] particle (unused) + * \param[in] components (unused) + * \param[out] out_buffer File output buffer. 
#ifdef OUTPUT_TASK
/*! \brief Output of the task the particles are at.
 *
 *  Stores ThisTask in the first element of the output buffer.
 *
 *  \param[in] particle (unused)
 *  \param[in] components (unused)
 *  \param[out] out_buffer File output buffer (written as int).
 *  \param[in] mode (unused)
 *
 *  \return void
 */
static void io_func_task(int particle, int components, void *out_buffer, int mode)
{
  int *task_out = (int *)out_buffer;

  task_out[0] = ThisTask;
}
#endif /* #ifdef OUTPUT_TASK */

#ifdef OUTPUT_TIMEBIN_HYDRO
/*! \brief Output function of the timebin corresponding to the hydrodynamic
 *         timestep.
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused)
 *  \param[out] out_buffer File output buffer (written as int).
 *  \param[in] mode (unused)
 *
 *  \return void
 */
static void io_func_timebin_hydro(int particle, int components, void *out_buffer, int mode)
{
  int *timebin_out = (int *)out_buffer;

  timebin_out[0] = P[particle].TimeBinHydro;
}
#endif /* #ifdef OUTPUT_TIMEBIN_HYDRO */

#ifdef OUTPUTTIMESTEP
/*! \brief Output function of the hydrodynamic timestep.
 *
 *  The step is 2^TimeBinHydro integer time units (0 for time bin 0),
 *  multiplied by All.Timebase_interval.
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused)
 *  \param[out] out_buffer File output buffer.
 *  \param[in] mode (unused)
 *
 *  \return void
 */
static void io_func_timestep(int particle, int components, void *out_buffer, int mode)
{
  MyOutputFloat *step_out = (MyOutputFloat *)out_buffer;
  integertime ti_step     = 0;

  if(P[particle].TimeBinHydro)
    ti_step = ((integertime)1) << P[particle].TimeBinHydro;

  step_out[0] = ti_step * All.Timebase_interval;
}
#endif /* #ifdef OUTPUTTIMESTEP */

#ifdef OUTPUT_SOFTENINGS
/*! \brief Output function of the force softening.
 *
 *  Looks up All.ForceSoftening for the particle's SofteningType.
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused)
 *  \param[out] out_buffer File output buffer.
 *  \param[in] mode (unused)
 *
 *  \return void
 */
static void io_func_softenings(int particle, int components, void *out_buffer, int mode)
{
  MyOutputFloat *soft_out = (MyOutputFloat *)out_buffer;

  soft_out[0] = All.ForceSoftening[P[particle].SofteningType];
}
#endif /* #ifdef OUTPUT_SOFTENINGS */
+ * + * \return void + */ +void io_func_pos(int particle, int components, void *buffer, int mode) +{ + int k; + + if(mode == 0) + { + if(DumpFlag != 3) // TODO: clean up this code duplication + { +#ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION + double *pp = buffer; +#else /* #ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION */ + MyOutputFloat *pp = buffer; +#endif /* #ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION #else */ + + for(k = 0; k < 3; k++) + { + pp[k] = P[particle].Pos[k] - All.GlobalDisplacementVector[k]; + +#if defined(GRAVITY_NOT_PERIODIC) + if(P[particle].Type != 0) + continue; +#endif /* #if defined(GRAVITY_NOT_PERIODIC) */ + double boxSize = All.BoxSize; +#ifdef LONG_X + if(k == 0) + boxSize = All.BoxSize * LONG_X; +#endif /* #ifdef LONG_X */ +#ifdef LONG_Y + if(k == 1) + boxSize = All.BoxSize * LONG_Y; +#endif /* #ifdef LONG_Y */ +#ifdef LONG_Z + if(k == 2) + boxSize = All.BoxSize * LONG_Z; +#endif /* #ifdef LONG_Z */ + while(pp[k] < 0) + pp[k] += boxSize; + while(pp[k] >= boxSize) + pp[k] -= boxSize; + } + } + else + { + MyOutputFloat *pp = buffer; + + for(k = 0; k < 3; k++) + { + pp[k] = P[particle].Pos[k] - All.GlobalDisplacementVector[k]; + +#if defined(GRAVITY_NOT_PERIODIC) + if(P[particle].Type != 0) + continue; +#endif /* #if defined(GRAVITY_NOT_PERIODIC) */ + double boxSize = All.BoxSize; +#ifdef LONG_X + if(k == 0) + boxSize = All.BoxSize * LONG_X; +#endif /* #ifdef LONG_X */ +#ifdef LONG_Y + if(k == 1) + boxSize = All.BoxSize * LONG_Y; +#endif /* #ifdef LONG_Y */ +#ifdef LONG_Z + if(k == 2) + boxSize = All.BoxSize * LONG_Z; +#endif /* #ifdef LONG_Z */ + while(pp[k] < 0) + pp[k] += boxSize; + while(pp[k] >= boxSize) + pp[k] -= boxSize; + } + } + } + else + { +#ifdef READ_COORDINATES_IN_DOUBLE + double *in_buffer = buffer; +#else /* #ifdef READ_COORDINATES_IN_DOUBLE */ + MyInputFloat *in_buffer = buffer; +#endif /* #ifdef READ_COORDINATES_IN_DOUBLE #else */ + + for(k = 0; k < components; k++) + { + P[particle].Pos[k] = in_buffer[k] + 
All.GlobalDisplacementVector[k]; + } + } +} + +/*! \brief IO function for velocities. + * + * Note the different factors of scalefactor in the output than in the code! + * + * \param[in] particle Index of particle/cell. + * \param[in] components Number of entries in array. + * \param[out] out_buffer File output buffer. + * \param[in] mode Mode 0: output, 1: input. + * + * \return void + */ +static void io_func_vel(int particle, int components, void *buffer, int mode) +{ + int k; + + if(mode == 0) + { + for(k = 0; k < components; k++) + { + ((MyOutputFloat *)buffer)[k] = P[particle].Vel[k]; + ((MyOutputFloat *)buffer)[k] *= sqrt(All.cf_a3inv); /* we are dealing with p = a^2 * xdot */ + } + } + else + { + for(k = 0; k < components; k++) + { + P[particle].Vel[k] = ((MyInputFloat *)buffer)[k]; + } + } +} + +#ifdef OUTPUTACCELERATION +/*! \brief IO function for gravitational accelerations. + * + * Note different a factors in output than in code. + * + * \param[in] particle Index of particle/cell. + * \param[in] components Number of entries in array. + * \param[out] out_buffer File output buffer. + * \param[in] mode Mode 0: output, 1: input. 
#ifdef OUTPUTACCELERATION
/*! \brief IO function for gravitational accelerations.
 *
 *  Note different a factors in output than in code: on output GravAccel
 *  (plus GravPM when PMGRID is set) is multiplied by All.cf_a2inv, except
 *  when RestartFlag is 6, where the values are written unscaled. On input
 *  the values are stored in GravAccel unchanged.
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused; always 3 entries are processed)
 *  \param[in,out] out_buffer File IO buffer.
 *  \param[in] mode Mode 0: output, 1: input.
 *
 *  \return void
 */
static void io_func_accel(int particle, int components, void *out_buffer, int mode)
{
  int k;

  if(mode == 0)
    {
      MyOutputFloat *acc_out = (MyOutputFloat *)out_buffer;

      if(RestartFlag != 6)
        for(k = 0; k < 3; k++)
          acc_out[k] = All.cf_a2inv * P[particle].GravAccel[k];
      else
        for(k = 0; k < 3; k++)
          acc_out[k] = P[particle].GravAccel[k];
#ifdef PMGRID
      if(RestartFlag != 6)
        for(k = 0; k < 3; k++)
          acc_out[k] += All.cf_a2inv * P[particle].GravPM[k];
      else
        for(k = 0; k < 3; k++)
          acc_out[k] += P[particle].GravPM[k];
#endif /* #ifdef PMGRID */
    }
  else
    {
      /* input buffers hold MyInputFloat, as in the other read paths of this
       * file; reading through MyOutputFloat breaks when the two types differ */
      const MyInputFloat *acc_in = (MyInputFloat *)out_buffer;

      for(k = 0; k < 3; k++)
        P[particle].GravAccel[k] = acc_in[k];
    }
}
#endif /* #ifdef OUTPUTACCELERATION */

/* -- user defined functions: additional physics -- */
#ifdef OUTPUTCOOLRATE
/*! \brief Output function of cooling rate.
 *
 *  Writes Utherm divided by the cooling time returned by GetCoolingTime(),
 *  or 0 if that cooling time is 0.
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused)
 *  \param[out] buffer File output buffer.
 *  \param[in] mode (unused)
 *
 *  \return void
 */
static void io_func_coolrate(int particle, int components, void *buffer, int mode)
{
  double tcool, ne, nh0, coolrate;

  ne = SphP[particle].Ne;
  SetOutputGasState(particle, &ne, &nh0, &coolrate);

  /* get cooling time */
  tcool = GetCoolingTime(SphP[particle].Utherm, SphP[particle].Density * All.cf_a3inv, &ne);

  /* convert cooling time with current thermal energy to du/dt */
  if(tcool != 0)
    ((MyOutputFloat *)buffer)[0] = SphP[particle].Utherm / tcool;
  else
    ((MyOutputFloat *)buffer)[0] = 0;
}
#endif /* #ifdef OUTPUTCOOLRATE */
/* -- user defined functions: gas properties -- */
#if defined(COOLING)
/*! \brief IO function of the electron number density.
 *
 *  On output the stored Ne is written; with USE_SFR it is first refreshed
 *  through SetOutputGasState() for cells with a positive star formation
 *  rate. On input the value is stored in SphP[particle].Ne.
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused)
 *  \param[in,out] buffer File IO buffer.
 *  \param[in] mode Mode 0: output, 1: input.
 *
 *  \return void
 */
static void io_func_ne(int particle, int components, void *buffer, int mode)
{
  if(mode == 0)
    {
      // normal code path: calculate Ne accounting for GFM options and USE_SFR
      double ne = SphP[particle].Ne;

#if defined(USE_SFR)
      // reproduces previous behavior that Ne is updated prior to output only for Sfr>0 cells
      // if this is unwanted (or redundant) this if() condition should be removed
      double nh0, coolrate;
      if(get_starformation_rate(particle) > 0)
        SetOutputGasState(particle, &ne, &nh0, &coolrate);
#endif /* #if defined(USE_SFR) */

      ((MyOutputFloat *)buffer)[0] = ne;
    }
  else
    {
      SphP[particle].Ne = ((MyInputFloat *)buffer)[0];
    }
}
#endif /* #if defined(COOLING) */

#if defined(COOLING)
/*! \brief Output function for neutral hydrogen fraction.
 *
 *  Writes the nh0 value returned by SetOutputGasState().
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused)
 *  \param[out] buffer File output buffer.
 *  \param[in] mode (unused)
 *
 *  \return void
 */
static void io_func_nh(int particle, int components, void *buffer, int mode)
{
  double ne, nh0, coolrate;

  ne = SphP[particle].Ne;
  SetOutputGasState(particle, &ne, &nh0, &coolrate);

  ((MyOutputFloat *)buffer)[0] = nh0;
}
#endif /* #if defined(COOLING) */

#ifdef USE_SFR
/*! \brief IO function for star formation rate.
 *
 *  On output the value of get_starformation_rate() is written; on input the
 *  value is stored in SphP[particle].Sfr.
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused)
 *  \param[in,out] buffer File IO buffer.
 *  \param[in] mode Mode 0: output, 1: input.
 *
 *  \return void
 */
static void io_func_sfr(int particle, int components, void *buffer, int mode)
{
  if(mode == 0)
    {
      ((MyOutputFloat *)buffer)[0] = get_starformation_rate(particle);
    }
  else
    {
      /* input buffers hold MyInputFloat, as in the other read paths of this
       * file; reading through MyOutputFloat breaks when the two types differ */
      SphP[particle].Sfr = ((MyInputFloat *)buffer)[0];
    }
}
#endif /* #ifdef USE_SFR */
/* -- user defined functions: other -- */
#if defined(OUTPUT_CURLVEL)
/*! \brief Output function for curl of velocity field.
 *
 *  Writes SphP[particle].CurlVel; nothing happens for modes other than 0.
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused)
 *  \param[out] out_buffer File IO buffer.
 *  \param[in] mode Mode 0: output.
 *
 *  \return void
 */
static void io_func_curlvel(int particle, int components, void *out_buffer, int mode)
{
  if(mode != 0)
    return;

  MyOutputFloat *curl_out = (MyOutputFloat *)out_buffer;

  curl_out[0] = SphP[particle].CurlVel;
}
#endif /* #if defined(OUTPUT_CURLVEL) */

#ifdef OUTPUT_VORTICITY
/*! \brief Output function of vorticity (calculated from velocity spatial
 *         derivatives).
 *
 *  Writes the three antisymmetric differences of SphP[particle].Grad.dvel;
 *  nothing happens for modes other than 0.
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused)
 *  \param[out] out_buffer File IO buffer.
 *  \param[in] mode Mode 0: output
 *
 *  \return void
 */
static void io_func_vorticity(int particle, int components, void *out_buffer, int mode)
{
  if(mode != 0)
    return;

  MyOutputFloat *vort_out = (MyOutputFloat *)out_buffer;

  vort_out[0] = SphP[particle].Grad.dvel[2][1] - SphP[particle].Grad.dvel[1][2];
  vort_out[1] = SphP[particle].Grad.dvel[0][2] - SphP[particle].Grad.dvel[2][0];
  vort_out[2] = SphP[particle].Grad.dvel[1][0] - SphP[particle].Grad.dvel[0][1];
}
#endif /* #ifdef OUTPUT_VORTICITY */

#ifdef MHD
/*! \brief IO function for magnetic field.
 *
 *  Note that the output is in Gauss unit system (in code units) while the
 *  internal B-field is in Heaviside-Lorentz system (FACTOR of sqrt(4 PI)!).
 *
 *  \param[in] particle Index of particle/cell.
 *  \param[in] components (unused; always 3 entries are processed)
 *  \param[in,out] out_buffer File IO buffer.
 *  \param[in] mode Mode 0: output, 1: input.
 *
 *  \return void
 */
static void io_func_bfield(int particle, int components, void *out_buffer, int mode)
{
  const double gauss_per_hl = sqrt(4. * M_PI);
  int k;

  if(mode == 0)
    {
      /* writing: convert from Heaviside-Lorentz to Gauss */
      MyOutputFloat *b_out = (MyOutputFloat *)out_buffer;

      for(k = 0; k < 3; k++)
        b_out[k] = SphP[particle].B[k] * gauss_per_hl;
    }
  else
    {
      /* reading: convert from Gauss to Heaviside-Lorentz */
      const MyInputFloat *b_in = (MyInputFloat *)out_buffer;

      for(k = 0; k < 3; k++)
        SphP[particle].B[k] = b_in[k] / gauss_per_hl;
    }
}
#endif /* #ifdef MHD */
/*! \brief Function for field registering.
 *
 *  Registers every snapshot IO field known to this build. Each field is
 *  declared with one init_field() call, followed by init_units() and, for
 *  some fields, init_snapshot_type(). Most optional fields are compiled in
 *  only when the corresponding option is defined.
 *
 *  For init_field arguments read the description of init_field.
 *  Don't forget to add the new IO_FLAG to allvars.h.
 *
 *  \return void
 */
void init_io_fields()
{
  /* ALL TYPES */

  /* file types of the coordinates follow the compile-time precision options */
#ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION
  enum types_in_file pos_out = FILE_DOUBLE;
#else  /* #ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION */
  enum types_in_file pos_out = FILE_MY_IO_FLOAT;
#endif /* #ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION #else */
#ifdef READ_COORDINATES_IN_DOUBLE
  enum types_in_file pos_in = FILE_DOUBLE;
#else  /* #ifdef READ_COORDINATES_IN_DOUBLE */
  enum types_in_file pos_in = FILE_MY_IO_FLOAT;
#endif /* #ifdef READ_COORDINATES_IN_DOUBLE #else */
  init_field(IO_POS, "POS ", "Coordinates", MEM_MY_DOUBLE, pos_out, pos_in, 3, A_NONE, 0, io_func_pos, ALL_TYPES);
  init_units(IO_POS, 1., -1., 1., 0., 0., All.UnitLength_in_cm);

  init_field(IO_POS_MINI, "POS ", "Coordinates", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_NONE, 3, A_NONE, 0, io_func_pos, ALL_TYPES);
  init_units(IO_POS_MINI, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
  init_snapshot_type(IO_POS_MINI, SN_MINI_ONLY); /* second IO tag output to mini-snaps always in single precision */

  init_field(IO_VEL, "VEL ", "Velocities", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_NONE, 0, io_func_vel,
             ALL_TYPES);                                                 /* particle velocities */
  init_units(IO_VEL, 0.5, 0., 0., 0., 1., All.UnitVelocity_in_cm_per_s); /* sqrt(a)*km/s */
  init_snapshot_type(IO_VEL, SN_MINI);

  init_field(IO_ID, "ID ", "ParticleIDs", MEM_MY_ID_TYPE, FILE_MY_ID_TYPE, FILE_MY_ID_TYPE, 1, A_P, &P[0].ID, 0, ALL_TYPES);
  init_units(IO_ID, 0, 0, 0, 0, 0, 0);
  init_snapshot_type(IO_ID, SN_MINI);

  init_field(IO_MASS, "MASS", "Masses", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_P, &P[0].Mass, 0,
             SET_IN_GET_PARTICLES_IN_BLOCK); /* particle mass */
  init_units(IO_MASS, 0., -1., 0., 1., 0., All.UnitMass_in_g);
  init_snapshot_type(IO_MASS, SN_MINI);

#ifdef OUTPUTPOTENTIAL
  init_field(IO_POT, "POT ", "Potential", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_P, &P[0].Potential, 0,
             ALL_TYPES);                                                                                     /* gravitational potential */
  init_units(IO_POT, -1.0, 0.0, 0.0, 0.0, 2.0, All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s); /* (km/s)^2/a */

  init_field(IO_POT_MINI, "POT ", "Potential", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_P, &P[0].Potential, 0,
             STARS_ONLY | BHS_ONLY);
  init_units(IO_POT_MINI, -1.0, 0.0, 0.0, 0.0, 2.0, All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s);
  init_snapshot_type(IO_POT_MINI, SN_MINI_ONLY); /* second IO tag output to mini-snaps for stars/BHs only */
#endif                                           /* #ifdef OUTPUTPOTENTIAL */

  /* GAS CELLS */

  init_field(IO_U, "U ", "InternalEnergy", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].Utherm, 0,
             GAS_ONLY); /* internal energy */
  init_units(IO_U, 0., 0., 0., 0., 2., All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s);
  init_snapshot_type(IO_U, SN_MINI);

  init_field(IO_RHO, "RHO ", "Density", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].Density, 0,
             GAS_ONLY); /* particle density */
  init_units(IO_RHO, -3., 2., -3., 1., 0., All.UnitDensity_in_cgs);
  init_snapshot_type(IO_RHO, SN_MINI);

#ifdef OUTPUT_PRESSURE
  init_field(IO_PRESSURE, "PRES", "Pressure", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].Pressure, 0, GAS_ONLY);
  init_units(IO_PRESSURE, -3.0, 2.0, -3.0, 1.0, 2.0,
             All.UnitDensity_in_cgs * All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s);
#endif /* #ifdef OUTPUT_PRESSURE */

#ifdef OUTPUT_CSND
  init_field(IO_CSND, "CSND", "SoundSpeed", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].Csnd, 0, GAS_ONLY);
  init_units(IO_CSND, 0., 0., 0., 0., 1., All.UnitVelocity_in_cm_per_s);
#endif /* #ifdef OUTPUT_CSND */

#if defined(COOLING)
  init_field(IO_NE, "NE ", "ElectronAbundance", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_ne,
             GAS_ONLY);                /* electron abundance */
  init_units(IO_NE, 0, 0, 0, 0, 0, 0); /* dimensionless fraction */
  init_snapshot_type(IO_NE, SN_MINI);

  init_field(IO_NH, "NH ", "NeutralHydrogenAbundance", MEM_NONE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_NONE, 0, io_func_nh,
             GAS_ONLY);                /* neutral hydrogen fraction */
  init_units(IO_NH, 0, 0, 0, 0, 0, 0); /* dimensionless fraction */
#endif                                 /* #if defined(COOLING) */

#ifdef USE_SFR
  init_field(IO_SFR, "SFR ", "StarFormationRate", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_sfr,
             GAS_ONLY);                                                    /* star formation rate */
  init_units(IO_SFR, 0.0, 0.0, -1.0, 1.0, 1.0, SOLAR_MASS / SEC_PER_YEAR); /* Msun/yr */
  init_snapshot_type(IO_SFR, SN_MINI);
#endif /* #ifdef USE_SFR */

#ifdef OUTPUT_DIVVEL
  init_field(IO_DIVVEL, "DIVV", "VelocityDivergence", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].DivVel, 0,
             GAS_ONLY);
  init_units(IO_DIVVEL, 0.0, 1.0, -1.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
#endif /* #ifdef OUTPUT_DIVVEL */

#if defined(OUTPUT_CURLVEL)
  init_field(IO_CURLVEL, "ROTV", "VelocityCurl", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_curlvel,
             GAS_ONLY);
  init_units(IO_CURLVEL, 0.0, 1.0, -1.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
#endif /* #if defined(OUTPUT_CURLVEL) */

#ifdef OUTPUT_COOLHEAT
  init_field(IO_COOLHEAT, "COHE", "CoolingHeatingEnergy", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].CoolHeat, 0,
             GAS_ONLY);
  init_units(IO_COOLHEAT, 0.0, 0.0, -1.0, 1.0, 3.0, All.UnitEnergy_in_cgs / All.UnitTime_in_s);
#endif /* #ifdef OUTPUT_COOLHEAT */

#ifdef OUTPUT_SURFACE_AREA
  init_field(IO_SAREA, "AREA", "SurfaceArea", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].SurfaceArea, 0,
             GAS_ONLY);
  init_units(IO_SAREA, 2.0, -2.0, 2.0, 0.0, 0.0, All.UnitLength_in_cm * All.UnitLength_in_cm);

  init_field(IO_NFACES, "NFAC", "NumFacesCell", MEM_INT, FILE_INT, FILE_INT, 1, A_SPHP, &SphP[0].CountFaces, 0, GAS_ONLY);
  init_units(IO_NFACES, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
#endif /* #ifdef OUTPUT_SURFACE_AREA */

#ifdef OUTPUTCOOLRATE
  init_field(IO_COOLRATE, "COOR", "CoolingRate", MEM_NONE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_NONE, 0, io_func_coolrate, GAS_ONLY);
  init_units(IO_COOLRATE, 0.0, 0.0, -1.0, 1.0, 3.0, 1.0);
#endif /* #ifdef OUTPUTCOOLRATE */

#ifdef OUTPUT_VORTICITY
  init_field(IO_VORT, "VORT", "Vorticity", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_NONE, 0, io_func_vorticity, GAS_ONLY);
  init_units(IO_VORT, 0.0, 1.0, -1.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
#endif /* #ifdef OUTPUT_VORTICITY */

  /* GAS CELLS GRADIENTS */

#ifdef OUTPUT_PRESSURE_GRADIENT
  init_field(IO_GRADP, "GRAP", "PressureGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 3, A_SPHP, &SphP[0].Grad.dpress[0], 0,
             GAS_ONLY);
  init_units(IO_GRADP, -4.0, 3.0, -4.0, 1.0, 2.0,
             All.UnitDensity_in_cgs * All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
#endif /* #ifdef OUTPUT_PRESSURE_GRADIENT */

#ifdef OUTPUT_DENSITY_GRADIENT
  init_field(IO_GRADR, "GRAR", "DensityGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 3, A_SPHP, &SphP[0].Grad.drho[0], 0,
             GAS_ONLY);
  init_units(IO_GRADR, -4., 3., -4., 1., 0., All.UnitDensity_in_cgs / All.UnitLength_in_cm);
#endif /* #ifdef OUTPUT_DENSITY_GRADIENT */

#ifdef OUTPUT_VELOCITY_GRADIENT
  init_field(IO_GRADV, "GRAV", "VelocityGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 9, A_SPHP, &SphP[0].Grad.dvel[0][0], 0,
             GAS_ONLY);
  init_units(IO_GRADV, 0., 1., -1., 0., 1., All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm); /* sqrt(a)*km/s */
#endif                                                                                            /* #ifdef OUTPUT_VELOCITY_GRADIENT */

#ifdef OUTPUT_BFIELD_GRADIENT
  init_field(IO_GRADB, "GRAB", "BfieldGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 9, A_SPHP, &SphP[0].Grad.dB[0][0], 0,
             GAS_ONLY);
  init_units(IO_GRADB, -3., 2., -2.5, 0.5, 1., pow(All.UnitPressure_in_cgs, 0.5) / All.UnitLength_in_cm);
#endif /* #ifdef OUTPUT_BFIELD_GRADIENT */

  /* GAS CELLS (MESH PROPERTIES) */

#ifdef OUTPUT_VOLUME
  init_field(IO_VOL, "VOL ", "Volume", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].Volume, 0, GAS_ONLY);
  init_units(IO_VOL, 3., -3., 3., 0., 0., All.UnitLength_in_cm * All.UnitLength_in_cm * All.UnitLength_in_cm);
#endif /* #ifdef OUTPUT_VOLUME */

#ifdef OUTPUT_VERTEX_VELOCITY
  init_field(IO_VERTEXVEL, "VEVE", "VertexVelocity", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_SPHP,
             &SphP[0].VelVertex[0], 0, GAS_ONLY);
  init_units(IO_VERTEXVEL, 1., 0., 0., 0., 1., All.UnitVelocity_in_cm_per_s);
#endif /* #ifdef OUTPUT_VERTEX_VELOCITY */

#ifdef OUTPUT_MESH_FACE_ANGLE
  init_field(IO_FACEANGLE, "FACA", "MaxFaceAngle", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].MaxFaceAngle, 0,
             GAS_ONLY);
  init_units(IO_FACEANGLE, 0., 0., 0., 0., 0., 0.0);
#endif /* #ifdef OUTPUT_MESH_FACE_ANGLE */

#ifdef OUTPUT_CENTER_OF_MASS
  init_field(IO_CM, "CMCE", "CenterOfMass", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_SPHP, &SphP[0].Center[0], 0,
             GAS_ONLY);
  init_units(IO_CM, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
#endif /* #ifdef OUTPUT_CENTER_OF_MASS */

  /* DIAGNOSTIC */

#ifdef OUTPUT_TASK
  init_field(IO_TASK, "TASK", "task", MEM_INT, FILE_INT, FILE_NONE, 1, A_NONE, 0, io_func_task, GAS_ONLY);
  init_units(IO_TASK, 0., 0., 0., 0., 0., 0.0);
#endif /* #ifdef OUTPUT_TASK */

#ifdef OUTPUT_TIMEBIN_HYDRO
  /* NOTE(review): this label has 3 characters while the other labels in this function have 4 — confirm against init_field */
  init_field(IO_TIMEBIN_HYDRO, "TBH", "TimebinHydro", MEM_NONE, FILE_INT, FILE_NONE, 1, A_NONE, 0, io_func_timebin_hydro, GAS_ONLY);
  init_units(IO_TIMEBIN_HYDRO, 0., 0., 0., 0., 0., 0.0);
#endif /* #ifdef OUTPUT_TIMEBIN_HYDRO */

#ifdef OUTPUTTIMESTEP
  init_field(IO_TSTP, "TSTP", "TimeStep", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_timestep, ALL_TYPES);
  init_units(IO_TSTP, 0., -1., 1., 0., -1., All.UnitTime_in_s);
#endif /* #ifdef OUTPUTTIMESTEP */

#ifdef OUTPUTACCELERATION
  init_field(IO_ACCEL, "ACCE", "Acceleration", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_NONE, 0, io_func_accel, ALL_TYPES);
  init_units(IO_ACCEL, -1., 1., -1., 0., 2., All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
#endif /* #ifdef OUTPUTACCELERATION */

#ifdef OUTPUT_SOFTENINGS
  init_field(IO_SOFTENING, "SOFT", "Softenings", MEM_NONE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_NONE, 0, io_func_softenings, ALL_TYPES);
  init_units(IO_SOFTENING, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
#endif /* #ifdef OUTPUT_SOFTENINGS */

#ifdef OUTPUTGRAVINTERACTIONS
  init_field(IO_GRAVITERACTIONS, "GINT", "GravityInteractions", MEM_INT, FILE_INT, FILE_NONE, 1, A_SPHP, &SphP[0].GravInteractions, 0,
             ALL_TYPES);
  init_units(IO_GRAVITERACTIONS, 0., 0., 0., 0., 0., 0.0);
#endif /* #ifdef OUTPUTGRAVINTERACTIONS */

  /* MHD */

#ifdef MHD
  enum types_in_file mhd_read = FILE_MY_IO_FLOAT;
#if defined(MHD_SEEDFIELD)
  if(RestartFlag == 0)
    mhd_read = FILE_NONE; /* magnetic field not expected in ICs */
#endif                    /* #if defined(MHD_SEEDFIELD) */

  init_field(IO_BFLD, "BFLD", "MagneticField", MEM_NONE, FILE_MY_IO_FLOAT, mhd_read, 3, A_NONE, 0, io_func_bfield,
             GAS_ONLY); /* magnetic field */
  init_units(IO_BFLD, -2., 1., -1.5, 0.5, 1., pow(All.UnitPressure_in_cgs, 0.5));

  init_field(IO_DIVB, "DIVB", "MagneticFieldDivergence", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].DivB, 0,
             GAS_ONLY); /* divergence of magnetic field */
  init_units(IO_DIVB, -3., 2., -2.5, 0.5, 1., pow(All.UnitPressure_in_cgs, 0.5) / All.UnitLength_in_cm);
#endif /* #ifdef MHD */

  /* Scalars */

#ifdef PASSIVE_SCALARS
  init_field(IO_PASS, "PASS", "PassiveScalars", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, PASSIVE_SCALARS, A_SPHP,
             &SphP[0].PScalars[0], 0, GAS_ONLY);
  init_units(IO_PASS, 0., 0., 0., 0., 0., 0.0);
#endif /* #ifdef PASSIVE_SCALARS */

  /* OTHER */

#ifdef SAVE_HSML_IN_SNAPSHOT
  init_field(IO_SUBFINDDENSITY, "SFDE", "SubfindDensity", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS, &PS[0].SubfindDensity, 0,
             ALL_TYPES);
  init_units(IO_SUBFINDDENSITY, -3., 2., -3., 1., 0., All.UnitDensity_in_cgs);
  init_snapshot_type(IO_SUBFINDDENSITY, SN_NO_SUBBOX);

  init_field(IO_SUBFINDDMDENSITY, "SFDD", "SubfindDMDensity", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS,
             &PS[0].SubfindDMDensity, 0, ALL_TYPES);
  init_units(IO_SUBFINDDMDENSITY, -3., 2., -3., 1., 0., All.UnitDensity_in_cgs);
  init_snapshot_type(IO_SUBFINDDMDENSITY, SN_NO_SUBBOX);

  init_field(IO_SUBFINDHSML, "SFHS", "SubfindHsml", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS, &PS[0].SubfindHsml, 0,
             ALL_TYPES);
  init_units(IO_SUBFINDHSML, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
  init_snapshot_type(IO_SUBFINDHSML, SN_NO_SUBBOX);

  init_field(IO_SUBFINDVELDISP, "SFVD", "SubfindVelDisp", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS, &PS[0].SubfindVelDisp, 0,
             ALL_TYPES);
  init_units(IO_SUBFINDVELDISP, 0.0, 0.0, 0.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s);
  init_snapshot_type(IO_SUBFINDVELDISP, SN_NO_SUBBOX);
#endif /* #ifdef SAVE_HSML_IN_SNAPSHOT */

#if defined(REFINEMENT_HIGH_RES_GAS)
  init_field(IO_HIGHRESMASS, "HRGM", "HighResGasMass", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].HighResMass, 0,
             GAS_ONLY);
  init_units(IO_HIGHRESMASS, 0, -1, 0, 1, 0, All.UnitMass_in_g);

  init_field(IO_ALLOWREFINEMENT, "REF ", "AllowRefinement", MEM_INT, FILE_INT, FILE_INT, 1, A_SPHP, &SphP[0].AllowRefinement, 0,
             GAS_ONLY);
  init_units(IO_ALLOWREFINEMENT, 0, 0, 0, 0, 0, 0);
#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */
}
a/src/amuse/community/arepo/src/io/logs.c b/src/amuse/community/arepo/src/io/logs.c new file mode 100644 index 0000000000..6354cf3609 --- /dev/null +++ b/src/amuse/community/arepo/src/io/logs.c @@ -0,0 +1,623 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/io/logs.c + * \date 05/2018 + * \brief Log-files handling. + * \details contains functions: + * void open_logfiles(void) + * void close_logfiles(void) + * void output_log_messages(void) + * void init_cpu_log(void) + * void write_cpu_log(void) + * void put_symbol(char *string, double t0, double t1, char c) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 07.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#define CPU_STRING_LEN 120 + +/*! \brief Contains informations about the used CPU timers like it's name, + * symbols etc. 
+ */ +struct timer_d Timer_data[CPU_LAST + 1]; + +enum timers TimerStack[TIMER_STACK_DEPTH]; +int TimerStackPos = 0; + +/*! \brief Opens files for logging. + * + * This function opens various log-files that report on the status and + * performance of the simulation. Upon restart, the code will append to + * these files. + * + * \return void + */ +void open_logfiles(void) +{ + char mode[2], buf[1000], msg[1000]; + + if(RestartFlag == 0) + strcpy(mode, "w"); + else + strcpy(mode, "a"); + + if(ThisTask == 0) + mkdir(All.OutputDir, 02755); + + MPI_Barrier(MPI_COMM_WORLD); + +#ifdef DETAILEDTIMINGS + sprintf(buf, "%stimings_detailed_%d.txt", All.OutputDir, ThisTask); + if(!(FdDetailed = fopen(buf, mode))) + terminate("error in opening file '%s'\n", buf); +#endif /* #ifdef DETAILEDTIMINGS */ + + if(ThisTask != 0) /* only the root processors writes to the log files */ + return; + + sprintf(buf, "%s%s", All.OutputDir, "cpu.txt"); + if(!(FdCPU = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "info.txt"); + if(!(FdInfo = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "energy.txt"); + if(!(FdEnergy = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "timings.txt"); + if(!(FdTimings = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "balance.txt"); + if(!(FdBalance = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "timebins.txt"); + if(!(FdTimebin = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "domain.txt"); + if(!(FdDomain = fopen(buf, mode))) 
+ { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "memory.txt"); + if(!(FdMemory = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + +#ifdef FORCETEST + sprintf(buf, "%s%s", All.OutputDir, "forcetest.txt"); + if(!(FdForceTest = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + fclose(FdForceTest); +#endif /* #ifdef FORCETEST */ + +#ifdef RESTART_DEBUG + sprintf(buf, "%s%s", All.OutputDir, "restartdebug.txt"); + if(!(FdRestartTest = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } +#endif /* #ifdef RESTART_DEBUG */ + +#ifdef OUTPUT_CPU_CSV + sprintf(buf, "%s%s", All.OutputDir, "cpu.csv"); + if(!(FdCPUCSV = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } +#endif /* #ifdef OUTPUT_CPU_CSV */ + +#ifdef USE_SFR + sprintf(buf, "%s%s", All.OutputDir, "sfr.txt"); + if(!(FdSfr = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } +#endif /* #ifdef USE_SFR */ + + int i = 0; + fprintf(FdBalance, "\n"); + +#ifdef OUTPUT_CPU_CSV + fprintf(FdCPUCSV, "STEP, TIME, CPUS, MULTIPLEDOMAIN, HIGHESTTIMEBIN, "); +#endif /* #ifdef OUTPUT_CPU_CSV */ + for(; i < CPU_LAST; i++) + { + if(Timer_data[i].symb != 0 && Timer_data[i].symbImbal != 0) + { + fprintf(FdBalance, "%-20s = '%c' / '%c'\n", Timer_data[i].longname, Timer_data[i].symb, Timer_data[i].symbImbal); + } +#ifdef OUTPUT_CPU_CSV + fprintf(FdCPUCSV, "%s1, %s2, %s3, ", Timer_data[i].shortname, Timer_data[i].shortname, Timer_data[i].shortname); +#endif /* #ifdef OUTPUT_CPU_CSV */ + } + fprintf(FdBalance, "\n"); + +#ifdef OUTPUT_CPU_CSV + fprintf(FdCPUCSV, "\n"); +#endif /* #ifdef OUTPUT_CPU_CSV */ +} + +/*! \brief Closes the global log-files. 
+ * + * \return void + */ +void close_logfiles(void) +{ + if(ThisTask != 0) /* only the root processors writes to the log files */ + return; + + fclose(FdCPU); + fclose(FdInfo); + fclose(FdEnergy); + fclose(FdTimings); + fclose(FdBalance); + fclose(FdTimebin); + +#ifdef OUTPUT_CPU_CSV + fclose(FdCPUCSV); +#endif /* #ifdef OUTPUT_CPU_CSV */ + +#ifdef USE_SFR + fclose(FdSfr); +#endif /* #ifdef USE_SFR */ +} + +/*! \brief Writes log messages in log-files. + * + * At each time step this function writes on to two log-files. + * In FdInfo, it just lists the timesteps that have been done, while in + * FdTimeBin it outputs information about the active and occupied time-bins. + * Additionally, reports to memory log-files are written. + * + * \return void + */ +void output_log_messages(void) +{ + double z; + int i, j, write_logs = 1; + double sum, avg_CPU_TimeBin[TIMEBINS], frac_CPU_TimeBin[TIMEBINS]; + int weight, corr_weight; + long long tot_cumulative_grav[TIMEBINS], tot_cumulative_sph[TIMEBINS]; + long long tot_grav, tot_sph; + + TIMER_START(CPU_LOGS); + + if(write_logs) + report_detailed_memory_usage_of_largest_task(); + + long long count[4 * TIMEBINS], tot_count[4 * TIMEBINS]; + long long *tot_count_grav = &tot_count[0], *tot_count_sph = &tot_count[TIMEBINS]; + int nelem = 2 * TIMEBINS; + + for(int i = 0; i < TIMEBINS; i++) + count[i] = TimeBinsGravity.TimeBinCount[i]; + + for(int i = 0; i < TIMEBINS; i++) + count[i + TIMEBINS] = TimeBinsHydro.TimeBinCount[i]; + + MPI_Reduce(count, tot_count, nelem, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + if(All.ComovingIntegrationOn) + { + z = 1.0 / (All.Time) - 1; + + if(write_logs) + fprintf(FdInfo, + "\nSync-Point %d, TimeBin=%d, Time: %g, Redshift: %g, Systemstep: %g, Dloga: %g, Nsync-grv: %10llu, Nsync-hyd: " + "%10llu\n", + All.NumCurrentTiStep, All.HighestActiveTimeBin, All.Time, z, All.TimeStep, + log(All.Time) - log(All.Time - All.TimeStep), All.GlobalNSynchronizedGravity, 
All.GlobalNSynchronizedHydro); + + printf("\n\nSync-Point %d, Time: %g, Redshift: %g, Systemstep: %g, Dloga: %g, Nsync-grv: %10llu, Nsync-hyd: %10llu\n", + All.NumCurrentTiStep, All.Time, z, All.TimeStep, log(All.Time) - log(All.Time - All.TimeStep), + All.GlobalNSynchronizedGravity, All.GlobalNSynchronizedHydro); + + if(write_logs) + fprintf(FdTimebin, "\nSync-Point %d, Time: %g, Redshift: %g, Systemstep: %g, Dloga: %g\n", All.NumCurrentTiStep, All.Time, + z, All.TimeStep, log(All.Time) - log(All.Time - All.TimeStep)); + + myflush(FdInfo); + } + else + { + if(write_logs) + fprintf(FdInfo, "\nSync-Point %d, TimeBin=%d, Time: %g, Systemstep: %g, Nsync-grv: %10llu, Nsync-hyd: %10llu\n", + All.NumCurrentTiStep, All.HighestActiveTimeBin, All.Time, All.TimeStep, All.GlobalNSynchronizedGravity, + All.GlobalNSynchronizedHydro); + + printf("\n\nSync-Point %d, Time: %g, Systemstep: %g, Nsync-grv: %10llu, Nsync-hyd: %10llu\n", All.NumCurrentTiStep, All.Time, + All.TimeStep, All.GlobalNSynchronizedGravity, All.GlobalNSynchronizedHydro); + + if(write_logs) + fprintf(FdTimebin, "\nSync-Point %d, Time: %g, Systemstep: %g\n", All.NumCurrentTiStep, All.Time, All.TimeStep); + + myflush(FdInfo); + } + + for(i = 1, tot_cumulative_grav[0] = tot_count_grav[0], tot_cumulative_sph[0] = tot_count_sph[0]; i < TIMEBINS; i++) + { + tot_cumulative_grav[i] = tot_count_grav[i] + tot_cumulative_grav[i - 1]; + tot_cumulative_sph[i] = tot_count_sph[i] + tot_cumulative_sph[i - 1]; + } + + for(i = 0; i < TIMEBINS; i++) + { + for(j = 0, sum = 0; j < All.CPU_TimeBinCountMeasurements[i]; j++) + sum += All.CPU_TimeBinMeasurements[i][j]; + if(All.CPU_TimeBinCountMeasurements[i]) + avg_CPU_TimeBin[i] = sum / All.CPU_TimeBinCountMeasurements[i]; + else + avg_CPU_TimeBin[i] = 0; + } + + for(i = All.HighestOccupiedTimeBin, weight = 1, sum = 0; i >= 0 && tot_count_grav[i] > 0; i--, weight *= 2) + { + if(weight > 1) + corr_weight = weight / 2; + else + corr_weight = weight; + + frac_CPU_TimeBin[i] = 
corr_weight * avg_CPU_TimeBin[i]; + sum += frac_CPU_TimeBin[i]; + } + + for(i = All.HighestOccupiedTimeBin; i >= 0 && tot_count_grav[i] > 0; i--) + { + if(sum) + frac_CPU_TimeBin[i] /= sum; + } + + char tracerString[13]; + + sprintf(tracerString, "%s", ""); + + char dustString[13]; + sprintf(dustString, "%s", ""); + if(write_logs) + fprintf(FdTimebin, + "Occupied timebins: gravity hydro %s %s dt cumul-grav cumul-sph A D avg-time " + "cpu-frac\n", + tracerString, dustString); + + for(i = TIMEBINS - 1, tot_grav = tot_sph = 0; i >= 0; i--) + { + int binUsed = 0; + +#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) + if(tot_count_grav[i] > 0) + binUsed = 1; +#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \ + */ + + if(tot_count_sph[i] > 0) + binUsed = 1; + + sprintf(tracerString, "%s", ""); + + if(binUsed) + { + if(write_logs) + fprintf(FdTimebin, " %c bin=%2d %10llu %10llu %s %s %16.12f %10llu %10llu %c %c %10.2f %5.1f%%\n", + TimeBinSynchronized[i] ? 'X' : ' ', i, tot_count_grav[i], tot_count_sph[i], tracerString, dustString, + i > 0 ? (((integertime)1) << i) * All.Timebase_interval : 0.0, tot_cumulative_grav[i], tot_cumulative_sph[i], + (i == All.HighestActiveTimeBin) ? '<' : ' ', + (All.HighestActiveTimeBin >= All.SmallestTimeBinWithDomainDecomposition && i == All.HighestActiveTimeBin) + ? '*' + : ' ', + avg_CPU_TimeBin[i], 100.0 * frac_CPU_TimeBin[i]); + + if(TimeBinSynchronized[i]) + { + tot_grav += tot_count_grav[i]; + tot_sph += tot_count_sph[i]; + } + } + } + + if(write_logs) + { + fprintf(FdTimebin, " ------------------------\n"); + } + + sprintf(tracerString, "%s", ""); + sprintf(dustString, "%s", ""); + + if(write_logs) + { +#ifdef PMGRID + if(All.PM_Ti_endstep == All.Ti_Current) + { + fprintf(FdTimebin, "PM-Step. 
Total: %10llu %10llu %s %s\n", tot_grav, tot_sph, tracerString, dustString); + } + else +#endif /* #ifdef PMGRID */ + { + fprintf(FdTimebin, "Total active: %10llu %10llu %s %s\n", tot_grav, tot_sph, tracerString, dustString); + } + + fprintf(FdTimebin, "\n"); + } + + myflush(FdTimebin); + } + +#ifdef RESTART_DEBUG + log_restart_debug(); +#endif /* #ifdef RESTART_DEBUG */ + + TIMER_STOP(CPU_LOGS); +} + +/*! \brief Initializes cpu log file. + * + * \return void + */ +void init_cpu_log(void) +{ + int i = 0; + +#define TIMER_STRUCT +#include "../utils/timer.h" + + for(i = 0; i < CPU_LAST; i++) + { + if(Timer_data[i].parent >= 0) + Timer_data[i].depth = Timer_data[Timer_data[i].parent].depth + 1; + else + Timer_data[i].depth = 0; + } + + for(i = 0; i < CPU_LAST; i++) + { + All.CPU_Sum[i] = 0.; + CPU_Step[i] = 0.; + } + + TimerStackPos = 0; + TimerStack[0] = CPU_MISC; + + CPUThisRun = 0.; + + WallclockTime = second(); + StartOfRun = second(); +} + +/*! \brief Write the FdBalance and FdCPU files. + * + * At each time step this function writes on to two log-files. + * In FdBalance, it outputs in a graphical way the amount of + * time spent in the various parts of the code, while + * in FdCPU it writes information about the cpu-time consumption + * of the various modules. 
+ * + * \return void + */ +void write_cpu_log(void) +{ + int write_logs = 1; + double max_CPU_Step[CPU_LAST], avg_CPU_Step[CPU_LAST], summed_CPU_Step[CPU_LAST]; + double t0, t1, tsum; + double avg_total = 0; + double local_total = 0; + double max_total = 0; + int i; + + TIMER_START(CPU_LOGS); + + for(i = 0; i < CPU_LAST; i++) + { + local_total += CPU_Step[i]; + } + + MPI_Reduce(CPU_Step, max_CPU_Step, CPU_LAST, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&local_total, &max_total, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(CPU_Step, avg_CPU_Step, CPU_LAST, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + /* sum up cpu items into groups */ + for(i = 0; i < CPU_LAST; i++) + { + summed_CPU_Step[i] = avg_CPU_Step[i]; + } + for(i = CPU_LAST - 1; i > CPU_ALL; i--) + { + if(Timer_data[i].parent >= 0) + { + summed_CPU_Step[Timer_data[i].parent] += summed_CPU_Step[i]; + } + } + + /* calc averages, update All.CPU_Sum */ + for(i = 0; i < CPU_LAST; i++) + { + avg_CPU_Step[i] /= NTask; + avg_total += avg_CPU_Step[i]; + + summed_CPU_Step[i] /= NTask; + All.CPU_Sum[i] += summed_CPU_Step[i]; + } + + /* create balance.txt string */ + char cpu_String[CPU_STRING_LEN + 1]; + put_symbol(cpu_String, 0., 1.0, '-'); + + for(i = 1, tsum = 0.0; i < CPU_LAST; i++) + { + if(max_CPU_Step[i] > 0 && Timer_data[i].symb != 0 && Timer_data[i].symbImbal != 0) + { + t0 = tsum; + t1 = tsum + avg_CPU_Step[i] * (avg_CPU_Step[i] / max_CPU_Step[i]); + put_symbol(cpu_String, t0 / avg_total, t1 / avg_total, Timer_data[i].symb); + tsum += t1 - t0; + + t0 = tsum; + t1 = tsum + avg_CPU_Step[i] * ((max_CPU_Step[i] - avg_CPU_Step[i]) / max_CPU_Step[i]); + put_symbol(cpu_String, t0 / avg_total, t1 / avg_total, Timer_data[i].symbImbal); + tsum += t1 - t0; + } + } + + if(write_logs) + { + fprintf(FdBalance, "Step=%7d sec=%10.3f Nsync-grv=%10llu Nsync-hyd=%10llu %s\n", All.NumCurrentTiStep, max_total, + All.GlobalNSynchronizedGravity, All.GlobalNSynchronizedHydro, 
cpu_String); + } + + myflush(FdBalance); + + if(All.CPU_TimeBinCountMeasurements[All.HighestActiveTimeBin] == NUMBER_OF_MEASUREMENTS_TO_RECORD) + { + All.CPU_TimeBinCountMeasurements[All.HighestActiveTimeBin]--; + memmove(&All.CPU_TimeBinMeasurements[All.HighestActiveTimeBin][0], &All.CPU_TimeBinMeasurements[All.HighestActiveTimeBin][1], + (NUMBER_OF_MEASUREMENTS_TO_RECORD - 1) * sizeof(double)); + } + + All.CPU_TimeBinMeasurements[All.HighestActiveTimeBin][All.CPU_TimeBinCountMeasurements[All.HighestActiveTimeBin]++] = max_total; + + if(write_logs) + { +#ifdef OUTPUT_CPU_CSV + fprintf(FdCPUCSV, "%d, %g, %d, %d, %d, ", All.NumCurrentTiStep, All.Time, NTask, All.MultipleDomains, + All.HighestActiveTimeBin); +#endif /* #ifdef OUTPUT_CPU_CSV */ + fprintf(FdCPU, "Step %d, Time: %g, CPUs: %d, MultiDomains: %d, HighestActiveTimeBin: %d\n", All.NumCurrentTiStep, All.Time, + NTask, All.MultipleDomains, All.HighestActiveTimeBin); + + fprintf(FdCPU, " diff cumulative\n"); + + for(i = 0; i < CPU_LAST; i++) + { + fprintf(FdCPU, "%*s%*s%10.2f %5.1f%% %10.2f %*s%5.1f%%\n", 2 * Timer_data[i].depth, "", -20 + 2 * Timer_data[i].depth, + Timer_data[i].longname, summed_CPU_Step[i], summed_CPU_Step[i] / summed_CPU_Step[CPU_ALL] * 100., All.CPU_Sum[i], + 5 * Timer_data[i].depth, "", All.CPU_Sum[i] / All.CPU_Sum[CPU_ALL] * 100.); + +#ifdef OUTPUT_CPU_CSV + fprintf(FdCPUCSV, "%f, %f, %f, ", summed_CPU_Step[i], All.CPU_Sum[i], All.CPU_Sum[i] / All.CPU_Sum[CPU_ALL] * 100.); +#endif /* #ifdef OUTPUT_CPU_CSV */ + } + + fprintf(FdCPU, "\n"); + } + + myflush(FdCPU); + +#ifdef OUTPUT_CPU_CSV + if(write_logs) + fprintf(FdCPUCSV, "\n"); + + myflush(FdCPUCSV); +#endif /* #ifdef OUTPUT_CPU_CSV */ + } + + for(i = 0; i < CPU_LAST; i++) + CPU_Step[i] = 0.; + + CPUThisRun = timediff(StartOfRun, second()); + + TIMER_STOP(CPU_LOGS); +} + +/*! \brief Fill the cpu balance string representing the cpu usage in a + * graphical way. 
+ * + * This function fills a fraction, specified by the parameters t0 and t1, + * of the array string with the debug symbol given by c. + * + * \param[out] string String to fill. + * \param[in] t0 Initial position of the symbol in the array as a fraction of + * its maximum dimension. + * \param[in] t1 Final position of the symbol in the array as a fraction of + * its maximum dimension. + * \param[in] c Symbol to be put on string. + * + * \return void + */ +void put_symbol(char *string, double t0, double t1, char c) +{ + int i, j; + + i = (int)(t0 * CPU_STRING_LEN + 0.5); + j = (int)(t1 * CPU_STRING_LEN); + + if(i < 0) + i = 0; + if(j >= CPU_STRING_LEN) + j = CPU_STRING_LEN; + + while(i <= j) + string[i++] = c; + + string[CPU_STRING_LEN] = 0; +} diff --git a/src/amuse/community/arepo/src/io/parameters.c b/src/amuse/community/arepo/src/io/parameters.c new file mode 100644 index 0000000000..059d422ceb --- /dev/null +++ b/src/amuse/community/arepo/src/io/parameters.c @@ -0,0 +1,861 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/io/parameters.c + * \date 05/2018 + * \brief Parses the parameter file. 
+ * \details This file contains the routine to parse the parameter file. + * Additionally the output list is also parsed. + * contains functions: + * void read_parameter_file(char *fname) + * void check_parameters() + * int read_outputlist(char *fname) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief This function parses the parameter file. + * + * Each parameter is defined by a keyword (`tag'), and can be either + * of type douple, int, or character string. Three arrays containing the name, + * type and address of the parameter are filled first. The routine then parses + * the parameter file and fills the referenced variables. The routine makes + * sure that each parameter appears exactly once in the parameter file, + * otherwise error messages are produced that complain about the missing + * parameters. + * + * \param[in] fname The file name of the parameter file + * + * \return void + */ +void read_parameter_file(char *fname) +{ +#define REAL 1 +#define STRING 2 +#define INT 3 + + FILE *fd, *fdout; + char buf[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 200], buf1[MAXLEN_PARAM_TAG + 200], buf2[MAXLEN_PARAM_VALUE + 200], + buf3[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 400]; + int i, j, nt; + int id[MAX_PARAMETERS]; + void *addr[MAX_PARAMETERS]; + char tag[MAX_PARAMETERS][MAXLEN_PARAM_TAG]; + int param_handled[MAX_PARAMETERS]; + int errorFlag = 0; + + All.StarformationOn = 0; /* defaults */ + + for(i = 0; i < MAX_PARAMETERS; i++) + { + param_handled[i] = 0; + } + + if(sizeof(long long) != 8) + { + mpi_terminate("\nType `long long' is not 64 bit on this platform. Stopping.\n\n"); + } + + if(sizeof(int) != 4) + { + mpi_terminate("\nType `int' is not 32 bit on this platform. 
Stopping.\n\n"); + } + + if(sizeof(float) != 4) + { + mpi_terminate("\nType `float' is not 32 bit on this platform. Stopping.\n\n"); + } + + if(sizeof(double) != 8) + { + mpi_terminate("\nType `double' is not 64 bit on this platform. Stopping.\n\n"); + } + + if(ThisTask == 0) /* read parameter file on process 0 */ + { + nt = 0; + + strcpy(tag[nt], "InitCondFile"); + addr[nt] = All.InitCondFile; + id[nt++] = STRING; + + strcpy(tag[nt], "OutputDir"); + addr[nt] = All.OutputDir; + id[nt++] = STRING; + +#ifdef TOLERATE_WRITE_ERROR + strcpy(tag[nt], "AlternativeOutputDir"); + addr[nt] = AlternativeOutputDir; + id[nt++] = STRING; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + + strcpy(tag[nt], "SnapshotFileBase"); + addr[nt] = All.SnapshotFileBase; + id[nt++] = STRING; + + strcpy(tag[nt], "ResubmitCommand"); + addr[nt] = All.ResubmitCommand; + id[nt++] = STRING; + + strcpy(tag[nt], "OutputListFilename"); + addr[nt] = All.OutputListFilename; + id[nt++] = STRING; + + strcpy(tag[nt], "OutputListOn"); + addr[nt] = &All.OutputListOn; + id[nt++] = INT; + + strcpy(tag[nt], "Omega0"); + addr[nt] = &All.Omega0; + id[nt++] = REAL; + + strcpy(tag[nt], "OmegaBaryon"); + addr[nt] = &All.OmegaBaryon; + id[nt++] = REAL; + + strcpy(tag[nt], "OmegaLambda"); + addr[nt] = &All.OmegaLambda; + id[nt++] = REAL; + + strcpy(tag[nt], "HubbleParam"); + addr[nt] = &All.HubbleParam; + id[nt++] = REAL; + + strcpy(tag[nt], "BoxSize"); + addr[nt] = &All.BoxSize; + id[nt++] = REAL; + + strcpy(tag[nt], "PeriodicBoundariesOn"); + addr[nt] = &All.PeriodicBoundariesOn; + id[nt++] = INT; + + strcpy(tag[nt], "MaxMemSize"); + addr[nt] = &All.MaxMemSize; + id[nt++] = INT; + + strcpy(tag[nt], "TimeOfFirstSnapshot"); + addr[nt] = &All.TimeOfFirstSnapshot; + id[nt++] = REAL; + + strcpy(tag[nt], "CpuTimeBetRestartFile"); + addr[nt] = &All.CpuTimeBetRestartFile; + id[nt++] = REAL; + +#ifdef REDUCE_FLUSH + strcpy(tag[nt], "FlushCpuTimeDiff"); + addr[nt] = &All.FlushCpuTimeDiff; + id[nt++] = REAL; +#endif /* #ifdef 
REDUCE_FLUSH */ + + strcpy(tag[nt], "TimeBetStatistics"); + addr[nt] = &All.TimeBetStatistics; + id[nt++] = REAL; + + strcpy(tag[nt], "TimeBegin"); + addr[nt] = &All.TimeBegin; + id[nt++] = REAL; + + strcpy(tag[nt], "TimeMax"); + addr[nt] = &All.TimeMax; + id[nt++] = REAL; + + strcpy(tag[nt], "TimeBetSnapshot"); + addr[nt] = &All.TimeBetSnapshot; + id[nt++] = REAL; + + strcpy(tag[nt], "UnitVelocity_in_cm_per_s"); + addr[nt] = &All.UnitVelocity_in_cm_per_s; + id[nt++] = REAL; + + strcpy(tag[nt], "UnitLength_in_cm"); + addr[nt] = &All.UnitLength_in_cm; + id[nt++] = REAL; + + strcpy(tag[nt], "UnitMass_in_g"); + addr[nt] = &All.UnitMass_in_g; + id[nt++] = REAL; + + strcpy(tag[nt], "ErrTolIntAccuracy"); + addr[nt] = &All.ErrTolIntAccuracy; + id[nt++] = REAL; + + strcpy(tag[nt], "ErrTolTheta"); + addr[nt] = &All.ErrTolTheta; + id[nt++] = REAL; + + strcpy(tag[nt], "ErrTolForceAcc"); + addr[nt] = &All.ErrTolForceAcc; + id[nt++] = REAL; + + strcpy(tag[nt], "MaxSizeTimestep"); + addr[nt] = &All.MaxSizeTimestep; + id[nt++] = REAL; + + strcpy(tag[nt], "MinSizeTimestep"); + addr[nt] = &All.MinSizeTimestep; + id[nt++] = REAL; + + strcpy(tag[nt], "CourantFac"); + addr[nt] = &All.CourantFac; + id[nt++] = REAL; + + strcpy(tag[nt], "LimitUBelowThisDensity"); + addr[nt] = &All.LimitUBelowThisDensity; + id[nt++] = REAL; + + strcpy(tag[nt], "LimitUBelowCertainDensityToThisValue"); + addr[nt] = &All.LimitUBelowCertainDensityToThisValue; + id[nt++] = REAL; + + strcpy(tag[nt], "DesNumNgb"); + addr[nt] = &All.DesNumNgb; + id[nt++] = INT; + + strcpy(tag[nt], "MultipleDomains"); + addr[nt] = &All.MultipleDomains; + id[nt++] = INT; + + strcpy(tag[nt], "TopNodeFactor"); + addr[nt] = &All.TopNodeFactor; + id[nt++] = REAL; + + strcpy(tag[nt], "ActivePartFracForNewDomainDecomp"); + addr[nt] = &All.ActivePartFracForNewDomainDecomp; + id[nt++] = REAL; + +#ifdef SUBFIND + strcpy(tag[nt], "DesLinkNgb"); + addr[nt] = &All.DesLinkNgb; + id[nt++] = INT; + + strcpy(tag[nt], "ErrTolThetaSubfind"); + 
addr[nt] = &All.ErrTolThetaSubfind; + id[nt++] = REAL; +#endif /* #ifdef SUBFIND */ + +#if defined(ISOTHERM_EQS) + strcpy(tag[nt], "IsoSoundSpeed"); + addr[nt] = &All.IsoSoundSpeed; + id[nt++] = REAL; +#endif /* #if defined(ISOTHERM_EQS) */ + + strcpy(tag[nt], "MaxNumNgbDeviation"); + addr[nt] = &All.MaxNumNgbDeviation; + id[nt++] = REAL; + + strcpy(tag[nt], "ComovingIntegrationOn"); + addr[nt] = &All.ComovingIntegrationOn; + id[nt++] = INT; + + strcpy(tag[nt], "ICFormat"); + addr[nt] = &All.ICFormat; + id[nt++] = INT; + + strcpy(tag[nt], "SnapFormat"); + addr[nt] = &All.SnapFormat; + id[nt++] = INT; + + strcpy(tag[nt], "NumFilesPerSnapshot"); + addr[nt] = &All.NumFilesPerSnapshot; + id[nt++] = INT; + + strcpy(tag[nt], "NumFilesWrittenInParallel"); + addr[nt] = &All.NumFilesWrittenInParallel; + id[nt++] = INT; + + strcpy(tag[nt], "ResubmitOn"); + addr[nt] = &All.ResubmitOn; + id[nt++] = INT; + + strcpy(tag[nt], "CoolingOn"); + addr[nt] = &All.CoolingOn; + id[nt++] = INT; + + strcpy(tag[nt], "StarformationOn"); + addr[nt] = &All.StarformationOn; + id[nt++] = INT; + + strcpy(tag[nt], "TypeOfTimestepCriterion"); + addr[nt] = &All.TypeOfTimestepCriterion; + id[nt++] = INT; + + strcpy(tag[nt], "TypeOfOpeningCriterion"); + addr[nt] = &All.TypeOfOpeningCriterion; + id[nt++] = INT; + + strcpy(tag[nt], "TimeLimitCPU"); + addr[nt] = &All.TimeLimitCPU; + id[nt++] = REAL; + + strcpy(tag[nt], "GasSoftFactor"); + addr[nt] = &All.GasSoftFactor; + id[nt++] = REAL; + + for(i = 0; i < NSOFTTYPES; i++) + { + char buf[100]; + sprintf(buf, "SofteningComovingType%d", i); + strcpy(tag[nt], buf); + addr[nt] = &All.SofteningComoving[i]; + id[nt++] = REAL; + } + + for(i = 0; i < NSOFTTYPES; i++) + { + char buf[100]; + sprintf(buf, "SofteningMaxPhysType%d", i); + strcpy(tag[nt], buf); + addr[nt] = &All.SofteningMaxPhys[i]; + id[nt++] = REAL; + } + + for(i = 0; i < NTYPES; i++) + { + char buf[100]; + sprintf(buf, "SofteningTypeOfPartType%d", i); + strcpy(tag[nt], buf); + addr[nt] = 
&All.SofteningTypeOfPartType[i]; + id[nt++] = INT; + } + +#ifdef ADAPTIVE_HYDRO_SOFTENING + strcpy(tag[nt], "MinimumComovingHydroSoftening"); + addr[nt] = &All.MinimumComovingHydroSoftening; + id[nt++] = REAL; + + strcpy(tag[nt], "AdaptiveHydroSofteningSpacing"); + addr[nt] = &All.AdaptiveHydroSofteningSpacing; + id[nt++] = REAL; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + + strcpy(tag[nt], "GravityConstantInternal"); + addr[nt] = &All.GravityConstantInternal; + id[nt++] = REAL; + + strcpy(tag[nt], "InitGasTemp"); + addr[nt] = &All.InitGasTemp; + id[nt++] = REAL; + + strcpy(tag[nt], "MinGasTemp"); + addr[nt] = &All.MinGasTemp; + id[nt++] = REAL; + + strcpy(tag[nt], "MinEgySpec"); + addr[nt] = &All.MinEgySpec; + id[nt++] = REAL; + + strcpy(tag[nt], "MinimumDensityOnStartUp"); + addr[nt] = &All.MinimumDensityOnStartUp; + id[nt++] = REAL; + +#ifdef NODEREFINE_BACKGROUND_GRID + strcpy(tag[nt], "MeanVolume"); + addr[nt] = &All.MeanVolume; + id[nt++] = REAL; +#endif /* #ifdef NODEREFINE_BACKGROUND_GRID */ + +#ifndef VORONOI_STATIC_MESH +#ifdef REGULARIZE_MESH_FACE_ANGLE + strcpy(tag[nt], "CellMaxAngleFactor"); + addr[nt] = &All.CellMaxAngleFactor; + id[nt++] = REAL; +#else /* #ifdef REGULARIZE_MESH_FACE_ANGLE */ + strcpy(tag[nt], "CellShapingFactor"); + addr[nt] = &All.CellShapingFactor; + id[nt++] = REAL; +#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */ + + strcpy(tag[nt], "CellShapingSpeed"); + addr[nt] = &All.CellShapingSpeed; + id[nt++] = REAL; +#endif /* #ifndef VORONOI_STATIC_MESH */ + +#if defined(COOLING) + strcpy(tag[nt], "TreecoolFile"); + addr[nt] = &All.TreecoolFile; + id[nt++] = STRING; +#endif /* #if defined(COOLING) */ + +#if defined(REFINEMENT) + strcpy(tag[nt], "ReferenceGasPartMass"); + addr[nt] = &All.ReferenceGasPartMass; + id[nt++] = REAL; + + strcpy(tag[nt], "TargetGasMassFactor"); + addr[nt] = &All.TargetGasMassFactor; + id[nt++] = REAL; + + strcpy(tag[nt], "RefinementCriterion"); + addr[nt] = &All.RefinementCriterion; + id[nt++] = INT; + 
+ strcpy(tag[nt], "DerefinementCriterion"); + addr[nt] = &All.DerefinementCriterion; + id[nt++] = INT; +#endif /* #if defined(REFINEMENT) */ + +#ifdef USE_SFR + strcpy(tag[nt], "CritOverDensity"); + addr[nt] = &All.CritOverDensity; + id[nt++] = REAL; + + strcpy(tag[nt], "TemperatureThresh"); + addr[nt] = &All.TemperatureThresh; + id[nt++] = REAL; + + strcpy(tag[nt], "CritPhysDensity"); + addr[nt] = &All.CritPhysDensity; + id[nt++] = REAL; + + strcpy(tag[nt], "FactorSN"); + addr[nt] = &All.FactorSN; + id[nt++] = REAL; + + strcpy(tag[nt], "FactorEVP"); + addr[nt] = &All.FactorEVP; + id[nt++] = REAL; + + strcpy(tag[nt], "TempSupernova"); + addr[nt] = &All.TempSupernova; + id[nt++] = REAL; + + strcpy(tag[nt], "TempClouds"); + addr[nt] = &All.TempClouds; + id[nt++] = REAL; + + strcpy(tag[nt], "MaxSfrTimescale"); + addr[nt] = &All.MaxSfrTimescale; + id[nt++] = REAL; +#endif /* #ifdef USE_SFR */ + +#ifdef MHD_SEEDFIELD + strcpy(tag[nt], "MHDSeedDir"); + addr[nt] = &All.B_dir; + id[nt++] = INT; + + strcpy(tag[nt], "MHDSeedValue"); + addr[nt] = &All.B_value; + id[nt++] = REAL; +#endif /* #ifdef MHD_SEEDFIELD */ + +#ifdef REFINEMENT_VOLUME_LIMIT + strcpy(tag[nt], "MaxVolumeDiff"); + addr[nt] = &All.MaxVolumeDiff; + id[nt++] = REAL; + + strcpy(tag[nt], "MinVolume"); + addr[nt] = &All.MinVolume; + id[nt++] = REAL; + + strcpy(tag[nt], "MaxVolume"); + addr[nt] = &All.MaxVolume; + id[nt++] = REAL; +#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */ + +#ifdef TILE_ICS + strcpy(tag[nt], "TileICsFactor"); + addr[nt] = &All.TileICsFactor; + id[nt++] = INT; +#endif /* #ifdef TILE_ICS */ + +#ifdef ADDBACKGROUNDGRID + strcpy(tag[nt], "GridSize"); + addr[nt] = &All.GridSize; + id[nt++] = INT; +#endif /* #ifdef ADDBACKGROUNDGRID */ + +#ifdef ONEDIMS_SPHERICAL + strcpy(tag[nt], "CoreRadius"); + addr[nt] = &All.CoreRadius; + id[nt++] = REAL; + + strcpy(tag[nt], "CoreMass"); + addr[nt] = &All.CoreMass; + id[nt++] = REAL; +#endif /* #ifdef ONEDIMS_SPHERICAL */ + + if((fd = fopen(fname, "r"))) + { + 
sprintf(buf, "%s%s", fname, "-usedvalues"); + if(!(fdout = fopen(buf, "w"))) + { + printf("error opening file '%s' \n", buf); + errorFlag = 1; + } + else + { + printf("Obtaining parameters from file '%s':\n\n", fname); + while(!feof(fd)) + { + *buf = 0; + fgets(buf, MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 200, fd); + if(sscanf(buf, "%s%s%s", buf1, buf2, buf3) < 2) + continue; + + if(buf1[0] == '%') + continue; + + for(i = 0, j = -1; i < nt; i++) + if(strcmp(buf1, tag[i]) == 0) + { + if(param_handled[i] == 0) + { + j = i; + param_handled[i] = 1; + break; + } + else + { + j = -2; + break; + } + } + + if(j >= 0) + { + switch(id[j]) + { + case REAL: + *((double *)addr[j]) = atof(buf2); + sprintf(buf3, "%%-%ds%%g\n", MAXLEN_PARAM_TAG); + fprintf(fdout, buf3, buf1, *((double *)addr[j])); + fprintf(stdout, " "); + fprintf(stdout, buf3, buf1, *((double *)addr[j])); + break; + case STRING: + strcpy((char *)addr[j], buf2); + sprintf(buf3, "%%-%ds%%s\n", MAXLEN_PARAM_TAG); + fprintf(fdout, buf3, buf1, buf2); + fprintf(stdout, " "); + fprintf(stdout, buf3, buf1, buf2); + break; + case INT: + *((int *)addr[j]) = atoi(buf2); + sprintf(buf3, "%%-%ds%%d\n", MAXLEN_PARAM_TAG); + fprintf(fdout, buf3, buf1, *((int *)addr[j])); + fprintf(stdout, " "); + fprintf(stdout, buf3, buf1, *((int *)addr[j])); + break; + } + } + else if(j == -2) + { +#ifdef ALLOWEXTRAPARAMS + warn("Tag '%s' ignored from file %s !", buf1, fname); +#else /* #ifdef ALLOWEXTRAPARAMS */ + fprintf(stdout, "Error in file %s: Tag '%s' multiply defined.\n", fname, buf1); + errorFlag = 1; +#endif /* #ifdef ALLOWEXTRAPARAMS #else */ + } + else + { +#ifdef ALLOWEXTRAPARAMS + warn("Tag '%s' ignored from file %s !", buf1, fname); +#else /* #ifdef ALLOWEXTRAPARAMS */ + fprintf(stdout, "Error in file %s: Tag '%s' not allowed\n", fname, buf1); + errorFlag = 1; +#endif /* #ifdef ALLOWEXTRAPARAMS #else */ + } + } + fclose(fd); + fclose(fdout); + printf("\n"); + + i = strlen(All.OutputDir); + if(i > 0) + if(All.OutputDir[i - 1] != 
'/') + strcat(All.OutputDir, "/"); + + mkdir(All.OutputDir, 02755); + sprintf(buf1, "%s%s", fname, "-usedvalues"); + sprintf(buf2, "%s%s", All.OutputDir, "parameters-usedvalues"); + sprintf(buf3, "cp %s %s", buf1, buf2); +#ifndef NOCALLSOFSYSTEM + if(errorFlag == 0) + system(buf3); +#endif /* #ifndef NOCALLSOFSYSTEM */ + } + } + else + { + printf("Parameter file %s not found.\n", fname); + errorFlag = 1; + } + + for(i = 0; i < nt; i++) + { + if(param_handled[i] != 1) + { + printf("Error. I miss a value for tag '%s' in parameter file '%s'.\n", tag[i], fname); + errorFlag = 1; + } + } + + if(All.OutputListOn && errorFlag == 0) + errorFlag += read_outputlist(All.OutputListFilename); + else + All.OutputListLength = 0; + } + + MPI_Bcast(&errorFlag, 1, MPI_INT, 0, MPI_COMM_WORLD); + + if(errorFlag) + { + MPI_Finalize(); + exit(errorFlag); + } + + All.NParameters = nt; + + /* now communicate the relevant parameters to the other processes */ + MPI_Bcast(&All, sizeof(struct global_data_all_processes), MPI_BYTE, 0, MPI_COMM_WORLD); + +#ifdef TOLERATE_WRITE_ERROR + MPI_Bcast(AlternativeOutputDir, MAXLEN_PATH, MPI_BYTE, 0, MPI_COMM_WORLD); +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + +#ifdef HOST_MEMORY_REPORTING + check_maxmemsize_setting(); +#endif /* #ifdef HOST_MEMORY_REPORTING */ + + mymalloc_init(); + + Parameters = (char(*)[MAXLEN_PARAM_TAG])mymalloc("Parameters", All.NParameters * MAXLEN_PARAM_TAG * sizeof(char)); + ParametersValue = (char(*)[MAXLEN_PARAM_VALUE])mymalloc("ParametersValue", All.NParameters * MAXLEN_PARAM_VALUE * sizeof(char)); + ParametersType = mymalloc("ParamtersType", All.NParameters * sizeof(char)); + + if(ThisTask == 0) + { + for(i = 0; i < All.NParameters; i++) + { + strncpy(Parameters[i], tag[i], MAXLEN_PARAM_TAG); + ParametersType[i] = id[i]; + void *tmp = ParametersValue[i]; + switch(id[i]) + { + case REAL: + *((double *)tmp) = *((double *)addr[i]); + break; + case STRING: + strncpy(tmp, addr[i], MAXLEN_PARAM_VALUE); + break; + case INT: + tmp = 
ParametersValue[i]; + *((int *)tmp) = *((int *)addr[i]); + break; + } + } + } + + MPI_Bcast(Parameters, sizeof(char) * All.NParameters * MAXLEN_PARAM_TAG, MPI_BYTE, 0, MPI_COMM_WORLD); + MPI_Bcast(ParametersValue, sizeof(char) * All.NParameters * MAXLEN_PARAM_VALUE, MPI_BYTE, 0, MPI_COMM_WORLD); + MPI_Bcast(ParametersType, sizeof(char) * All.NParameters, MPI_BYTE, 0, MPI_COMM_WORLD); + +#undef REAL +#undef STRING +#undef INT +} + +/*! \brief This function checks the consistency of the input parameters. + * + * If you encounter some possible misuse and a corresponding error message + * that is hard to interpret, a check should be placed in this function with + * a terminate statement and a clear explanation why this does not work. + * + * \return void + */ +void check_parameters() +{ + int i, errorFlag = 0; + + /* check whether time max is larger than max timestep */ + if(All.TimeMax - All.TimeBegin <= All.MaxSizeTimestep) + { + printf("PARAMETERS: check_parameters: TimeBegin = %g, TimeMax = %g, MaxSizeTimestep = %g \n", All.TimeBegin, All.TimeMax, + All.MaxSizeTimestep); + terminate( + "check_parameters: Your total runtime is smaller than the maximum allowed timestep! Choose an appropriate value for " + "MaxSizeTimestep < TimeMax-TimeBegin! 
\n"); + } + + /* check softening types */ + for(i = 0; i < NTYPES; i++) + { + if(All.SofteningTypeOfPartType[i] >= NSOFTTYPES || All.SofteningTypeOfPartType[i] < 0) + { + mpi_printf("SofteningTypeOfPartType% invalid (NSOFTTYPES=%d)\n", i, NSOFTTYPES); + errorFlag = 1; + } + } + + if(errorFlag) + mpi_terminate("Softening invalid!"); + + if(All.NumFilesWrittenInParallel > NTask) + { + if(ThisTask == 0) + warn("NOTICE: Reducing requested NumFilesWrittenInParallel=%d to %d\n", All.NumFilesWrittenInParallel, NTask); + All.NumFilesWrittenInParallel = NTask; + } + + if(All.NumFilesWrittenInParallel == 0) + { + mpi_printf("NOTICE: All.NumFilesWrittenInParallel has been set to be equal to the number of processors\n"); + All.NumFilesWrittenInParallel = NTask; + } + +#ifndef GRAVITY_NOT_PERIODIC + if(All.PeriodicBoundariesOn == 0) + { + mpi_terminate( + "Code was compiled with gravity periodic boundary conditions switched on.\nYou must set `PeriodicBoundariesOn=1', or " + "recompile the code.\n"); + } +#else /* #ifndef GRAVITY_NOT_PERIODIC */ + if(All.PeriodicBoundariesOn == 1) + { + mpi_terminate( + "Code was compiled with gravity periodic boundary conditions switched off.\nYou must set `PeriodicBoundariesOn=0', or " + "recompile the code.\n"); + } +#endif /* #ifndef GRAVITY_NOT_PERIODIC #else */ + +#ifdef COOLING + if(All.CoolingOn == 0) + { + mpi_terminate("Code was compiled with cooling switched on.\nYou must set `CoolingOn=1', or recompile the code.\n"); + } +#else /* #ifdef COOLING */ + if(All.CoolingOn == 1) + { + mpi_terminate("Code was compiled with cooling switched off.\nYou must set `CoolingOn=0', or recompile the code.\n"); + } +#endif /* #ifdef COOLING #else */ + + if(All.TypeOfTimestepCriterion >= 3) + { + mpi_terminate("The specified timestep criterion\nis not valid\n"); + } + +#if(NTYPES < 6) + mpi_terminate("NTYPES < 6 is not allowed.\n"); +#endif /* #if (NTYPES < 6) */ + +#if(NTYPES > 15) + mpi_terminate("NTYPES > 15 is not supported yet.\n"); +#endif /* #if 
(NTYPES > 15) */ + +#if(NTYPES > 8) + if(All.ICFormat == 1 || All.ICFormat == 2) + { + mpi_terminate("NTYPES>8 is not allowed with ICFormat=%d, since the header block is limited to 256 bytes.\n", All.ICFormat); + } +#endif /* #if (NTYPES > 8) */ + +#ifdef USE_SFR + if(All.StarformationOn == 0) + { + mpi_terminate("Code was compiled with star formation switched on.\nYou must set `StarformationOn=1', or recompile the code.\n"); + } + if(All.CoolingOn == 0) + { + mpi_terminate( + "You try to use the code with star formation enabled,\nbut you did not switch on cooling.\nThis mode is not supported.\n"); + } +#else /* #ifdef USE_SFR */ + if(All.StarformationOn == 1) + { + mpi_terminate("Code was compiled with star formation switched off.\nYou must set `StarformationOn=0', or recompile the code.\n"); + } +#endif /* #ifdef USE_SFR #else */ + +#if defined(ENFORCE_JEANS_STABILITY_OF_CELLS) && defined(USE_SFR) + if(ThisTask == 0) + warn("Code was compiled with ENFORCE_JEANS_STABILITY_OF_CELLS together with another EOS. Please make sure you really want this."); +#endif /* #if defined(ENFORCE_JEANS_STABILITY_OF_CELLS) && (defined(ISOTHERM_EQS) || (defined(USE_SFR) && !defined(FM_SFR))) */ +} + +/*! \brief This function reads a table with a list of desired output times. + * + * The table does not have to be ordered in any way, but may not contain more + * than MAXLEN_OUTPUTLIST entries. + * + * \param[in] fname The file name of the outputlist. + * + * \return 0: success 1: unable to open file. 
+ */ +int read_outputlist(char *fname) +{ + FILE *fd; + int count, flag; + char buf[512], msg[512]; + + if(!(fd = fopen(fname, "r"))) + { + printf("can't read output list in file '%s'\n", fname); + return 1; + } + + All.OutputListLength = 0; + + while(1) + { + if(fgets(buf, 500, fd) != buf) + break; + + count = sscanf(buf, " %lg %d ", &All.OutputListTimes[All.OutputListLength], &flag); + + if(count == 1) + flag = 1; + + if(count == 1 || count == 2) + { + if(All.OutputListLength >= MAXLEN_OUTPUTLIST) + { + sprintf(msg, "\ntoo many entries in output-list. You should increase MAXLEN_OUTPUTLIST=%d.\n", (int)MAXLEN_OUTPUTLIST); + terminate(msg); + } + + All.OutputListFlag[All.OutputListLength] = flag; + All.OutputListLength++; + } + } + + fclose(fd); + + printf("\nBEGRUN: found %d times in output-list.\n", All.OutputListLength); + + return 0; +} diff --git a/src/amuse/community/arepo/src/io/read_ic.c b/src/amuse/community/arepo/src/io/read_ic.c new file mode 100644 index 0000000000..97481c91ad --- /dev/null +++ b/src/amuse/community/arepo/src/io/read_ic.c @@ -0,0 +1,1900 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/io/read_ic.c + * \date 05/2018 + * \brief Contains the routines needed to load initial conditions. + * \details contains functions: + * void read_ic(const char *fname, int readTypes) + * MyIDType determine_ids_offset(void) + * void empty_read_buffer(enum iofields blocknr, int offset, + * int pc, int type) + * void share_particle_number_in_file(const char *fname, int + * filenr, int readTask, int lastTask, int readTypes) + * void read_file(const char *fname, int filenr, int readTask, + * int lastTask, int readTypes) + * int find_files(const char *fname) + * void distribute_file(int nfiles, int firstfile, int + * firsttask, int lasttask, int *filenr, int *master, int + * *last) + * herr_t hdf5_header_error_handler(void *unused) + * void read_header_attributes_in_hdf5(const char *fname) + * void read_header_attributes(FILE * fd) + * void swap_Nbyte(char *data, int n, int m) + * void swap_header() + * void tile_ics(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 08.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifndef IDS_OFFSET +#ifdef LONGIDS +#define IDS_OFFSET 100000000000 +#else /* #ifdef LONGIDS */ +#define IDS_OFFSET 1000000000 +#endif /* #ifdef LONGIDS #else */ +#endif /* #ifndef IDS_OFFSET */ + +#define SKIP \ + { \ + my_fread(&blksize1, sizeof(int), 1, fd); \ + } +#define SKIP2 \ + { \ + my_fread(&blksize2, sizeof(int), 1, fd); \ + } + +void read_header_attributes(FILE *fd); + +#ifdef HAVE_HDF5 +#include +void read_header_attributes_in_hdf5(const char *fname); +#endif /* #ifdef HAVE_HDF5 */ + +int num_files; + +int swap_file = 8; + +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) +/*! \brief Struct containing information about the number of particles per + * particle type. 
+ */ +static struct ntypes_data +{ + int npart[NTYPES]; +} * ntype_in_files; +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + +/*! \brief Reads initial conditions that are in one of the supported file + * formats. + * + * Snapshot files can be used as input files. However, when a + * snapshot file is used as input, not all the information in the header is + * used: THE STARTING TIME NEEDS TO BE SET IN THE PARAMETERFILE. + * Alternatively, the code can be started with restartflag 2, then snapshots + * from the code can be used as initial conditions-files without having to + * change the parameter file. For gas particles, only the internal energy is + * read, the density and mean molecular weight will be recomputed by the code. + * When InitGasTemp>0 is given, the gas temperature will be initialized to + * this value assuming a mean molecular weight either corresponding to + * complete neutrality, or full ionization. + * + * \param[in] fname File name of the ICs. + * \param[in] readTypes A bitfield that determines what particle types to + * read, only if the bit corresponding to a particle type is set, + * the corresponding data is loaded, otherwise its particle number + * is set to zero. (This is only implemented for HDF5 files.) 
+ * + * \return void + */ +void read_ic(const char *fname, int readTypes) +{ + int i, rep, rest_files, ngroups, gr, filenr, masterTask, lastTask, groupMaster; + double u_init, molecular_weight; + char buf[500]; + double t0, t1; + + if((All.ICFormat < 1) || (All.ICFormat > 4)) + { + mpi_terminate("ICFormat=%d not supported.\n", All.ICFormat); + } + + t0 = second(); + CPU_Step[CPU_MISC] += measure_time(); + + num_files = find_files(fname); + +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) + ntype_in_files = mymalloc("ntype_in_files", num_files * sizeof(struct ntypes_data)); + memset(ntype_in_files, 0, num_files * sizeof(struct ntypes_data)); +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + + All.TotNumPart = 0; + + /* we repeat reading the headers of the files two times. In the first iteration, only the + * particle numbers ending up on each processor are assembled, followed by memory allocation. + * In the second iteration, the data is actually read in. + */ + for(rep = 0; rep < 2; rep++) + { + NumPart = 0; + NumGas = 0; + +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) + if(rep == 1) + MPI_Allreduce(MPI_IN_PLACE, ntype_in_files, num_files * NTYPES, MPI_INT, MPI_SUM, MPI_COMM_WORLD); +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + + rest_files = num_files; + while(rest_files > NTask) + { + sprintf(buf, "%s.%d", fname, ThisTask + (rest_files - NTask)); + if(All.ICFormat == 3) + sprintf(buf, "%s.%d.hdf5", fname, ThisTask + (rest_files - NTask)); + + ngroups = NTask / All.NumFilesWrittenInParallel; + if((NTask % All.NumFilesWrittenInParallel)) + ngroups++; + groupMaster = (ThisTask / ngroups) * ngroups; + + for(gr = 0; gr < ngroups; gr++) + { + if(ThisTask == (groupMaster + gr)) /* ok, it's this processor's turn */ + { + if(rep == 0) + share_particle_number_in_file(buf, ThisTask + (rest_files - NTask), ThisTask, ThisTask, readTypes); + else + read_file(buf, ThisTask + (rest_files - NTask), ThisTask, ThisTask, readTypes); + } + 
MPI_Barrier(MPI_COMM_WORLD); + } + + rest_files -= NTask; + } + + if(rest_files > 0) + { + distribute_file(rest_files, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask); + + if(num_files > 1) + { + sprintf(buf, "%s.%d", fname, filenr); + if(All.ICFormat == 3) + sprintf(buf, "%s.%d.hdf5", fname, filenr); + } + else + { + sprintf(buf, "%s", fname); + if(All.ICFormat == 3) + sprintf(buf, "%s.hdf5", fname); + } + + ngroups = rest_files / All.NumFilesWrittenInParallel; + if((rest_files % All.NumFilesWrittenInParallel)) + ngroups++; + + for(gr = 0; gr < ngroups; gr++) + { + if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */ + { + if(rep == 0) + share_particle_number_in_file(buf, filenr, masterTask, lastTask, readTypes); + else + read_file(buf, filenr, masterTask, lastTask, readTypes); + } + MPI_Barrier(MPI_COMM_WORLD); + } + } + + /* now do the memory allocation */ + if(rep == 0) + { + int max_load, max_sphload; + MPI_Allreduce(&NumPart, &max_load, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(&NumGas, &max_sphload, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + +#ifdef GENERATE_GAS_IN_ICS + if(max_sphload < max_load) + max_sphload = max_load; +#endif /* #ifdef GENERATE_GAS_IN_ICS */ + + All.MaxPart = max_load / (1.0 - 2 * ALLOC_TOLERANCE); + All.MaxPartSph = max_sphload / (1.0 - 2 * ALLOC_TOLERANCE); + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + if(All.TotPartSpecial != 0) + All.MaxPartSpecial = (int)(All.TotPartSpecial); + else + terminate("Code compiled with option EXACT_GRAVITY_FOR_PARTICLE_TYPE but no particles of specified type found in ICs."); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + allocate_memory(); + + CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE); + } + } + + myfree(CommBuffer); + +#ifdef TILE_ICS + tile_ics(); +#endif /* #ifdef TILE_ICS */ + + /* this makes sure that masses are initialized in the case that the mass-block + is empty for this particle type */ + for(i = 0; i < NumPart; i++) + { + 
if(All.MassTable[P[i].Type] != 0) + P[i].Mass = All.MassTable[P[i].Type]; + } + + /* If we are reading in Gadget2 ICs, we need to compute the material + number from the ID */ +#ifdef READ_LEGACY_ICS + if(header.flag_entropy_instead_u) + { + sprintf(buf, "\nProblem: Legacy ICs cannot contain entropy in the u field!\n"); + terminate(buf); + } + + for(i = 0; i < NumGas; i++) + { + int j; + + double mat; + + modf(((double)(P[i].ID - EOS_ID_START)) / EOS_ID_SKIP, &mat); /* This stores the int part in variable mat and + discards the remainder */ + int imat = mat; + + SphP[i].Composition[imat] = 1.0; + } +#endif /* #ifdef READ_LEGACY_ICS */ + +#if defined(REFINEMENT) && defined(REFINEMENT_HIGH_RES_GAS) + if(RestartFlag == 0) /* All gas that is already present in the ICs is allowed to be (de-)refined */ + { + for(i = 0; i < NumGas; i++) + { + if(All.ReferenceGasPartMass == 0 || P[i].Mass < 1.2 * All.ReferenceGasPartMass) + SphP[i].AllowRefinement = 1; + } + } +#endif /* #if defined (REFINEMENT) && defined (REFINEMENT_HIGH_RES_GAS) */ + + for(i = 0; i < NumPart; i++) + P[i].SofteningType = All.SofteningTypeOfPartType[P[i].Type]; + +#ifdef GENERATE_GAS_IN_ICS + int count; + double fac, d, a, b, rho; + + if(RestartFlag == 0) + { + header.flag_entropy_instead_u = 0; + + MyIDType ids_offset = determine_ids_offset(); + + for(i = 0, count = 0; i < NumPart; i++) +#ifdef SPLIT_PARTICLE_TYPE + if((1 << P[i].Type) & (SPLIT_PARTICLE_TYPE)) +#else /* #ifdef SPLIT_PARTICLE_TYPE */ + if(P[i].Type == 1) +#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */ + count++; + + if(count) + { + domain_resize_storage(count, count, 0); + + memmove(P + count, P, sizeof(struct particle_data) * NumPart); + + NumPart += count; + NumGas += count; + + if(NumGas > All.MaxPartSph) + terminate("Task=%d ends up getting more SPH particles (%d) than allowed (%d)\n", ThisTask, NumGas, All.MaxPartSph); + +#ifdef REFINEMENT_HIGH_RES_GAS + for(i = 0; i < NumGas - count; i++) /* make sure that AllowRefinement is shifted 
with the particles */ + SphP[i + count].AllowRefinement = SphP[i].AllowRefinement; + for(i = 0; i < count; i++) /* by default, new cells are not allowed to be refined */ + SphP[i].AllowRefinement = 0; +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + + fac = All.OmegaBaryon / All.Omega0; + rho = All.Omega0 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + + int j; + + for(i = count, j = 0; i < NumPart; i++) +#ifdef SPLIT_PARTICLE_TYPE + if((1 << P[i].Type) & (SPLIT_PARTICLE_TYPE)) +#else /* #ifdef SPLIT_PARTICLE_TYPE */ + if(P[i].Type == 1) +#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */ + { + d = pow(P[i].Mass / rho, 1.0 / 3); + a = 0.5 * All.OmegaBaryon / All.Omega0 * d; + b = 0.5 * (All.Omega0 - All.OmegaBaryon) / All.Omega0 * d; + + P[j] = P[i]; + + P[j].Mass *= fac; + P[i].Mass *= (1 - fac); + P[j].Type = 0; + P[j].ID += ids_offset; + P[i].Pos[0] += a; + P[i].Pos[1] += a; + P[i].Pos[2] += a; + P[j].Pos[0] -= b; + P[j].Pos[1] -= b; + P[j].Pos[2] -= b; + +#ifdef REFINEMENT_HIGH_RES_GAS + if(P[i].Type == 1) /* also allow gas which is produced by splitting a high res DM particle to be (de-) refined */ + SphP[j].AllowRefinement = 2; +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + + j++; + } + + All.MassTable[0] = 0; + +#ifdef SPLIT_PARTICLE_TYPE + for(i = 1; i < NTYPES; i++) + if((1 << i) & (SPLIT_PARTICLE_TYPE)) + All.MassTable[i] *= (1 - fac); +#else /* #ifdef SPLIT_PARTICLE_TYPE */ + All.MassTable[1] *= (1 - fac); +#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */ + } + } +#endif /* #ifdef GENERATE_GAS_IN_ICS */ + +#ifdef READ_DM_AS_GAS + { + domain_resize_storage(0, NumPart, 0); + + if(NumGas > All.MaxPartSph) + terminate("Task=%d ends up getting more SPH particles (%d) than allowed (%d)\n", ThisTask, NumGas, All.MaxPartSph); + + for(i = 0; i < NumPart; i++) + { + P[i].Type = 0; + SphP[i].Utherm = 1.0; + } + + All.MassTable[0] = 0; + + header.npartTotal[0] = header.npartTotal[1]; + header.npartTotalHighWord[0] = header.npartTotalHighWord[1]; + header.npart[0] = 
header.npart[1]; + header.npartTotal[1] = 0; + header.npartTotalHighWord[1] = 0; + header.npart[1] = 0; + NumGas = NumPart; + All.TotNumGas = All.TotNumPart; + mpi_printf("READ_DM_AS_GAS: generated %lld gas particles from type %d\n", + header.npartTotal[0] + (((long long)header.npartTotalHighWord[0]) << 32), 0); + } +#endif /* #ifdef READ_DM_AS_GAS */ + +#ifdef USE_SFR + if(RestartFlag == 0) + { + if(All.MassTable[4] == 0 && All.MassTable[0] > 0) + { + All.MassTable[0] = 0; + All.MassTable[4] = 0; + } + } +#endif + + u_init = (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.InitGasTemp; + u_init *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; /* unit conversion */ + + if(All.InitGasTemp > 1.0e4) /* assuming FULL ionization */ + molecular_weight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC)); + else /* assuming NEUTRAL GAS */ + molecular_weight = 4 / (1 + 3 * HYDROGEN_MASSFRAC); + + u_init /= molecular_weight; + + All.InitGasU = u_init; + + header.mass[0] = 0; /* to make sure that the variable masses are stored in output file */ + All.MassTable[0] = 0; + + if(RestartFlag == 0) + { +#if defined(REFINEMENT_HIGH_RES_GAS) + for(i = 0; i < NumGas; i++) + if(SphP[i].AllowRefinement) + SphP[i].HighResMass = P[i].Mass; + else + SphP[i].HighResMass = 0; +#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */ + + if(All.InitGasTemp > 0) + { + for(i = 0; i < NumGas; i++) + { + if(ThisTask == 0 && i == 0 && SphP[i].Utherm == 0) + printf("READIC: Initializing u from InitGasTemp!\n"); + + if(SphP[i].Utherm == 0) + SphP[i].Utherm = All.InitGasU; + /* Note: the coversion to entropy will be done in the function init(), + after the densities have been computed */ + } + } + } + + for(i = 0; i < NumGas; i++) + { + SphP[i].Utherm = dmax(All.MinEgySpec, SphP[i].Utherm); + if(SphP[i].Density > 0) + SphP[i].Volume = P[i].Mass / SphP[i].Density; + } + + MPI_Barrier(MPI_COMM_WORLD); + + t1 = second(); + mpi_printf("READIC: reading done (took %g sec).\n", timediff(t0, t1)); + + /* verify number of 
particles */ + int num = 0; + long long glob_num; + for(i = 0; i < NumPart; i++) + num += 1; + sumup_large_ints(1, &num, &glob_num); + if(glob_num != All.TotNumPart) + terminate("glob_num (=%lld) != All.TotNumPart (=%lld)", glob_num, All.TotNumPart); + + mpi_printf("READIC: Total number of particles : %lld\n\n", All.TotNumPart); + + CPU_Step[CPU_SNAPSHOT] += measure_time(); +} + +/*! \brief This function computes a suitable offset for the particle IDs in + * case gas should be generated in the ICs. + * + * If the macro OFFSET_FOR_NON_CONTIGUOUS_IDS is not defined the code reverts + * to a fixed offset defined at the beginning of the file. + * + * \return Offset for the gas particles to be generated. + */ +MyIDType determine_ids_offset(void) +{ +#ifndef OFFSET_FOR_NON_CONTIGUOUS_IDS + MyIDType ids_offset = IDS_OFFSET; +#else /* #ifndef OFFSET_FOR_NON_CONTIGUOUS_IDS */ + if(All.MaxID == 0) /* MaxID not calculated yet */ + calculate_maxid(); + + int bits_used = 1; + int bits_available = CHAR_BIT * sizeof(MyIDType); + MyIDType ids_offset = 1; + + while(ids_offset <= All.MaxID && ids_offset > 0) + { + ids_offset <<= 1; + bits_used++; + } + + All.MaxID = 0; /* reset to allow recomputing */ + + if(ids_offset <= 0) + terminate("not enough memory to generate id offsets. Used %d bits out of %d\n", bits_used, bits_available); + +#ifdef LONGIDS + mpi_printf("GENERATE_GAS_IN_ICS: determined id offset as %llu. Used %d bits out of %d\n", ids_offset, bits_used, bits_available); +#else /* #ifdef LONGIDS */ + mpi_printf("GENERATE_GAS_IN_ICS: determined id offset as %u. Used %d bits out of %d\n", ids_offset, bits_used, bits_available); +#endif /* #ifdef LONGIDS #else */ + +#endif /* #ifndef OFFSET_FOR_NON_CONTIGUOUS_IDS */ + return ids_offset; +} + +/*! \brief Reads out the io buffer that was filled with particle data. + * + * The data in the io buffer is put in the appropriate places of the particle + * structures. + * + * \param[in] blocknr Data block present in io buffer. 
+ * \param[in] offset Particle corresponding to the first element in io buffer. + * \param[in] pc Number of elements in the io buffer. + * \param[in] type If blocknr=IO_POS P[n].Type is set to type. + * + * \return void + */ +void empty_read_buffer(enum iofields blocknr, int offset, int pc, int type) +{ + int n, k; + MyInputFloat *fp; + double *doublep; + MyIDType *ip; + int *intp; + float *floatp; + + int vt, vpb; + char *cp; + + fp = (MyInputFloat *)CommBuffer; + doublep = (double *)CommBuffer; + ip = (MyIDType *)CommBuffer; + intp = (int *)CommBuffer; + floatp = (float *)CommBuffer; + + cp = (char *)CommBuffer; + vt = get_datatype_in_block(blocknr, 1); + vpb = get_values_per_blockelement(blocknr); + if(vt == 2) + swap_Nbyte(cp, pc * vpb, 8); + else + { +#ifdef INPUT_IN_DOUBLEPRECISION + if(vt == 1) + swap_Nbyte(cp, pc * vpb, 8); + else +#endif /* #ifdef INPUT_IN_DOUBLEPRECISION */ + swap_Nbyte(cp, pc * vpb, 4); + } + + int field = -1; + int f; + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + field = f; + break; + } + } + + if(field < 0) + terminate("error: field not found"); + + for(n = 0; n < pc; n++) + { + if(IO_Fields[field].io_func) + { + int particle; + switch(IO_Fields[field].array) + { + case A_NONE: + case A_SPHP: + case A_P: + particle = offset + n; + break; + case A_PS: + terminate("Not good, trying to read into PS[]?\n"); + break; + default: + terminate("ERROR in empty_read_buffer: Array not found!\n"); + break; + } + + switch(IO_Fields[field].type_in_file_input) + { + case FILE_NONE: + terminate("error"); + break; + case FILE_INT: + IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, intp, 1); + intp += IO_Fields[field].values_per_block; + break; + case FILE_MY_ID_TYPE: + IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, ip, 1); + ip += IO_Fields[field].values_per_block; + break; + case FILE_MY_IO_FLOAT: + IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, fp, 
1); + fp += IO_Fields[field].values_per_block; + break; + case FILE_DOUBLE: + IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, doublep, 1); + doublep += IO_Fields[field].values_per_block; + break; + case FILE_FLOAT: + IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, floatp, 1); + floatp += IO_Fields[field].values_per_block; + break; + } + } + else + { + void *array_pos; + switch(IO_Fields[field].array) + { + case A_NONE: + array_pos = 0; + break; + case A_SPHP: + array_pos = SphP + offset + n; + break; + case A_P: + array_pos = P + offset + n; + break; + case A_PS: + terminate("Not good, trying to read into PS[]?\n"); + break; + default: + terminate("ERROR in empty_read_buffer: Array not found!\n"); + break; + } + + for(k = 0; k < IO_Fields[field].values_per_block; k++) + { + double value = 0; + switch(IO_Fields[field].type_in_file_input) + { + case FILE_MY_IO_FLOAT: + value = *fp; + fp++; + break; + case FILE_DOUBLE: + value = *doublep; + doublep++; + break; + case FILE_FLOAT: + value = *floatp; + floatp++; + break; + default: + break; + } + + switch(IO_Fields[field].type_in_memory) + { + case MEM_INT: + *((int *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(int))) = *intp; + intp++; + break; + case MEM_MY_ID_TYPE: + *((MyIDType *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyIDType))) = *ip; + ip++; + break; + case MEM_FLOAT: + *((float *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(float))) = value; + break; + + case MEM_DOUBLE: + *((double *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(double))) = value; + break; + + case MEM_MY_SINGLE: + *((MySingle *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MySingle))) = value; + break; + + case MEM_MY_FLOAT: + *((MyFloat *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyFloat))) = value; + break; + + case MEM_MY_DOUBLE: + *((MyDouble *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyDouble))) 
= value; + break; + + default: + terminate("ERROR in empty_read_buffer: Type not found!\n"); + break; + } + } + } + } + + if(blocknr == IO_VEL) + { + for(n = 0; n < pc; n++) + P[offset + n].Type = type; /* initialize type here as well */ + } +} + +/*! \brief Distributes the particle numbers in the file fname + * to tasks 'readTask' to 'lastTask', and calculates the number of + * particles each task gets. + * + * \param[in] fname Filename to be read. + * \param[in] readTask Task responsible for reading the file fname. + * \param[in] lastTask Last task which gets data contained in the file. + * \param[in] readTypes A bitfield that determines what particle types to + * read, only if the bit corresponding to a particle type is set, + * the corresponding data is loaded, otherwise its particle number + * is set to zero. (This is only implemented for HDF5 files.) + * + * \return void + */ +void share_particle_number_in_file(const char *fname, int filenr, int readTask, int lastTask, int readTypes) +{ + int i, n_in_file, n_for_this_task, ntask, task; + int blksize1, blksize2; + MPI_Status status; + FILE *fd = 0; + int type; + char label[4], buf[500]; + int nextblock; +#ifdef HAVE_HDF5 + hid_t hdf5_file = 0, hdf5_grp[NTYPES]; +#endif /* #ifdef HAVE_HDF5 */ + + if(ThisTask == readTask) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + { + if(!(fd = fopen(fname, "r"))) + { + sprintf(buf, "can't open file `%s' for reading initial conditions.\n", fname); + terminate(buf); + } + + if(All.ICFormat == 2) + { + SKIP; + swap_file = blksize1; + my_fread(&label, sizeof(char), 4, fd); + my_fread(&nextblock, sizeof(int), 1, fd); + swap_Nbyte((char *)&nextblock, 1, 4); + printf("Reading header => '%c%c%c%c' (%d byte)\n", label[0], label[1], label[2], label[3], nextblock); + SKIP2; + } + + SKIP; + if(All.ICFormat == 1) + { + if(blksize1 != 256) + swap_file = 1; + } + read_header_attributes(fd); + SKIP2; + swap_Nbyte((char *)&blksize1, 1, 4); + swap_Nbyte((char *)&blksize2, 1, 4); + + 
if(blksize1 != 256 || blksize2 != 256) + terminate("incorrect header format blocksize %d, %d\n", blksize1, blksize2); + + swap_header(); + +#ifdef COMBINETYPES + header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5]; + header.npart[3] += header.npart[4] + header.npart[5]; + header.npartTotal[4] = 0; + header.npartTotal[5] = 0; + header.npart[4] = 0; + header.npart[5] = 0; +#endif /* #ifdef COMBINETYPES */ + } + +#ifdef HAVE_HDF5 + if(All.ICFormat == 3) + { + read_header_attributes_in_hdf5(fname); + + hdf5_file = my_H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); + if(hdf5_file < 0) + terminate("cannot read initial conditions file %s", fname); + + for(type = 0; type < NTYPES; type++) + { + if(header.npart[type] > 0 && (readTypes & (1 << type))) + { + sprintf(buf, "/PartType%d", type); + hdf5_grp[type] = my_H5Gopen(hdf5_file, buf); + } + if(!(readTypes & (1 << type))) + { + // Override particle number in file. If we don't + // read the type, both npart and npartTotal will be 0 + header.npartTotal[type] = 0; + header.npart[type] = 0; + header.npartTotalHighWord[type] = 0; + header.mass[type] = 0; + } + } + } +#endif /* #ifdef HAVE_HDF5 */ + + for(task = readTask + 1; task <= lastTask; task++) + { + MPI_Ssend(&header, sizeof(header), MPI_BYTE, task, TAG_HEADER, MPI_COMM_WORLD); + MPI_Ssend(&swap_file, sizeof(swap_file), MPI_BYTE, task, TAG_KEY, MPI_COMM_WORLD); + } + } + else + { + MPI_Recv(&header, sizeof(header), MPI_BYTE, readTask, TAG_HEADER, MPI_COMM_WORLD, &status); + MPI_Recv(&swap_file, sizeof(swap_file), MPI_BYTE, readTask, TAG_KEY, MPI_COMM_WORLD, &status); + } + + if(header.num_files != num_files) + warn("header.num_files=%d != num_files=%d", header.num_files, num_files); + + if(All.TotNumPart == 0) + { + if(num_files == 1) + for(type = 0; type < NTYPES; type++) + { + if(header.npartTotal[type] != header.npart[type]) + { + warn("header.npartTotal[%d]=%d != header.npart[%d]=%d, setting header.npartTotal[%d] = header.npart[%d]\n", type, + 
header.npartTotal[type], type, header.npart[type], type, type); + header.npartTotal[type] = header.npart[type]; + } +#ifdef USE_SFR + header.npartTotalHighWord[type] = 0; +#endif + } + + All.TotNumGas = header.npartTotal[0] + (((long long)header.npartTotalHighWord[0]) << 32); +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + All.TotPartSpecial = header.npartTotal[EXACT_GRAVITY_FOR_PARTICLE_TYPE] + + (((long long)header.npartTotalHighWord[EXACT_GRAVITY_FOR_PARTICLE_TYPE]) << 32); + mpi_printf("Tot Special %d %d %d %d\n", All.TotPartSpecial, EXACT_GRAVITY_FOR_PARTICLE_TYPE, header.npart[4], + header.npartTotal[4]); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + + for(type = 0, All.TotNumPart = 0; type < NTYPES; type++) + { + All.TotNumPart += header.npartTotal[type]; + All.TotNumPart += (((long long)header.npartTotalHighWord[type]) << 32); + } + +#ifdef GENERATE_GAS_IN_ICS + if(RestartFlag == 0) + { + if(All.TotNumGas > 0) + terminate("You specified GENERATE_GAS_IN_ICS but your ICs already contain gas! 
(namely %lld gas cells)\n", All.TotNumGas); + +#ifdef SPLIT_PARTICLE_TYPE + for(i = 0; i < NTYPES; i++) + if((1 << i) & (SPLIT_PARTICLE_TYPE)) + { + All.TotNumGas += header.npartTotal[i] + (((long long)header.npartTotalHighWord[i]) << 32); + All.TotNumPart += header.npartTotal[i] + (((long long)header.npartTotalHighWord[i]) << 32); + mpi_printf("GENERATE_GAS_IN_ICS: generated %lld gas particles from type %d\n", + header.npartTotal[i] + (((long long)header.npartTotalHighWord[i]) << 32), i); + } +#else /* #ifdef SPLIT_PARTICLE_TYPE */ + All.TotNumGas += header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32); + All.TotNumPart += header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32); + mpi_printf("GENERATE_GAS_IN_ICS: generated %lld gas particles from type 1\n", + header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32)); +#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */ + } +#endif /* #ifdef GENERATE_GAS_IN_ICS */ + +#ifdef TILE_ICS + All.TotNumPart *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor; + All.TotNumGas *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor; +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + All.TotPartSpecial *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor; +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ +#endif /* #ifdef TILE_ICS */ + + for(i = 0; i < NTYPES; i++) + All.MassTable[i] = header.mass[i]; + + if(RestartFlag >= 2) + All.Time = All.TimeBegin = header.time; + else + All.Time = All.TimeBegin; + + set_cosmo_factors_for_current_time(); + } + + if(ThisTask == readTask) + { + for(type = 0, n_in_file = 0; type < NTYPES; type++) + n_in_file += header.npart[type]; + + printf("READIC: Reading file `%s' on task=%d and distribute it to %d to %d (contains %d particles).\n", fname, ThisTask, + readTask, lastTask, n_in_file); + + myflush(stdout); + } + + for(type = 0; type < NTYPES; type++) + { + n_in_file = header.npart[type]; + ntask = lastTask - readTask + 1; + 
n_for_this_task = n_in_file / ntask; + if((ThisTask - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + NumPart += n_for_this_task; + + if(type == 0) + NumGas += n_for_this_task; + } + + if(ThisTask == readTask) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + fclose(fd); +#ifdef HAVE_HDF5 + if(All.ICFormat == 3) + { + for(type = NTYPES - 1; type >= 0; type--) + if(header.npart[type] > 0) + { + sprintf(buf, "/PartType%d", type); + my_H5Gclose(hdf5_grp[type], buf); + } + my_H5Fclose(hdf5_file, fname); + } +#endif /* #ifdef HAVE_HDF5 */ + +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) + for(int type = 0; type < NTYPES; type++) + ntype_in_files[filenr].npart[type] = header.npart[type]; +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + } +} + +/*! \brief Reads a single snapshot file. + * + * This routine reads a single file. The data it contains is + * distributed to tasks 'readTask' to 'lastTask'. + * + * \param[in] fname Filename to be read. + * \param[in] readTask Task responsible for reading the file fname + * \param[in] lastTask Last task which gets data contained in the file + * \param[in] readTypes readTypes is a bitfield that determines what particle + * types to read, only if the bit corresponding to a particle type + * is set, the corresponding data is loaded, otherwise its particle + * number is set to zero. (This is only implemented for HDF5 + * files.) 
+ * + * \return void + */ +void read_file(const char *fname, int filenr, int readTask, int lastTask, int readTypes) +{ + int blockmaxlen; + int n_in_file, n_for_this_task, ntask, pc, offset = 0, task; + int blksize1, blksize2; + MPI_Status status; + FILE *fd = 0; + int nall; + int type, bnr; + char label[4], expected_label[4], buf[500]; + int nstart, bytes_per_blockelement, npart, nextblock, typelist[NTYPES]; + enum iofields blocknr; + +#ifdef HAVE_HDF5 + int rank, pcsum; + hid_t hdf5_file = 0, hdf5_grp[NTYPES], hdf5_dataspace_in_file; + hid_t hdf5_datatype = 0, hdf5_dataspace_in_memory, hdf5_dataset; + hsize_t dims[2], count[2], start[2]; +#endif /* #ifdef HAVE_HDF5 */ + + if(ThisTask == readTask) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + { + if(!(fd = fopen(fname, "r"))) + { + sprintf(buf, "can't open file `%s' for reading initial conditions.\n", fname); + terminate(buf); + } + + if(All.ICFormat == 2) + { + SKIP; + swap_file = blksize1; + my_fread(&label, sizeof(char), 4, fd); + my_fread(&nextblock, sizeof(int), 1, fd); + swap_Nbyte((char *)&nextblock, 1, 4); + SKIP2; + } + + SKIP; + if(All.ICFormat == 1) + { + if(blksize1 != 256) + swap_file = 1; + } + read_header_attributes(fd); + SKIP2; + swap_Nbyte((char *)&blksize1, 1, 4); + swap_Nbyte((char *)&blksize2, 1, 4); + + swap_header(); + +#ifdef COMBINETYPES + header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5]; + header.npart[3] += header.npart[4] + header.npart[5]; + header.npartTotal[4] = 0; + header.npartTotal[5] = 0; + header.npart[4] = 0; + header.npart[5] = 0; +#endif /* #ifdef COMBINETYPES */ + } + +#ifdef HAVE_HDF5 + if(All.ICFormat == 3) + { + read_header_attributes_in_hdf5(fname); + + hdf5_file = my_H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); + if(hdf5_file < 0) + terminate("cannot read initial conditions file %s", fname); + + for(type = 0; type < NTYPES; type++) + { + if(header.npart[type] > 0 && (readTypes & (1 << type))) + { + sprintf(buf, "/PartType%d", type); + 
hdf5_grp[type] = my_H5Gopen(hdf5_file, buf); + } + if(!(readTypes & (1 << type))) + { + // Override particle number in file. If we don't + // read the type, both npart and npartTotal will be 0 + header.npartTotal[type] = 0; + header.npart[type] = 0; + header.npartTotalHighWord[type] = 0; + header.mass[type] = 0; + } + } + } +#endif /* #ifdef HAVE_HDF5 */ + + for(task = readTask + 1; task <= lastTask; task++) + MPI_Ssend(&header, sizeof(header), MPI_BYTE, task, TAG_HEADER, MPI_COMM_WORLD); + } + else + MPI_Recv(&header, sizeof(header), MPI_BYTE, readTask, TAG_HEADER, MPI_COMM_WORLD, &status); + +#ifdef INPUT_IN_DOUBLEPRECISION + if(header.flag_doubleprecision == 0) + { + sprintf(buf, "\nProblem: Code compiled with INPUT_IN_DOUBLEPRECISION, but input files are in single precision!\n"); + terminate(buf); + } +#else /* #ifdef INPUT_IN_DOUBLEPRECISION */ + if(header.flag_doubleprecision) + { + sprintf(buf, "\nProblem: Code not compiled with INPUT_IN_DOUBLEPRECISION, but input files are in double precision!\n"); + terminate(buf); + } +#endif /* #ifdef INPUT_IN_DOUBLEPRECISION #else */ + + if(ThisTask == readTask) + { + if(filenr == 0) + mpi_printf( + "\nREADIC: filenr=%d, '%s' contains:\n" + "READIC: Type 0 (gas): %8d (tot=%15lld) masstab= %g\n" + "READIC: Type 1 (halo): %8d (tot=%15lld) masstab= %g\n" + "READIC: Type 2 (disk): %8d (tot=%15lld) masstab= %g\n" + "READIC: Type 3 (bulge): %8d (tot=%15lld) masstab= %g\n" + "READIC: Type 4 (stars): %8d (tot=%15lld) masstab= %g\n" + "READIC: Type 5 (bndry): %8d (tot=%15lld) masstab= %g\n\n", + filenr, fname, header.npart[0], header.npartTotal[0] + (((long long)header.npartTotalHighWord[0]) << 32), All.MassTable[0], + header.npart[1], header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32), All.MassTable[1], + header.npart[2], header.npartTotal[2] + (((long long)header.npartTotalHighWord[2]) << 32), All.MassTable[2], + header.npart[3], header.npartTotal[3] + (((long long)header.npartTotalHighWord[3]) << 32), 
All.MassTable[3], + header.npart[4], header.npartTotal[4] + (((long long)header.npartTotalHighWord[4]) << 32), All.MassTable[4], + header.npart[5], header.npartTotal[5] + (((long long)header.npartTotalHighWord[5]) << 32), All.MassTable[5]); + } + + /* to collect the gas particles all at the beginning (in case several + snapshot files are read on the current CPU) we move the collisionless + particles such that a gap of the right size is created */ + + for(type = 0, nall = 0; type < NTYPES; type++) + { + n_in_file = header.npart[type]; + ntask = lastTask - readTask + 1; + n_for_this_task = n_in_file / ntask; + if((ThisTask - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + nall += n_for_this_task; + } + + memmove(&P[NumGas + nall], &P[NumGas], (NumPart - NumGas) * sizeof(struct particle_data)); + nstart = NumGas; + + for(bnr = 0; bnr < 1000; bnr++) + { + blocknr = (enum iofields)bnr; + + if(blocknr == IO_LASTENTRY) + { +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) + int pc = nstart; + + for(int type = 0; type < NTYPES; type++) + { + int n_in_file = header.npart[type]; + + long long nprevious = 0; + for(int t = 0; t < type; t++) + nprevious += header.npartTotal[t] + (((long long)header.npartTotalHighWord[t]) << 32); + + for(int nr = 0; nr < filenr; nr++) + nprevious += ntype_in_files[nr].npart[type]; + + for(int task = readTask; task <= lastTask; task++) + { + int n_for_this_task = n_in_file / ntask; + if((task - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + if(ThisTask == task) + { + for(int i = 0; i < n_for_this_task; i++) + P[pc++].FileOrder = nprevious++; + } + else + nprevious += n_for_this_task; + } + } +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + break; + } + + /* proceed reading this field only if we are expecting it */ + if(blockpresent(blocknr, 0)) + { + if(ThisTask == readTask) + { + get_dataset_name(blocknr, buf); + if(filenr == 0) + mpi_printf("READIC: reading block %d (%s)...\n", blocknr, buf); + myflush(stdout); + } 
+ + bytes_per_blockelement = get_bytes_per_blockelement(blocknr, 1); + + blockmaxlen = (int)(COMMBUFFERSIZE / bytes_per_blockelement); + + npart = get_particles_in_block(blocknr, &typelist[0]); + + if(npart > 0) + { + if(ThisTask == readTask) + { + if(All.ICFormat == 2) + { + SKIP; + my_fread(&label, sizeof(char), 4, fd); + my_fread(&nextblock, sizeof(int), 1, fd); + swap_Nbyte((char *)&nextblock, 1, 4); + printf("Reading header => '%c%c%c%c' (%d byte)\n", label[0], label[1], label[2], label[3], nextblock); + SKIP2; + + get_Tab_IO_Label(blocknr, expected_label); + if(strncmp(label, expected_label, 4) != 0) + { + sprintf(buf, "incorrect block-structure!\nexpected '%c%c%c%c' but found '%c%c%c%c'\n", expected_label[0], + expected_label[1], expected_label[2], expected_label[3], label[0], label[1], label[2], label[3]); + terminate(buf); + } + } + + if(All.ICFormat == 1 || All.ICFormat == 2) + SKIP; + } + + for(type = 0, offset = 0; type < NTYPES; type++) + { + n_in_file = header.npart[type]; +#ifdef HAVE_HDF5 + pcsum = 0; +#endif /* #ifdef HAVE_HDF5 */ + if(typelist[type] == 0) + { + /* we are expecting (npart>0) this block, but not for this particle type */ + n_for_this_task = n_in_file / ntask; + if((ThisTask - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + offset += n_for_this_task; + } + else + { + /* we are expecting (npart>0) this block for this particle type, read or recv */ + for(task = readTask; task <= lastTask; task++) + { + n_for_this_task = n_in_file / ntask; + if((task - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + if(task == ThisTask) + if(NumPart + n_for_this_task > All.MaxPart) + terminate("too many particles. 
%d %d %d\n", NumPart, n_for_this_task, All.MaxPart); + + /* blocked load to fit in finite size of CommBuffer */ + do + { + pc = n_for_this_task; + + if(pc > blockmaxlen) + pc = blockmaxlen; + + if(ThisTask == readTask) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + my_fread(CommBuffer, bytes_per_blockelement, pc, fd); +#ifdef HAVE_HDF5 + if(All.ICFormat == 3 && pc > 0) + { + /* configure HDF5 dataspaces and hyperslab selection */ + dims[0] = header.npart[type]; + dims[1] = get_values_per_blockelement(blocknr); + if(dims[1] == 1) + rank = 1; + else + rank = 2; + + hdf5_dataspace_in_file = my_H5Screate_simple(rank, dims, NULL); + + dims[0] = pc; + hdf5_dataspace_in_memory = my_H5Screate_simple(rank, dims, NULL); + + start[0] = pcsum; + start[1] = 0; + + count[0] = pc; + count[1] = get_values_per_blockelement(blocknr); + pcsum += pc; + + my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL); + + switch(get_datatype_in_block(blocknr, 1)) + { + case FILE_INT: + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT); + break; + case FILE_MY_IO_FLOAT: +#ifdef INPUT_IN_DOUBLEPRECISION + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE); +#else /* #ifdef INPUT_IN_DOUBLEPRECISION */ + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT); +#endif /* #ifdef INPUT_IN_DOUBLEPRECISION #else */ + break; + case FILE_MY_ID_TYPE: +#ifdef LONGIDS + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT64); +#else /* #ifdef LONGIDS */ + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT32); +#endif /* #ifdef LONGIDS #else */ + break; + case FILE_DOUBLE: + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE); + break; + case FILE_FLOAT: + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT); + break; + default: + terminate("can't process this input type"); + break; + } + + /* test if HDF5 dataset is actually present */ + get_dataset_name(blocknr, buf); + + hdf5_dataset = my_H5Dopen_if_existing(hdf5_grp[type], buf); + + if(hdf5_dataset < 0) + { + // no, pad with zeros + if((ThisTask == readTask) && (task 
== ThisTask)) + mpi_printf("\tDataset %s not present for particle type %d, using zero.\n", buf, type); + memset(CommBuffer, 0, dims[0] * dims[1] * my_H5Tget_size(hdf5_datatype)); + } + else + { + // yes, read into CommBuffer + my_H5Dread(hdf5_dataset, hdf5_datatype, hdf5_dataspace_in_memory, hdf5_dataspace_in_file, + H5P_DEFAULT, CommBuffer, buf); + my_H5Dclose(hdf5_dataset, buf); + } + my_H5Tclose(hdf5_datatype); + my_H5Sclose(hdf5_dataspace_in_memory, H5S_SIMPLE); + my_H5Sclose(hdf5_dataspace_in_file, H5S_SIMPLE); + + } /* All.ICFormat == 3 */ +#endif /* #ifdef HAVE_HDF5 */ + } + + if(ThisTask == readTask && task != readTask && pc > 0) + MPI_Ssend(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, task, TAG_PDATA, MPI_COMM_WORLD); + + if(ThisTask != readTask && task == ThisTask && pc > 0) + MPI_Recv(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, readTask, TAG_PDATA, MPI_COMM_WORLD, + &status); + + /* copy CommBuffer contents into actual particle data structs */ + if(ThisTask == task) + { + empty_read_buffer(blocknr, nstart + offset, pc, type); + + offset += pc; + } + + n_for_this_task -= pc; + } /* do */ + while(n_for_this_task > 0); + + } /* task loop */ + } /* typelist[type] > 0 */ + } /* type loop */ + + if(ThisTask == readTask) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + { + SKIP2; + swap_Nbyte((char *)&blksize1, 1, 4); + swap_Nbyte((char *)&blksize2, 1, 4); + if(blksize1 != blksize2) + { + sprintf(buf, "incorrect block-sizes detected!\n Task=%d blocknr=%d blksize1=%d blksize2=%d\n", ThisTask, + blocknr, blksize1, blksize2); + if(blocknr == IO_ID) + { + strcat(buf, "Possible mismatch of 32bit and 64bit ID's in IC file and AREPO compilation !\n"); + } + terminate(buf); + } + } + } + + } /* npart > 0 */ + } /* blockpresent */ + } /* blocknr loop */ + + for(type = 0; type < NTYPES; type++) + { + n_in_file = header.npart[type]; + + n_for_this_task = n_in_file / ntask; + if((ThisTask - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + NumPart += 
n_for_this_task; + + if(type == 0) + NumGas += n_for_this_task; + } + + if(ThisTask == readTask) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + fclose(fd); +#ifdef HAVE_HDF5 + if(All.ICFormat == 3) + { + for(type = NTYPES - 1; type >= 0; type--) + if(header.npart[type] > 0) + { + sprintf(buf, "/PartType%d", type); + my_H5Gclose(hdf5_grp[type], buf); + } + my_H5Fclose(hdf5_file, fname); + } +#endif /* #ifdef HAVE_HDF5 */ + } +}
+
+/*! \brief Reads the header of one candidate snapshot/IC file into the global
+ *         header.
+ *
+ * Helper of find_files(). For formats 1 and 2 the leading record marker is
+ * used to set swap_file before the header is read and passed through
+ * swap_header(); for format 3 the header attributes are read with
+ * read_header_attributes_in_hdf5().
+ *
+ * \param[in] name Full name of the file to probe.
+ *
+ * \return 1 if the file could be opened, 0 otherwise.
+ */
+static int probe_snapshot_header(const char *name)
+{
+  FILE *fd;
+  int dummy;
+
+  if(!(fd = fopen(name, "r")))
+    return 0;
+
+  if(All.ICFormat == 1 || All.ICFormat == 2)
+    {
+      if(All.ICFormat == 2)
+        {
+          /* four 4-byte words precede the header record in format 2; the
+           * first of them is kept as byte-order indicator */
+          my_fread(&dummy, sizeof(dummy), 1, fd);
+          swap_file = dummy;
+          my_fread(&dummy, sizeof(dummy), 1, fd);
+          my_fread(&dummy, sizeof(dummy), 1, fd);
+          my_fread(&dummy, sizeof(dummy), 1, fd);
+        }
+
+      my_fread(&dummy, sizeof(dummy), 1, fd);
+      if(All.ICFormat == 1)
+        {
+          /* 8 is the value for which swap_Nbyte() leaves the data untouched */
+          if(dummy == 256)
+            swap_file = 8;
+          else
+            swap_file = dummy;
+        }
+      read_header_attributes(fd);
+
+      swap_header();
+
+#ifdef COMBINETYPES
+      header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5];
+      header.npart[3] += header.npart[4] + header.npart[5];
+      header.npartTotal[4] = 0;
+      header.npartTotal[5] = 0;
+      header.npart[4]      = 0;
+      header.npart[5]      = 0;
+#endif /* #ifdef COMBINETYPES */
+
+      my_fread(&dummy, sizeof(dummy), 1, fd);
+    }
+  fclose(fd);
+
+#ifdef HAVE_HDF5
+  if(All.ICFormat == 3)
+    read_header_attributes_in_hdf5(name);
+#endif /* #ifdef HAVE_HDF5 */
+
+  return 1;
+}
+
+/*! \brief Determines on how many files a given snapshot is distributed.
+ *
+ * Task 0 first probes 'fname.0' (multi-file snapshot) and, if that does not
+ * yield a positive file count, 'fname' itself (single file, counted as one
+ * file). For ICFormat 3 the suffix '.hdf5' is appended to both names.
+ * swap_file and header are broadcast to all tasks after each attempt.
+ *
+ * \param[in] fname File name of the snapshot as given in the parameter file.
+ *
+ * \return Number of files. If no file is found mpi_terminate() is called;
+ *         the final return of -1 is only reached if that call returns.
+ */
+int find_files(const char *fname)
+{
+  char buf[200], buf1[200];
+  int len, len1;
+
+  if(All.ICFormat == 3)
+    {
+      len  = snprintf(buf, sizeof(buf), "%s.%d.hdf5", fname, 0);
+      len1 = snprintf(buf1, sizeof(buf1), "%s.hdf5", fname);
+    }
+  else
+    {
+      len  = snprintf(buf, sizeof(buf), "%s.%d", fname, 0);
+      len1 = snprintf(buf1, sizeof(buf1), "%s", fname);
+    }
+
+  /* snprintf never writes past the buffer; a result >= the buffer size means
+   * the name was cut off and would point to the wrong file */
+  if(len < 0 || len >= (int)sizeof(buf) || len1 < 0 || len1 >= (int)sizeof(buf1))
+    terminate("snapshot/IC file name is too long (at most %d characters are supported)", (int)sizeof(buf) - 1);
+
+#ifndef HAVE_HDF5
+  if(All.ICFormat == 3)
+    {
+      mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n");
+    }
+#endif /* #ifndef HAVE_HDF5 */
+
+  header.num_files = 0;
+
+  /* first attempt: snapshot split over several files */
+  if(ThisTask == 0)
+    (void)probe_snapshot_header(buf);
+
+  MPI_Bcast(&swap_file, sizeof(swap_file), MPI_BYTE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&header, sizeof(header), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+  if(header.num_files < 0)
+    terminate("header.num_files < 0");
+  if(header.num_files > 100000)
+    terminate("header.num_files=%d read from %s does not make sense - header possibly corrupt.", header.num_files, buf);
+  if(header.num_files > 0)
+    return header.num_files;
+
+  /* second attempt: everything in a single file */
+  if(ThisTask == 0 && probe_snapshot_header(buf1))
+    header.num_files = 1;
+
+  MPI_Bcast(&swap_file, sizeof(swap_file), MPI_BYTE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&header, sizeof(header), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+  if(header.num_files > 0)
+    return header.num_files;
+
+  mpi_terminate("\nCan't find initial conditions file, neither as '%s'\nnor as '%s'\n", buf, buf1);
+  return -1;
+}
+
+/*! \brief This function assigns a certain number of tasks to each file.
+ *
+ * After the ICs have been read, these tasks hold the content of that file.
+ * The number of tasks per file is as homogeneous as possible.
+ * The number of files may at most be equal to the number of tasks; the run
+ * is terminated if this is violated or if nfiles is smaller than 1.
+ *
+ * \param[in] nfiles Number of files over which the snapshot is distributed.
+ * \param[in] firstfile Not used by this function.
+ * \param[in] firsttask Not used by this function.
+ * \param[in] lasttask Not used by this function.
+ * \param[out] filenr Number of the file to which this task belongs.
+ * \param[out] master Number of the task responsible for reading that file.
+ * \param[out] last Number of the last task belonging to the same file as
+ *             this task.
+ *
+ * \return void
+ */
+void distribute_file(int nfiles, int firstfile, int firsttask, int lasttask, int *filenr, int *master, int *last)
+{
+  int i, group;
+
+  /* part of the interface, but not needed for the assignment */
+  (void)firstfile;
+  (void)firsttask;
+  (void)lasttask;
+
+  /* protects the divisions below and guarantees at least one task per file */
+  if(nfiles < 1 || nfiles > NTask)
+    terminate("cannot distribute %d file(s) over %d task(s)", nfiles, NTask);
+
+  int tasks_per_file = NTask / nfiles;
+  int tasks_left     = NTask % nfiles;
+
+  if(tasks_left == 0)
+    {
+      /* tasks divide evenly among the files: equally sized contiguous groups */
+      group   = ThisTask / tasks_per_file;
+      *master = group * tasks_per_file;
+      *last   = (group + 1) * tasks_per_file - 1;
+      *filenr = group;
+      return;
+    }
+
+  /* uneven case: group boundaries follow multiples of the fractional number
+   * of tasks per file */
+  double tpf = ((double)NTask) / nfiles;
+
+  for(i = 0, *last = -1; i < nfiles; i++)
+    {
+      *master = *last + 1;
+      *last   = (i + 1) * tpf;
+      if(*last >= NTask)
+        *last = *last - 1;
+      if(*last < *master)
+        terminate("last < master");
+      *filenr = i;
+
+      /* the last file always takes all remaining tasks */
+      if(i == nfiles - 1)
+        *last = NTask - 1;
+
+      if(ThisTask >= *master && ThisTask <= *last)
+        return;
+    }
+}
+
+#ifdef HAVE_HDF5
+/*! \brief The error handler used during the loading of the hdf5 header.
+ *
+ * \param[in] unused The parameter is not used, but it is necessary for
+ *            compatibility with the HDF5 library.
+ *
+ * \return 1 after calling write_error() if the code is compiled with
+ *         TOLERATE_WRITE_ERROR; otherwise terminate() is called (the
+ *         return of 0 is only reached if that call returns).
+ */
+herr_t hdf5_header_error_handler(void *unused)
+{
+#ifdef TOLERATE_WRITE_ERROR
+  write_error(3, 0, 0);
+  return 1;
+#else
+  terminate("Failed to read HDF5 header attribute. Probably your file is corrupt.\n");
+  return 0;
+#endif
+}
+
+/*! \brief This function reads the snapshot header in case of hdf5 files + * (i.e. format 3).
+ *
+ * Opens the file, reads the attributes of its "/Header" group into the
+ * global header structure and closes group and file again.
+ *
+ * \param[in] fname Name of the HDF5 file whose header is read.
+ *
+ * \return void
+ */
+void read_header_attributes_in_hdf5(const char *fname)
+{
+  hid_t hdf5_file      = my_H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
+  hid_t hdf5_headergrp = my_H5Gopen(hdf5_file, "/Header");
+
+  /* one entry per header attribute, listed in the order in which they are read */
+  const struct
+  {
+    const char *name; /* attribute name inside "/Header" */
+    hid_t memtype;    /* in-memory datatype handed to my_H5Aread() */
+    void *dest;       /* destination inside the global header */
+    hssize_t len;     /* element count handed to my_H5Aread() */
+  } attributes[] = {
+      {"NumPart_ThisFile", H5T_NATIVE_INT, header.npart, NTYPES},
+      {"NumPart_Total", H5T_NATIVE_UINT, header.npartTotal, NTYPES},
+      {"NumPart_Total_HighWord", H5T_NATIVE_UINT, header.npartTotalHighWord, NTYPES},
+      {"MassTable", H5T_NATIVE_DOUBLE, header.mass, NTYPES},
+      {"Time", H5T_NATIVE_DOUBLE, &header.time, 1},
+      {"Redshift", H5T_NATIVE_DOUBLE, &header.redshift, 1},
+      {"BoxSize", H5T_NATIVE_DOUBLE, &header.BoxSize, 1},
+      {"NumFilesPerSnapshot", H5T_NATIVE_INT, &header.num_files, 1},
+      {"Omega0", H5T_NATIVE_DOUBLE, &header.Omega0, 1},
+      {"OmegaLambda", H5T_NATIVE_DOUBLE, &header.OmegaLambda, 1},
+      {"HubbleParam", H5T_NATIVE_DOUBLE, &header.HubbleParam, 1},
+      {"Flag_Sfr", H5T_NATIVE_INT, &header.flag_sfr, 1},
+      {"Flag_Cooling", H5T_NATIVE_INT, &header.flag_cooling, 1},
+      {"Flag_StellarAge", H5T_NATIVE_INT, &header.flag_stellarage, 1},
+      {"Flag_Metals", H5T_NATIVE_INT, &header.flag_metals, 1},
+      {"Flag_Feedback", H5T_NATIVE_INT, &header.flag_feedback, 1},
+      {"Flag_DoublePrecision", H5T_NATIVE_INT, &header.flag_doubleprecision, 1},
+  };
+
+  for(size_t k = 0; k < sizeof(attributes) / sizeof(attributes[0]); k++)
+    {
+      hid_t hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, attributes[k].name);
+      my_H5Aread(hdf5_attribute, attributes[k].memtype, attributes[k].dest, attributes[k].name, attributes[k].len);
+      my_H5Aclose(hdf5_attribute, attributes[k].name);
+    }
+
+  my_H5Gclose(hdf5_headergrp, "/Header");
+  my_H5Fclose(hdf5_file, fname);
+}
+#endif /* #ifdef HAVE_HDF5 */
+
+/*! \brief Reads the snapshot header in case of non-hdf5 files (i.e. formats 1 + * and 2). + * + * \param[in] *fd Pointer to snapshot file. + * + * \return void + */ +void read_header_attributes(FILE *fd) +{ +#ifdef NTYPES_ICS + int type; + if(RestartFlag == 0) + { + my_fread(&header_ICs, sizeof(header_ICs), 1, fd); + + for(type = 0; type < NTYPES_ICS; type++) + { + header.npart[type] = header_ICs.npart[type]; + header.mass[type] = header_ICs.mass[type]; + header.npartTotal[type] = header_ICs.npartTotal[type]; + header.npartTotalHighWord[type] = header_ICs.npartTotalHighWord[type]; + } + for(type = NTYPES_ICS; type < NTYPES; type++) + { + header.npart[type] = 0; + header.mass[type] = 0; + header.npartTotal[type] = 0; + header.npartTotalHighWord[type] = 0; + } + + header.time = header_ICs.time; + header.redshift = header_ICs.redshift; + header.flag_sfr = header_ICs.flag_sfr; + header.flag_feedback = header_ICs.flag_feedback; + header.flag_cooling = header_ICs.flag_cooling; + header.num_files = header_ICs.num_files; + header.BoxSize = header_ICs.BoxSize; + header.Omega0 = header_ICs.Omega0; + header.OmegaLambda = header_ICs.OmegaLambda; + header.HubbleParam = header_ICs.HubbleParam; + header.flag_stellarage = header_ICs.flag_stellarage; + header.flag_metals = header_ICs.flag_metals; + header.flag_entropy_instead_u = header_ICs.flag_entropy_instead_u; + header.flag_doubleprecision = header_ICs.flag_doubleprecision; + header.flag_lpt_ics = header_ICs.flag_lpt_ics; + header.lpt_scalingfactor = header_ICs.lpt_scalingfactor; + header.flag_tracer_field = header_ICs.flag_tracer_field; + header.composition_vector_length = header_ICs.composition_vector_length; + } + else + my_fread(&header, sizeof(header), 1, fd); +#else /* #ifdef
NTYPES_ICS */ + my_fread(&header, sizeof(header), 1, fd); +#endif /* #ifdef NTYPES_ICS #else */ +}
+
+/*! \brief Swaps the endianness of data.
+ *
+ * Reverses the byte order of each of the n elements in place. Nothing is
+ * done if the global swap_file equals 8, or if there is nothing to reverse
+ * (n <= 0 or m <= 1). The swap works for any element size m.
+ *
+ * \param[in, out] data Pointer to the data.
+ * \param[in] n Number of elements to swap.
+ * \param[in] m Size of single element to swap: int, float = 4; double = 8.
+ *
+ * \return void
+ */
+void swap_Nbyte(char *data, int n, int m)
+{
+  /* the value 8 marks data that must not be swapped */
+  if(swap_file == 8)
+    return;
+
+  if(n <= 0 || m <= 1)
+    return;
+
+  for(int j = 0; j < n; j++)
+    {
+      char *elem = data + (size_t)j * (size_t)m;
+
+      /* exchange bytes pairwise from both ends; no scratch buffer is needed,
+       * so the element size is not limited */
+      for(int lo = 0, hi = m - 1; lo < hi; lo++, hi--)
+        {
+          char tmp = elem[lo];
+          elem[lo] = elem[hi];
+          elem[hi] = tmp;
+        }
+    }
+}
+
+/*! \brief Swaps the endianness of the snapshot header.
+ *
+ * Passes every field of the global header to swap_Nbyte() together with its
+ * element count and element size in bytes; whether bytes are actually
+ * exchanged is decided inside swap_Nbyte().
+ *
+ * \return void
+ */
+void swap_header()
+{
+  swap_Nbyte((char *)&header.npart, NTYPES, 4);
+  swap_Nbyte((char *)&header.mass, NTYPES, 8);
+  swap_Nbyte((char *)&header.time, 1, 8);
+  swap_Nbyte((char *)&header.redshift, 1, 8);
+  swap_Nbyte((char *)&header.flag_sfr, 1, 4);
+  swap_Nbyte((char *)&header.flag_feedback, 1, 4);
+  swap_Nbyte((char *)&header.npartTotal, NTYPES, 4);
+  swap_Nbyte((char *)&header.flag_cooling, 1, 4);
+  swap_Nbyte((char *)&header.num_files, 1, 4);
+  swap_Nbyte((char *)&header.BoxSize, 1, 8);
+  swap_Nbyte((char *)&header.Omega0, 1, 8);
+  swap_Nbyte((char *)&header.OmegaLambda, 1, 8);
+  swap_Nbyte((char *)&header.HubbleParam, 1, 8);
+  swap_Nbyte((char *)&header.flag_stellarage, 1, 4);
+  swap_Nbyte((char *)&header.flag_metals, 1, 4);
+  swap_Nbyte((char *)&header.npartTotalHighWord, NTYPES, 4);
+  swap_Nbyte((char *)&header.flag_entropy_instead_u, 1, 4);
+  swap_Nbyte((char *)&header.flag_doubleprecision, 1, 4);
+  swap_Nbyte((char *)&header.flag_lpt_ics, 1, 4);
+  swap_Nbyte((char *)&header.lpt_scalingfactor, 1, 4);
+  swap_Nbyte((char *)&header.flag_tracer_field, 1, 4);
+  swap_Nbyte((char *)&header.composition_vector_length, 1, 4);
+}
+
+#ifdef TILE_ICS +/*! \brief Duplicates ICs and lines TileICsFactor of them up in each dimension.
+ *
+ * The particle storage is enlarged first. The non-gas particles are then
+ * moved behind the space needed for all gas copies, and every particle is
+ * copied once per additional tile with a shifted position and an offset ID;
+ * gas cells also copy their SphP entry. NumGas and NumPart are scaled by
+ * the cube of the tiling factor at the end.
+ *
+ * \return void
+ */
+void tile_ics(void)
+{
+  const int f      = All.TileICsFactor;
+  const int ntiles = f * f * f;
+  int ix, iy = 0, iz = 0;
+
+  mpi_printf("TILE_ICS: tiling by a factor of %d...\n", All.TileICsFactor);
+
+  /* make room for the additional copies */
+  domain_resize_storage(NumPart * (ntiles - 1), NumGas * (ntiles - 1), 0);
+
+  /* gas stays at the beginning of P[]: shift everything else behind the gas copies */
+  const int n_others  = NumPart - NumGas;
+  const int others_at = NumGas * f * f * f;
+  memmove(&P[others_at], &P[NumGas], n_others * sizeof(struct particle_data));
+
+  /* copies of the gas cells */
+  for(int src = 0; src < NumGas; src++)
+    {
+      for(ix = 0; ix < f; ix++)
+        {
+#ifndef ONEDIMS
+          for(iy = 0; iy < f; iy++)
+#endif /* #ifndef ONEDIMS */
+            {
+#if !defined(TWODIMS) && !defined(ONEDIMS)
+              for(iz = 0; iz < f; iz++)
+#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */
+                {
+                  /* tile (0,0,0) is the original itself */
+                  if(ix != 0 || iy != 0 || iz != 0)
+                    {
+                      const int dst = src + NumGas * ix + NumGas * f * iy + NumGas * f * f * iz;
+
+                      P[dst]    = P[src];
+                      P[dst].ID = P[src].ID + IDS_OFFSET * ix + IDS_OFFSET * f * iy + IDS_OFFSET * f * f * iz;
+                      P[dst].Pos[0] += All.BoxSize / f * ix;
+                      P[dst].Pos[1] += All.BoxSize / f * iy;
+                      P[dst].Pos[2] += All.BoxSize / f * iz;
+                      SphP[dst] = SphP[src];
+                    }
+                }
+            }
+        }
+    }
+
+  /* copies of all other particle types */
+  iy = 0;
+  iz = 0;
+  for(int src = others_at; src < others_at + n_others; src++)
+    {
+      for(ix = 0; ix < f; ix++)
+        {
+#ifndef ONEDIMS
+          for(iy = 0; iy < f; iy++)
+#endif /* #ifndef ONEDIMS */
+            {
+#if !defined(TWODIMS) && !defined(ONEDIMS)
+              for(iz = 0; iz < f; iz++)
+#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */
+                {
+                  if(ix != 0 || iy != 0 || iz != 0)
+                    {
+                      const int dst = src + n_others * ix + n_others * f * iy + n_others * f * f * iz;
+
+                      P[dst]    = P[src];
+                      P[dst].ID = P[src].ID + IDS_OFFSET * ix + IDS_OFFSET * f * iy + IDS_OFFSET * f * f * iz;
+                      P[dst].Pos[0] += All.BoxSize / f * ix;
+                      P[dst].Pos[1] += All.BoxSize / f * iy;
+                      P[dst].Pos[2] += All.BoxSize / f * iz;
+                    }
+                }
+            }
+        }
+    }
+
+  NumGas *= ntiles;
+  NumPart *= ntiles;
+}
+#endif /* #ifdef TILE_ICS */
diff --git a/src/amuse/community/arepo/src/io/restart.c b/src/amuse/community/arepo/src/io/restart.c new file mode 100644 index 0000000000..9a3dff5bba --- /dev/null +++ b/src/amuse/community/arepo/src/io/restart.c @@ -0,0 +1,1549 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/io/restart.c + * \date 05/2018 + * \brief Handling of the loading/writing of restart files.
+ * \details contains functions: + * void loadrestart(void) + * void reread_params_after_loading_restart(void) + * static int compare_seq_data(const void *a, const void *b) + * static void create_restartfiles_dir() + * static void get_restart_filename(char *buf, int task, + * int modus) + * static void backup_restartfiles(int task) + * static int get_file_to_check(int task) + * static void check_restart_files(char *buf, struct check *ch, + * int *success) + * static void send_work_request(int modus, int i) + * static void polling(int modus) + * static void work_files(int modus) + * void restart(int modus) + * static void write_or_read_this_processors_restart_file(int + * modus, char *buf, struct check *ch) + * static int execute_write_or_read(int modus, char *buf, + * struct check *ch) + * static void contents_restart_file(int modus) + * void readjust_timebase(double TimeMax_old, + * double TimeMax_new) + * void in(int *x, int modus) + * void byten(void *x, size_t n, int modus) + * void byten_nohash(void *x, size_t n, int modus) + * void byten_hash(void *x, size_t n, int modus, int hash) + * void allocate_iobuf(void) + * void deallocate_iobuf(int modus) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef __USE_GNU +#define _GNU_SOURCE /* needed for USE_DIRECT_IO_FOR_RESTARTS */ +#endif /* #ifndef __USE_GNU */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../debug_md5/Md5.h" +#include "../domain/domain.h" +#include "../mesh/voronoi/voronoi.h" + +#define MODUS_WRITE 0 +#define MODUS_READ 1 +#define MODUS_READCHECK 2 +#define MODUS_CHECK 3 + +/*! \brief Data for scheduling restart file IO. + */ +static struct seq_data +{ + int thistask; + int rankinnode; + int thisnode; +} * seq; + +/*! 
\brief Metadata of restart files to be compared to when checking.
+ *
+ *  Task 0 keeps one entry per task in the array `checks` (allocated in
+ *  restart() when writing).
+ */
+static struct check
+{
+  long long byte_count;   /* number of bytes of restart-file content the MD5 hash covers */
+  unsigned char hash[16]; /* MD5 digest of that content */
+} * checks;
+
+static char *write_success; /* on task 0: one flag per task, non-zero once that task's file passed the check */
+
+static int fdint; /* file descriptor of the restart file currently opened via open() */
+
+static void in(int *x, int modus);
+static void byten(void *x, size_t n, int modus);
+static void byten_nohash(void *x, size_t n, int modus);
+static void byten_hash(void *x, size_t n, int modus, int hash);
+static void write_or_read_this_processors_restart_file(int modus, char *fname, struct check *ch);
+static int execute_write_or_read(int modus, char *buf, struct check *ch);
+static void contents_restart_file(int modus);
+
+#define MAX_BLOCK_SIZE (32 * 1024 * 1024)
+
+static int PageSize; /* set from getpagesize() at the start of restart() */
+static char *iobuf_aligned, *io_buf;
+static size_t fillp, iop;
+void allocate_iobuf(void);
+void deallocate_iobuf(int modus);
+
+static long long byte_count; /* running byte counter; reset in restart() and used there for the I/O rate */
+static int files_started;    /* task 0: number of restart files whose processing has been started */
+static int files_completed;  /* task 0: number of restart files whose processing has finished */
+static int files_concurrent; /* copy of All.NumFilesWrittenInParallel (set in restart()) */
+static int files_groups;     /* ceil(NTask / NumFilesWrittenInParallel), used for progress messages */
+
+static MD5_CTX mysum; /* MD5 state for the file currently being read, written or checked */
+
+static struct global_data_all_processes all; /* copy of `All' taken before the restart files overwrite it */
+
+/*! \brief This function loads the last restart file.
+ *
+ *  Some parameters of the parameter file might be changed between restarting.
+ *  This function ensures that only the allowed parameters change,
+ *  otherwise the old value from the restart file is taken.
+ *  If the end time of the simulation changed readjust_timebase() is called in
+ *  the end.
+ *
+ *  Sequence: save `All' into `all', read the restart files (which overwrite
+ *  `All'), then copy the permitted parameters back from `all'.
+ *
+ *  \return void
+ */
+void loadrestart(void)
+{
+  /* save global variables. (will be read from restart file) */
+  all = All;
+
+  /* Read restart files.
+     Note: This also resets all variables in the struct `All'. */
+  restart(MODUS_READ);
+
+  /* However, during the run, some variables in the parameter
+     file are allowed to be changed, if desired. These are copied here. */
+  reread_params_after_loading_restart();
+}
+
+/*! \brief This function takes from the parameter file values that are allowed
+ *  to change after restart.
+ *
+ *  For each such parameter, task 0 issues a warning if the value from the
+ *  parameter file (kept in `all') differs from the value currently held in
+ *  `All'; afterwards the value from `all' is adopted unconditionally. A
+ *  changed TimeMax additionally triggers readjust_timebase().
+ *
+ *  \return void
+ */
+void reread_params_after_loading_restart(void)
+{
+/* Warn on task 0 if scalar parameter `field' differs, then adopt the value
+ * from `all'. `fmt' is the printf conversion used for both values; the
+ * message text is assembled by string-literal concatenation. */
+#define ADOPT_SCALAR(field, fmt)                                                                                         \
+  do                                                                                                                     \
+    {                                                                                                                    \
+      if(ThisTask == 0 && All.field != all.field)                                                                        \
+        warn(#field " modified from " fmt " to " fmt " while restarting at Time=%g", All.field, all.field, All.Time);    \
+      All.field = all.field;                                                                                             \
+    }                                                                                                                    \
+  while(0)
+
+/* Same as ADOPT_SCALAR, but for character-array parameters. */
+#define ADOPT_STRING(field)                                                                                              \
+  do                                                                                                                     \
+    {                                                                                                                    \
+      if(ThisTask == 0 && strcmp(All.field, all.field) != 0)                                                             \
+        warn(#field " modified from %s to %s while restarting at Time=%g", All.field, all.field, All.Time);              \
+      strcpy(All.field, all.field);                                                                                      \
+    }                                                                                                                    \
+  while(0)
+
+  /* time stepping, run control and output cadence */
+  ADOPT_SCALAR(MinSizeTimestep, "%g");
+  ADOPT_SCALAR(MaxSizeTimestep, "%g");
+  ADOPT_SCALAR(TimeLimitCPU, "%g");
+  ADOPT_SCALAR(ResubmitOn, "%d");
+  ADOPT_SCALAR(TimeBetSnapshot, "%g");
+  ADOPT_SCALAR(TimeBetStatistics, "%g");
+  ADOPT_SCALAR(CpuTimeBetRestartFile, "%g");
+  ADOPT_SCALAR(ErrTolIntAccuracy, "%g");
+  ADOPT_SCALAR(SnapFormat, "%d");
+
+  /* force accuracy and file layout */
+  ADOPT_SCALAR(ErrTolForceAcc, "%g");
+  ADOPT_SCALAR(TypeOfTimestepCriterion, "%d");
+  ADOPT_SCALAR(TypeOfOpeningCriterion, "%d");
+  ADOPT_SCALAR(NumFilesWrittenInParallel, "%d");
+  ADOPT_SCALAR(NumFilesPerSnapshot, "%d");
+
+  /* limits, domain decomposition and mesh regularization */
+  ADOPT_SCALAR(LimitUBelowThisDensity, "%g");
+  ADOPT_SCALAR(LimitUBelowCertainDensityToThisValue, "%g");
+  ADOPT_SCALAR(MinimumDensityOnStartUp, "%g");
+  ADOPT_SCALAR(MultipleDomains, "%d");
+  ADOPT_SCALAR(TopNodeFactor, "%g");
+  ADOPT_SCALAR(ActivePartFracForNewDomainDecomp, "%g");
+  ADOPT_SCALAR(OutputListOn, "%d");
+  ADOPT_SCALAR(CourantFac, "%g");
+#ifdef REGULARIZE_MESH_FACE_ANGLE
+  ADOPT_SCALAR(CellMaxAngleFactor, "%g");
+#else  /* #ifdef REGULARIZE_MESH_FACE_ANGLE */
+  ADOPT_SCALAR(CellShapingFactor, "%g");
+#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */
+  ADOPT_SCALAR(CellShapingSpeed, "%g");
+
+  /* output list: the length is adopted first, so the arrays below are
+   * compared and copied over the new length */
+  ADOPT_SCALAR(OutputListLength, "%d");
+
+  if(ThisTask == 0 && memcmp(All.OutputListTimes, all.OutputListTimes, sizeof(double) * All.OutputListLength) != 0)
+    warn("OutputListTimes modified while restarting at Time=%g", All.Time);
+  memcpy(All.OutputListTimes, all.OutputListTimes, sizeof(double) * All.OutputListLength);
+
+  if(ThisTask == 0 && memcmp(All.OutputListFlag, all.OutputListFlag, sizeof(char) * All.OutputListLength) != 0)
+    warn("OutputListFlag modified while restarting at Time=%g", All.Time);
+  memcpy(All.OutputListFlag, all.OutputListFlag, sizeof(char) * All.OutputListLength);
+
+  /* string-valued parameters */
+  ADOPT_STRING(ResubmitCommand);
+  ADOPT_STRING(OutputListFilename);
+  ADOPT_STRING(OutputDir);
+  ADOPT_STRING(SnapshotFileBase);
+
+#ifdef MHD_SEEDFIELD
+  ADOPT_SCALAR(B_dir, "%d");
+  ADOPT_SCALAR(B_value, "%g");
+#endif /* #ifdef MHD_SEEDFIELD */
+
+#undef ADOPT_STRING
+#undef ADOPT_SCALAR
+
+  /* TimeMax is not simply copied: the time base has to be readjusted */
+  if(All.TimeMax != all.TimeMax)
+    {
+      if(ThisTask == 0)
+        warn("TimeMax modified from %g to %g while restarting at Time=%g", All.TimeMax, all.TimeMax, All.Time);
+      readjust_timebase(All.TimeMax, all.TimeMax);
+    }
+}
+
+/*! \brief Sorting kernel for seq_data structure.
+ *
+ *  Compares (top priority first)
+ *   rankinnode
+ *   thisnode
+ *   thistask
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+static int compare_seq_data(const void *a, const void *b)
+{
+  const struct seq_data *lhs = (const struct seq_data *)a;
+  const struct seq_data *rhs = (const struct seq_data *)b;
+
+  /* highest priority: rank of the task within its node */
+  if(lhs->rankinnode != rhs->rankinnode)
+    return (lhs->rankinnode < rhs->rankinnode) ? -1 : +1;
+
+  /* then: node index */
+  if(lhs->thisnode != rhs->thisnode)
+    return (lhs->thisnode < rhs->thisnode) ? -1 : +1;
+
+  /* finally: global task index */
+  if(lhs->thistask != rhs->thistask)
+    return (lhs->thistask < rhs->thistask) ? -1 : +1;
+
+  return 0;
+}
+
+/*! \brief Creates the restart file directory with appropriate permissions.
+ *
+ *  Also finishes the status line that the caller started to print, by
+ *  appending (with MULTIPLE_RESTARTS) the restart file count and a final
+ *  ".\n". The result of mkdir() is not checked.
+ *
+ *  \return void
+ */
+static void create_restartfiles_dir()
+{
+  char dirname[MAXLEN_PATH];
+
+#ifdef MULTIPLE_RESTARTS
+  printf(", All.RestartFileCount=%03d", All.RestartFileCount);
+#endif /* #ifdef MULTIPLE_RESTARTS */
+  printf(".\n");
+
+  /* numbered directory with MULTIPLE_RESTARTS, plain "restartfiles" otherwise */
+#ifdef MULTIPLE_RESTARTS
+  sprintf(dirname, "%s/restartfiles_%03d", All.OutputDir, All.RestartFileCount);
+#else  /* #ifdef MULTIPLE_RESTARTS */
+  sprintf(dirname, "%s/restartfiles", All.OutputDir);
+#endif /* #ifdef MULTIPLE_RESTARTS #else */
+  mkdir(dirname, 02755);
+
+#ifdef TOLERATE_WRITE_ERROR
+  /* fallback location used when writing to the primary directory fails */
+  sprintf(dirname, "%s/restartfiles", AlternativeOutputDir);
+  mkdir(dirname, 02755);
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+}
+
+/*! \brief Sets filename of restart file on local task.
+ *
+ *  \param[out] buf Buffer to which filename is written.
+ *  \param[in] task Task for which restart file should be written.
+ *  \param[in] modus Read or write mode flag.
+ *
+ *  With MULTIPLE_RESTARTS the name points into a numbered directory; note
+ *  that in write mode All.RestartFileCount is incremented as a side effect
+ *  of this call.
+ *
+ *  \return void
+ */
+static void get_restart_filename(char *buf, int task, int modus)
+{
+  sprintf(buf, "%s/restartfiles/%s.%d", All.OutputDir, "restart", task);
+
+#ifdef MULTIPLE_RESTARTS
+  if(modus == MODUS_WRITE)
+    sprintf(buf, "%s/restartfiles_%03d/%s.%d", All.OutputDir, All.RestartFileCount++, "restart", task);
+  if((modus == MODUS_READ) || (modus == MODUS_READCHECK) || (modus == MODUS_CHECK))
+    sprintf(buf, "%s/restartfiles_%03d/%s.%d", All.OutputDir, All.RestartFileCount - 1, "restart", task);
+#endif /* #ifdef MULTIPLE_RESTARTS */
+}
+
+/*! \brief Renames existing restartfiles to backup-restartfiles.
+ *
+ *  This way the code ensures that there are two sets of restart-files per
+ *  run.
+ *
+ *  This is a collective operation (it contains an MPI_Allreduce), so it has
+ *  to be called by all tasks. A warning is issued if only part of the tasks
+ *  found a file to rename.
+ *
+ *  \param[in] task Task for which restart file is renamed.
+ *
+ *  \return void
+ */
+static void backup_restartfiles(int task)
+{
+  char buf[MAXLEN_PATH];
+
+  FILE *fcheck = NULL;
+  char buf_bak[MAXLEN_PATH];
+
+  int bak_files_status = 0; /* 1 if this task renamed at least one file */
+
+  mpi_printf("RESTART: Backup restart files...\n");
+  myflush(stdout);
+
+  get_restart_filename(buf, task, MODUS_READ);
+
+  /* the backup name is built for the same task as the file being renamed */
+  sprintf(buf_bak, "%s/restartfiles/bak-%s.%d", All.OutputDir, "restart", task);
+  if((fcheck = fopen(buf, "r")))
+    {
+      fclose(fcheck);
+
+      rename(buf, buf_bak);
+      bak_files_status = 1;
+    }
+#ifdef TOLERATE_WRITE_ERROR
+  char alternative_fname[MAXLEN_PATH];
+  sprintf(alternative_fname, "%s/restartfiles/%s.%d", AlternativeOutputDir, "restart", task);
+  sprintf(buf_bak, "%s/restartfiles/bak-%s.%d", AlternativeOutputDir, "restart", task);
+
+  if((fcheck = fopen(alternative_fname, "r")))
+    {
+      fclose(fcheck);
+
+      rename(alternative_fname, buf_bak);
+      bak_files_status = 1;
+    }
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  int bak_files_status_sum;
+  MPI_Allreduce(&bak_files_status, &bak_files_status_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  /* report the global counts (the sum over all tasks), not the local 0/1 flag */
+  if(bak_files_status_sum != NTask && bak_files_status_sum != 0)
+    warn("RESTART: some (%d) restart files were renamed to bak, but some (%d) weren't - something is very possibly wrong!",
+         bak_files_status_sum, NTask - bak_files_status_sum);
+  if(bak_files_status_sum == NTask)
+    mpi_printf("RESTART: done renaming pre-existing restart files to bak files.\n");
+  else if(bak_files_status_sum == 0)
+    mpi_printf("RESTART: no pre-existing restart files found.\n");
+
+  myflush(stdout);
+}
+
+/*! \brief Returns the index of file which is to be checked by local task.
+ *
+ *  Task gets assigned a restart file to check which it has not written.
+ *  This is to ensure that the check is actually read from disk (not from some
+ *  buffer).
+ *
+ *  The assignment is a cyclic shift by NTask / 2; for NTask == 1 this is the
+ *  task's own file.
+ *
+ *  \param[in] task Local task.
+ *
+ *  \return File number.
+ */
+static int get_file_to_check(int task) { return (task + NTask / 2) % NTask; }
+
+/*! \brief Checks restart files via an md5sum.
+ *
+ *  This is to ensure that they have been written correctly to the file system.
+ *
+ *  \param[in] buf Filename of restart file.
+ *  \param[in] ch Metadata about data to be checked.
+ *  \param[out] success Flag whether check was a success.
+ *
+ *  The first ch->byte_count bytes of the file are read back and hashed; the
+ *  result is compared both with ch->hash and with the 16 hash bytes stored
+ *  in the file right after the content. On a mismatch the file is renamed
+ *  to "<name>-damaged" and terminate() is called.
+ *
+ *  \return void
+ */
+static void check_restart_files(char *buf, struct check *ch, int *success)
+{
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+  struct stat st;
+  if(stat(buf, &st) == 0)
+    {
+      size_t size = st.st_size;
+      if(size % PageSize > 0) /* pad the file with zero bytes up to a multiple of PageSize */
+        {
+          FILE *fd = fopen(buf, "a");
+          if(fd)
+            {
+              size_t n = PageSize - (size % PageSize);
+              char *p  = calloc(n, 1);
+              if(p == NULL)
+                terminate("p == NULL");
+              printf("RESTART: Topping of restart file '%s' by %lld bytes\n", buf, (long long)n);
+              fwrite(p, n, 1, fd);
+              fclose(fd);
+              free(p);
+            }
+          else
+            terminate("can't increase length of restart file '%s'", buf);
+        }
+    }
+  else
+    terminate("Restart file '%s' not found.\n", buf);
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+  int oflag = O_RDONLY;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+  oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+
+  if((fdint = open(buf, oflag)) < 0)
+    terminate("Restart file '%s' not found.\n", buf);
+
+  allocate_iobuf();
+
+  MD5Init(&mysum);
+
+  long long readLen = ch->byte_count; /* bytes of content still to be read and hashed */
+  while(readLen > 0)
+    {
+      int readChunk = 1024 * 1024 * 32; /* read in pieces of at most 32 MiB */
+      if(readChunk > readLen)
+        readChunk = readLen;
+
+      byten(NULL, readChunk, MODUS_CHECK);
+      readLen -= readChunk;
+    }
+
+  MD5Final(&mysum);
+
+  unsigned char has_hash[16], written_hash[16];
+
+  for(int k = 0; k < 16; k++)
+    has_hash[k] = mysum.digest[k];
+
+  byten_nohash(written_hash, 16, MODUS_READ); /* hash stored in the file behind the content */
+
+  if(memcmp(has_hash, ch->hash, 16) != 0 || memcmp(has_hash, written_hash, 16) != 0)
+    {
+      char str_has[48], str_expected[48], str_written[48];
+      for(int i = 0; i < 16; i++) /* hex representation of the three hashes for the message */
+        {
+          sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+          sprintf(str_expected + 2 * i, "%02X", ch->hash[i]);
+          sprintf(str_written + 2 * i, "%02X", written_hash[i]);
+        }
+
+      str_has[32] = str_expected[32] = str_written[32] = 0;
+
+      char newname[10000];
+      sprintf(newname, "%s-damaged", buf);
+      rename(buf, newname);
+
+      terminate("RESTART: file '%s' has MD5 hash of '%s', does not match expected hash '%s' or written hash '%s'.", newname, str_has,
+                str_expected, str_written);
+      *success = 0; /* NOTE(review): only reached if terminate() returns - confirm */
+    }
+  else
+    {
+#ifdef VERBOSE
+      char str_has[48], str_expected[48], str_written[48];
+      for(int i = 0; i < 16; i++)
+        {
+          sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+          sprintf(str_expected + 2 * i, "%02X", ch->hash[i]);
+          sprintf(str_written + 2 * i, "%02X", written_hash[i]);
+        }
+
+      str_has[32] = str_expected[32] = str_written[32] = 0;
+
+      printf("RESTART: Task %d: file '%s' has MD5 hash of '%s', does match expected hash '%s' and written hash '%s'.\n", ThisTask, buf,
+             str_has, str_expected, str_written);
+#endif /* #ifdef VERBOSE */
+      *success = 1;
+    }
+  deallocate_iobuf(MODUS_CHECK);
+
+  close(fdint);
+}
+
+/*! \brief Distributes information and meta-data to task that is supposed to
+ *  check the restart file which has just been written.
+ *
+ *  Called on task 0. The receiving task first gets an integer `type':
+ *  0 means "do the work", 1 means "nothing to do" (file already flagged in
+ *  write_success). In check mode with type 0, the check metadata of the
+ *  file to be checked is sent in a second message.
+ *
+ *  \param[in] modus Write or check mode.
+ *  \param[in] i Index in seq array.
+ *
+ *  \return void
+ */
+static void send_work_request(int modus, int i)
+{
+  int type = 0;
+
+  if(modus == MODUS_WRITE)
+    {
+      if(write_success[seq[i].thistask]) /* this task's file is already verified */
+        type = 1;
+    }
+
+  if(modus == MODUS_CHECK)
+    {
+      int task = get_file_to_check(seq[i].thistask);
+      if(write_success[task]) /* the file this task would check is already verified */
+        type = 1;
+    }
+
+  MPI_Ssend(&type, 1, MPI_INT, seq[i].thistask, TAG_N, MPI_COMM_WORLD);
+
+  if(modus == MODUS_CHECK)
+    {
+      int task = get_file_to_check(seq[i].thistask);
+      if(!write_success[task]) /* same condition as type == 0 above */
+        MPI_Ssend(&checks[task], sizeof(struct check), MPI_BYTE, seq[i].thistask, TAG_N, MPI_COMM_WORLD);
+    }
+}
+
+/*! \brief Gets work request.
+ *
+ *  \param[in] modus Write or check files.
+ *
+ *  Only task 0 does anything here: it probes (non-blocking) for one
+ *  completion message, receives it, counts the file as completed and, if
+ *  files remain, hands out the next work request.
+ *
+ *  \return void
+ */
+static void polling(int modus)
+{
+  /* nothing to do on other tasks, or once every file has been completed */
+  if(ThisTask != 0 || files_completed >= NTask)
+    return;
+
+  MPI_Status probe_status;
+  int msg_pending;
+
+  /* now check for a completion message */
+  MPI_Iprobe(MPI_ANY_SOURCE, TAG_KEY, MPI_COMM_WORLD, &msg_pending, &probe_status);
+
+  if(!msg_pending)
+    return;
+
+  int sender = probe_status.MPI_SOURCE;
+
+  /* the payload of the completion message depends on the mode */
+  switch(modus)
+    {
+      case MODUS_WRITE:
+        MPI_Recv(&checks[sender], sizeof(struct check), MPI_BYTE, sender, TAG_KEY, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        break;
+
+      case MODUS_CHECK:
+        {
+          int check_ok;
+          MPI_Recv(&check_ok, 1, MPI_INT, sender, TAG_KEY, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+          write_success[get_file_to_check(sender)] = check_ok;
+        }
+        break;
+
+      default:
+        {
+          int ignored;
+          MPI_Recv(&ignored, 1, MPI_INT, sender, TAG_KEY, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        }
+        break;
+    }
+
+  files_completed++;
+
+  if(files_started >= NTask)
+    return;
+
+  /* announce the start of a new group of concurrently processed files */
+  if((files_started % files_concurrent) == 0)
+    {
+      const char *group_msg;
+
+      if(modus == MODUS_READ)
+        group_msg = "RESTART: Loading restart files group #%d out of %d...\n";
+      else if(modus == MODUS_WRITE)
+        group_msg = "RESTART: Writing restart files group #%d out of %d...\n";
+      else
+        group_msg = "RESTART: Checking restart files group #%d out of %d...\n";
+
+      mpi_printf(group_msg, (files_started / files_concurrent) + 1, files_groups);
+    }
+
+  send_work_request(modus, files_started++);
+}
+
+/*! \brief Schedule the reading/writing/checking of restart files to ensure
+ *  only NumFilesWrittenInParallel are written in parallel.
+ *
+ *  \param[in] modus Read, write or check files.
+ *
+ *  Task 0 acts as scheduler: it gathers and sorts the seq_data of all
+ *  tasks, starts the first NumFilesWrittenInParallel - 1 other tasks,
+ *  processes its own file, and then starts one further task for every
+ *  completion message it receives (see polling()). All other tasks block
+ *  until they receive their start message, do their work and report back.
+ *
+ *  \return void
+ */
+static void work_files(int modus)
+{
+  if(ThisTask == 0) /* the schedule only lives on task 0 */
+    if(!(seq = malloc(NTask * sizeof(struct seq_data))))
+      terminate("can't allocate seq_data");
+
+  struct seq_data seq_loc;
+  seq_loc.thistask   = ThisTask;
+  seq_loc.rankinnode = RankInThisNode;
+  seq_loc.thisnode   = ThisNode;
+
+  MPI_Gather(&seq_loc, sizeof(struct seq_data), MPI_BYTE, seq, sizeof(struct seq_data), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      qsort(seq, NTask, sizeof(struct seq_data), compare_seq_data);
+      if(seq[0].thistask != 0) /* the code below relies on task 0 being first in the schedule */
+        terminate("unexpected");
+
+      files_started   = 0;
+      files_completed = 0;
+
+      if((files_started % files_concurrent) == 0)
+        {
+          if(modus == MODUS_READ)
+            mpi_printf("RESTART: Loading restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                       files_groups);
+          else if(modus == MODUS_WRITE)
+            mpi_printf("RESTART: Writing restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                       files_groups);
+          else
+            mpi_printf("RESTART: Checking restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                       files_groups);
+        }
+
+      for(int i = 1; i < All.NumFilesWrittenInParallel; i++) /* start the other members of the first group */
+        {
+          files_started++;
+          send_work_request(modus, i);
+        }
+
+      files_started++; /* task 0's own file */
+      if(!((modus == MODUS_WRITE && write_success[ThisTask]) || (modus == MODUS_CHECK && write_success[get_file_to_check(ThisTask)])))
+        {
+          if(modus == MODUS_CHECK)
+            {
+              char buf[MAXLEN_PATH];
+              int task = get_file_to_check(ThisTask);
+              get_restart_filename(buf, task, modus);
+
+              int success;
+              check_restart_files(buf, &checks[task], &success);
+              write_success[task] = success;
+            }
+          else
+            {
+              char buf[MAXLEN_PATH];
+              get_restart_filename(buf, ThisTask, modus);
+              write_or_read_this_processors_restart_file(modus, buf, &checks[0]);
+            }
+        }
+      files_completed++;
+
+      if(files_started < NTask) /* task 0 is done, so one more task may start */
+        {
+          if((files_started % files_concurrent) == 0)
+            {
+              if(modus == MODUS_READ)
+                mpi_printf("RESTART: Loading restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                           files_groups);
+              else if(modus == MODUS_WRITE)
+                mpi_printf("RESTART: Writing restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                           files_groups);
+              else
+                mpi_printf("RESTART: Checking restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                           files_groups);
+            }
+
+          send_work_request(modus, files_started++);
+        }
+
+      while(files_completed < NTask) /* collect completions and hand out the remaining work */
+        polling(modus);
+
+      free(seq);
+    }
+  else
+    {
+      int type;
+      MPI_Recv(&type, 1, MPI_INT, 0, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE); /* wait until we are told to start */
+
+      if(type == 0) /* 0: there is work to do for this task */
+        {
+          if(modus == MODUS_CHECK)
+            {
+              struct check ch;
+              MPI_Recv(&ch, sizeof(struct check), MPI_BYTE, 0, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              char buf[MAXLEN_PATH];
+              get_restart_filename(buf, get_file_to_check(ThisTask), modus);
+
+              int success;
+              check_restart_files(buf, &ch, &success);
+              MPI_Ssend(&success, 1, MPI_INT, 0, TAG_KEY, MPI_COMM_WORLD);
+            }
+          else
+            {
+              char buf[MAXLEN_PATH];
+              get_restart_filename(buf, ThisTask, modus);
+              struct check ch;
+              write_or_read_this_processors_restart_file(modus, buf, &ch);
+
+              if(modus == MODUS_WRITE)
+                {
+                  MPI_Ssend(&ch, sizeof(struct check), MPI_BYTE, 0, TAG_KEY, MPI_COMM_WORLD); /* report size and hash of the written file */
+                }
+              else
+                {
+                  int dummy = 0;
+                  MPI_Ssend(&dummy, 1, MPI_INT, 0, TAG_KEY, MPI_COMM_WORLD);
+                }
+            }
+        }
+      else
+        {
+          int dummy = 1; /* nothing to do; the completion message is sent nevertheless */
+          MPI_Ssend(&dummy, 1, MPI_INT, 0, TAG_KEY, MPI_COMM_WORLD);
+        }
+    }
+}
+
+/*! \brief This function reads or writes the restart files.
+ *
+ *  Each processor writes its own restart file, with the
+ *  I/O being done in parallel. To avoid congestion of the disks
+ *  you can tell the program to restrict the number of files
+ *  that are simultaneously written to NumFilesWrittenInParallel.
+ *
+ *  \param[in] modus if modus==MODUS_READ the restart()-routine reads,
+ *         if modus==MODUS_WRITE it writes a restart file.
+ *
+ *  When writing, every file is afterwards read back and verified by another
+ *  task; files that failed verification are written again (at most four
+ *  repetitions). When reading, it is verified that all tasks restarted
+ *  at the same simulation time as task 0.
+ *
+ *  \return void
+ */
+void restart(int modus)
+{
+  CPU_Step[CPU_MISC] += measure_time();
+  double t0  = second();
+  byte_count = 0;
+
+  PageSize = getpagesize();
+  mpi_printf("RESTART: PageSize = %d\n", PageSize);
+
+  if(modus == MODUS_READ)
+    mpi_printf("RESTART: Loading restart files...\n");
+
+  if(ThisTask == 0 && modus == MODUS_WRITE)
+    {
+      printf("RESTART: Writing restart files"); /* line is completed in create_restartfiles_dir() */
+      create_restartfiles_dir();
+    }
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  /* clamp the number of concurrently processed files to [1, NTask] */
+  if(NTask < All.NumFilesWrittenInParallel)
+    {
+      warn("Number of processors should be a smaller or equal than `NumFilesWrittenInParallel'. We're adjusting the latter.\n");
+      All.NumFilesWrittenInParallel = NTask;
+    }
+
+  if(All.NumFilesWrittenInParallel < 1)
+    All.NumFilesWrittenInParallel = 1;
+
+  files_concurrent = All.NumFilesWrittenInParallel;
+  files_groups     = NTask / All.NumFilesWrittenInParallel;
+  if(NTask % All.NumFilesWrittenInParallel)
+    files_groups++;
+
+#ifndef MULTIPLE_RESTARTS
+  if(modus == MODUS_WRITE) /* write */
+    backup_restartfiles(ThisTask);
+#endif /* #ifndef MULTIPLE_RESTARTS */
+
+  /* task 0 keeps the per-task check metadata and verification flags */
+  if(modus == MODUS_WRITE)
+    if(ThisTask == 0)
+      {
+        if(!(checks = malloc(NTask * sizeof(struct check))))
+          terminate("can't allocate checks");
+        if(!(write_success = malloc(NTask)))
+          terminate("can't allocate write_success");
+
+        for(int i = 0; i < NTask; i++)
+          {
+            checks[i].byte_count = 0;
+            write_success[i]     = 0;
+          }
+      }
+
+  work_files(modus);
+
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  if(modus == MODUS_WRITE)
+    {
+      int iter    = 0;
+      int success = 0;
+      while(!success)
+        {
+          work_files(MODUS_CHECK);
+
+          if(ThisTask == 0)
+            {
+              int count = 0; /* number of files that are not yet verified */
+              for(int i = 0; i < NTask; i++)
+                {
+                  if(!write_success[i])
+                    count++;
+                }
+
+              if(count == 0)
+                {
+                  printf("All restart files written successfully.\n");
+                  success = 1;
+                }
+              else
+                {
+                  printf("Need to repeat writing for %d restartfiles.\n", count);
+                }
+            }
+
+          MPI_Bcast(&success, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+          if(success)
+            break;
+
+          iter++;
+          if(iter > 4)
+            terminate("Too many iterations, fix your file system.");
+
+          work_files(MODUS_WRITE);
+        };
+
+      /* Release the bookkeeping arrays. Both pointers are only allocated on
+       * task 0 and are NULL elsewhere, where free() is a no-op. They are
+       * reset so that no dangling pointers remain until the next write. */
+      free(write_success);
+      write_success = NULL;
+      free(checks);
+      checks = NULL;
+    }
+
+  /* check whether the restarts are all at the same time */
+  if(modus == MODUS_READ) /* read */
+    {
+      struct global_data_all_processes all_task0;
+
+      if(ThisTask == 0)
+        all_task0 = All;
+
+      MPI_Bcast(&all_task0, sizeof(struct global_data_all_processes), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+      if(all_task0.Time != All.Time)
+        terminate("The restart file on task=%d is not consistent with the one on task=0\n", ThisTask);
+    }
+
+  long long byte_count_all;
+  sumup_longs(1, &byte_count, &byte_count_all);
+
+  double t1 = second();
+
+  mpi_printf("RESTART: load/save took %g sec, corresponds to I/O rate of %g MB/sec\n", timediff(t0, t1),
+             byte_count_all / (1024.0 * 1024.0) / timediff(t0, t1));
+
+  CPU_Step[CPU_RESTART] += measure_time();
+  mpi_printf("RESTART: done.\n");
+}
+
+/*! \brief Reads or writes restart file.
+ *
+ *  Try write until successful.
+ *
+ *  Any modus other than MODUS_READ results in a write.
+ *
+ *  \param[in] modus Flag for write or read.
+ *  \param[in] buf File name.
+ *  \param[in] ch Check metadata.
+ *
+ *  \return void
+ */
+static void write_or_read_this_processors_restart_file(int modus, char *buf, struct check *ch)
+{
+  if(modus == MODUS_READ)
+    {
+      execute_write_or_read(MODUS_READ, buf, ch);
+    }
+  else
+    {
+      int failed = 0;
+
+      /* repeat as long as the attempt reports a failure (return value 1) */
+      do
+        {
+          failed = execute_write_or_read(MODUS_WRITE, buf, ch);
+        }
+      while(failed > 0);
+    }
+}
+
+/*! \brief Reads or writes a restart file.
+ *
+ *  A single attempt which either is successful or fails.
+ *
+ *  \param[in] modus Flag for write or read.
+ *  \param[in] buf File name.
+ *  \param[in] ch Check metadata.
+ *
+ *  \return 0: success, 1: failed.
+ */
+static int execute_write_or_read(int modus, char *buf, struct check *ch)
+{
+  if(modus == MODUS_WRITE)
+    ch->byte_count = byte_count; /* remember the counter; turned into this file's size after writing */
+
+  int failed_flag = 0;
+
+#ifdef TOLERATE_WRITE_ERROR
+  for(int try_io = 0; try_io < 2; try_io++) /* second pass uses the alternative output directory */
+    {
+      WriteErrorFlag = 0;
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+      if(modus == MODUS_READ || modus == MODUS_READCHECK)
+        {
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+          struct stat st;
+          if(stat(buf, &st) == 0)
+            {
+              size_t size = st.st_size;
+              if(size % PageSize > 0) /* pad the file with zero bytes up to a multiple of PageSize */
+                {
+                  FILE *fd = fopen(buf, "a");
+                  if(fd)
+                    {
+                      size_t n = PageSize - (size % PageSize);
+                      char *p  = calloc(n, 1);
+                      if(p == NULL)
+                        terminate("p == NULL");
+                      printf("RESTART: Topping of restart file '%s' by %lld bytes\n", buf, (long long)n);
+                      fwrite(p, n, 1, fd);
+                      fclose(fd);
+                      free(p);
+                    }
+                  else
+                    terminate("can't increase length of restart file '%s'", buf);
+                }
+            }
+          else
+            terminate("Restart file '%s' not found.\n", buf);
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+          int oflag = O_RDONLY;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+          oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+          if((fdint = open(buf, oflag)) < 0)
+            terminate("Restart file '%s' not found.\n", buf);
+
+          allocate_iobuf();
+        }
+      else
+        {
+#ifdef TOLERATE_WRITE_ERROR
+          int try_open = 0;
+
+          while(try_open < IO_TRIALS) /* retry opening, sleeping IO_SLEEP_TIME between attempts */
+            {
+              int oflag = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+              oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+              if((fdint = open(buf, oflag, S_IRUSR | S_IWUSR | S_IRGRP)) < 0)
+                {
+                  printf("Restart file '%s' cannot be opened. Trying again...\n", buf);
+                  myflush(stdout);
+
+                  try_open++;
+
+                  sleep(IO_SLEEP_TIME);
+                }
+              else
+                break;
+            }
+
+          if(try_open == IO_TRIALS)
+            terminate("Opening of restart file failed too often!");
+#else  /* #ifdef TOLERATE_WRITE_ERROR */
+          int oflag = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+          oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+          if((fdint = open(buf, oflag, S_IRUSR | S_IWUSR | S_IRGRP)) < 0)
+            terminate("Restart file '%s' cannot be opened.\n", buf);
+#endif /* #ifdef TOLERATE_WRITE_ERROR #else */
+          allocate_iobuf();
+        }
+
+      MD5Init(&mysum);
+
+      contents_restart_file(modus); /* transfers the actual data in the direction given by modus */
+
+      MD5Final(&mysum);
+
+      unsigned char has_hash[16];
+      static unsigned char should_hash[16]; /* static: hash of the last written file, kept for a later MODUS_READCHECK call */
+
+      for(int k = 0; k < 16; k++)
+        has_hash[k] = mysum.digest[k];
+
+      if(modus == MODUS_READ)
+        {
+          /* read */
+          unsigned char written_hash[16];
+          byten_nohash(written_hash, 16, modus); /* hash stored in the file behind the content */
+          if(memcmp(has_hash, written_hash, 16) != 0)
+            {
+              char str_has[48], str_written[48];
+              for(int i = 0; i < 16; i++)
+                {
+                  sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+                  sprintf(str_written + 2 * i, "%02X", written_hash[i]);
+                }
+
+              str_has[32] = str_written[32] = 0;
+
+              terminate("RESTART: file '%s' does not match expected MD5 hash of '%s', found '%s' instead.", buf, str_has, str_written);
+            }
+        }
+      else if(modus == MODUS_READCHECK)
+        {
+          if(memcmp(should_hash, has_hash, 16) != 0)
+            {
+              char str_should[48], str_has[48];
+              for(int i = 0; i < 16; i++)
+                {
+                  sprintf(str_should + 2 * i, "%02X", should_hash[i]);
+                  sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+                }
+
+              str_should[32] = str_has[32] = 0;
+
+              failed_flag = 1; /* the only place where a failure is flagged */
+
+              terminate(
+                  "RESTART-READCHECK: file '%s' does not match expected MD5 hash of '%s' after read-back check, has '%s' instead.",
+                  buf, str_should, str_has);
+            }
+#ifdef VERBOSE
+          else
+            {
+              char str_should[48], str_has[48];
+              for(int i = 0; i < 16; i++)
+                {
+                  sprintf(str_should + 2 * i, "%02X", should_hash[i]);
+                  sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+                }
+
+              str_should[32] = str_has[32] = 0;
+
+              printf("RESTART-READCHECK: Task %d: file '%s' does match expected MD5 hash of '%s' after read-back check, has '%s'.\n",
+                     ThisTask, buf, str_should, str_has);
+            }
+#endif /* #ifdef VERBOSE */
+        }
+      else if(modus == MODUS_WRITE)
+        {
+          ch->byte_count = byte_count - ch->byte_count; /* growth of the counter during this call = size of the hashed content */
+          for(int k = 0; k < 16; k++)
+            ch->hash[k] = has_hash[k];
+
+          /* write */
+          byten_nohash(has_hash, 16, modus); /* append the hash to the file, outside the hashed content */
+
+          for(int k = 0; k < 16; k++)
+            should_hash[k] = has_hash[k];
+        }
+      else
+        terminate("This should not happen - wrong modus!");
+
+      deallocate_iobuf(modus);
+
+      close(fdint);
+
+#ifdef TOLERATE_WRITE_ERROR
+      if(WriteErrorFlag == 0)
+        break;
+
+      if(try_io == 0)
+        {
+          char alternative_fname[MAXLEN_PATH];
+          sprintf(alternative_fname, "%s/restartfiles/%s.%d", AlternativeOutputDir, "restart", ThisTask);
+
+          printf("TOLERATE_WRITE_ERROR: Try to write to alternative file: Task=%d try_io=%d alternative-filename='%s'\n", ThisTask,
+                 try_io, alternative_fname);
+          myflush(stdout);
+          strncpy(buf, alternative_fname, MAXLEN_PATH); /* try on a different output directory */
+        }
+      else
+        {
+          terminate("TOLERATE_WRITE_ERROR: Second try with alternative file failed too.\n");
+        }
+    }
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  return failed_flag;
+}
+
+/*! \brief Defines contents of restart file.
+ *
+ *  \param[in] modus Read or write (0: write; >0 read).
+ * + * \return void + */ +static void contents_restart_file(int modus) +{ + /* common data */ + byten(&All, sizeof(struct global_data_all_processes), modus); + + /* individual allocation factors for meshes */ + byten(&Mesh.Indi, sizeof(struct individual_alloc_data), modus); + byten(&DeRefMesh.Indi, sizeof(struct individual_alloc_data), modus); + + polling(modus); + + if(modus == MODUS_READ) /* read */ + allocate_memory(); + + int ntask = NTask; + in(&ntask, modus); + + if(modus == MODUS_READ) + if(ntask != NTask) + terminate("The restart files were written for ntask=%d while you're using now %d MPI ranks\n", ntask, NTask); + + in(&NumPart, modus); + + /* Particle data */ + byten(&P[0], NumPart * sizeof(struct particle_data), modus); + + polling(modus); + + in(&NumGas, modus); + + if(NumGas > 0) + { + /* Sph-Particle data */ + byten(&SphP[0], NumGas * sizeof(struct sph_particle_data), modus); + } + + polling(modus); + + in(&Nvc, modus); + in(&MaxNvc, modus); + in(&FirstUnusedConnection, modus); + + if(modus == MODUS_READ) /* read */ + DC = mymalloc_movable(&DC, "DC", MaxNvc * sizeof(connection)); + + byten(DC, MaxNvc * sizeof(connection), modus); + + polling(modus); + + /* write state of random number generators */ + byten(gsl_rng_state(random_generator), gsl_rng_size(random_generator), modus); + byten(gsl_rng_state(random_generator_aux), gsl_rng_size(random_generator_aux), modus); + + /* now store variables for time integration bookkeeping */ + byten(TimeBinSynchronized, TIMEBINS * sizeof(int), modus); + + in(&TimeBinsHydro.NActiveParticles, modus); + in(&TimeBinsGravity.NActiveParticles, modus); + byten(&TimeBinsHydro.GlobalNActiveParticles, sizeof(long long), modus); + byten(&TimeBinsGravity.GlobalNActiveParticles, sizeof(long long), modus); + byten(TimeBinsHydro.ActiveParticleList, TimeBinsHydro.NActiveParticles * sizeof(int), modus); + byten(TimeBinsGravity.ActiveParticleList, TimeBinsGravity.NActiveParticles * sizeof(int), modus); + 
byten(TimeBinsHydro.NextInTimeBin, NumGas * sizeof(int), modus); + byten(TimeBinsGravity.NextInTimeBin, NumPart * sizeof(int), modus); + byten(TimeBinsHydro.PrevInTimeBin, NumGas * sizeof(int), modus); + byten(TimeBinsGravity.PrevInTimeBin, NumPart * sizeof(int), modus); + byten(TimeBinsHydro.TimeBinCount, TIMEBINS * sizeof(int), modus); + byten(TimeBinsGravity.TimeBinCount, TIMEBINS * sizeof(int), modus); + byten(TimeBinsHydro.FirstInTimeBin, TIMEBINS * sizeof(int), modus); + byten(TimeBinsGravity.FirstInTimeBin, TIMEBINS * sizeof(int), modus); + byten(TimeBinsHydro.LastInTimeBin, TIMEBINS * sizeof(int), modus); + byten(TimeBinsGravity.LastInTimeBin, TIMEBINS * sizeof(int), modus); + +#ifdef USE_SFR + byten(TimeBinSfr, TIMEBINS * sizeof(double), modus); +#endif + + polling(modus); + + /* now store custom data for optional Config settings */ +#ifdef USE_SFR + in(&Stars_converted, modus); +#endif + + polling(modus); + + /* now store relevant data for tree */ + + in(&NTopleaves, modus); + in(&NTopnodes, modus); + + in(&Ngb_MaxPart, modus); + in(&Ngb_MaxNodes, modus); + in(&Ngb_NumNodes, modus); + in(&Ngb_MarkerValue, modus); + in(&Ngb_FirstNonTopLevelNode, modus); + + polling(modus); + + if(modus == MODUS_READ) /* read */ + { + domain_allocate(); + ngb_treeallocate(); + } + + if(All.TotNumGas > 0) + { +#ifdef TREE_BASED_TIMESTEPS + byten(ExtNgb_Nodes + Ngb_MaxPart, Ngb_NumNodes * sizeof(struct ExtNgbNODE), modus); +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + byten(Ngb_Nodes + Ngb_MaxPart, Ngb_NumNodes * sizeof(struct NgbNODE), modus); + byten(Ngb_DomainNodeIndex, NTopleaves * sizeof(int), modus); + byten(Ngb_Nextnode, (Ngb_MaxPart + NTopleaves) * sizeof(int), modus); + byten(Ngb_Father, Ngb_MaxPart * sizeof(int), modus); + byten(Ngb_Marker, (Ngb_MaxPart + NTopleaves) * sizeof(int), modus); + } + + polling(modus); + + byten(TopNodes, NTopnodes * sizeof(struct topnode_data), modus); + byten(DomainTask, NTopleaves * sizeof(int), modus); + byten(DomainCorner, 3 * 
sizeof(double), modus); + byten(DomainCenter, 3 * sizeof(double), modus); + byten(&DomainLen, sizeof(double), modus); + byten(&DomainFac, sizeof(double), modus); + byten(&DomainInverseLen, sizeof(double), modus); + byten(&DomainBigFac, sizeof(double), modus); +} + +/*! \brief Adjusts the timeline if the TimeMax variable is + * increased between a restart. + * + * The approach taken here is to reduce the resolution of the + * integer timeline by factors of 2 until the new final time + * can be reached within TIMEBASE. + * + * \param[in] TimeMax_old old final time. + * \param[in] TimeMax_new new final time (must be larger than old one). + * + * \return void + */ +void readjust_timebase(double TimeMax_old, double TimeMax_new) +{ + int i; + long long ti_end; + + if(sizeof(long long) != 8) + terminate("\nType 'long long' is not 64 bit on this platform\n\n"); + + mpi_printf("\nRESTART: All.TimeMax has been changed in the parameterfile\nNeed to adjust integer timeline\n\n\n"); + + if(TimeMax_new < TimeMax_old) + terminate("\nIt is not allowed to reduce All.TimeMax\n\n"); + + if(All.ComovingIntegrationOn) + ti_end = (long long)(log(TimeMax_new / All.TimeBegin) / All.Timebase_interval); + else + ti_end = (long long)((TimeMax_new - All.TimeBegin) / All.Timebase_interval); + + while(ti_end > TIMEBASE) + { + All.Timebase_interval *= 2.0; + + ti_end /= 2; + All.Ti_Current /= 2; + All.Previous_Ti_Current /= 2; + +#ifdef PMGRID + All.PM_Ti_begstep /= 2; + All.PM_Ti_endstep /= 2; +#endif /* #ifdef PMGRID */ + + for(i = 0; i < NumPart; i++) + { + P[i].Ti_Current /= 2; + + if(P[i].TimeBinGrav > 0) + { + P[i].TimeBinGrav--; + if(P[i].TimeBinGrav <= 0) + { + char buf[1000]; + sprintf(buf, "Error in readjust_timebase(). Minimum Timebin for particle %d reached.\n", i); + terminate(buf); + } + } + + if(P[i].Type == 0) + if(P[i].TimeBinHydro > 0) + { + P[i].TimeBinHydro--; + if(P[i].TimeBinHydro <= 0) + { + char buf[1000]; + sprintf(buf, "Error in readjust_timebase(). 
Minimum Timebin for particle %d reached.\n", i); + terminate(buf); + } + } + } + } + + All.TimeMax = TimeMax_new; +} + +/*! \brief Reads/writes one integer to a restart file. + * + * \param[in, out] x pointer to the integer. + * \param[in] modus if modus>0 the restart()-routine reads, + * if modus==0 it writes a restart file. + * + * \return void + */ +void in(int *x, int modus) { byten(x, sizeof(int), modus); } + +/*! \brief Reads/writes n bytes to restart file buffer. + * + * \param[in, out] x Pointer to the data. + * \param[in] n Number of bytes. + * \param[in] modus If modus>0 the restart()-routine reads, + * if modus==0 it writes a restart file. + * + * \return void + */ +void byten(void *x, size_t n, int modus) { byten_hash(x, n, modus, 1); } + +/*! \brief Wrapper for byten; called with hash=0. + * + * I.e. writes something without including it in calculating the md5sum. This + * should only be done for the md5sum itself, but not for actual data. + * + * \param[in, out] x Pointer to the data. + * \param[in] n Number of bytes. + * \param[in] modus If modus>0 the restart()-routine reads, + * if modus==0 it writes a restart file. + * + * \return void + */ +void byten_nohash(void *x, size_t n, int modus) { byten_hash(x, n, modus, 0); } + +/*! \brief Reads/writes n bytes to restart file buffer. + * + * \param[in, out] x pointer to the data + * \param[in] n number of bytes + * \param[in] modus if modus>0 the restart()-routine reads, + * if modus==0 it writes a restart file. + * \param[in] hash If nonzero, this part is considered in md5sum. 
+ *
+ *  In the reading modes (MODUS_READ, MODUS_READCHECK, MODUS_CHECK) bytes are
+ *  taken from the aligned I/O buffer, which is refilled from the file
+ *  descriptor in multiples of PageSize. For MODUS_READCHECK and MODUS_CHECK
+ *  the bytes go into a temporary buffer that is only hashed and then freed,
+ *  so the memory passed in by the caller is not modified. In all other modes
+ *  the bytes are copied from x into the I/O buffer, which is written out each
+ *  time MAX_BLOCK_SIZE bytes have accumulated; the remainder is flushed by
+ *  deallocate_iobuf().
+ *
+ *  \return void
+ */
+void byten_hash(void *x, size_t n, int modus, int hash)
+{
+  /* running total of all bytes passed through, including the unhashed ones */
+  byte_count += n;
+
+  if(n > 0)
+    {
+      /* n is counted down below; keep the full length for the md5 update */
+      size_t nin = n;
+
+      if(modus == MODUS_READ || modus == MODUS_READCHECK || modus == MODUS_CHECK) /* read */
+        {
+          /* check modes read into scratch memory instead of the caller's data */
+          if(modus == MODUS_READCHECK || modus == MODUS_CHECK)
+            x = mymalloc("x", n);
+
+          unsigned char *ptr = x;
+
+          while(n > 0)
+            {
+              if(iop != fillp)
+                {
+                  /* unread bytes are buffered: hand out as many as available */
+                  size_t nn = n;
+                  if(nn > (fillp - iop))
+                    nn = fillp - iop;
+
+                  memcpy(ptr, iobuf_aligned + iop, nn);
+
+                  n -= nn;
+                  ptr += nn;
+                  iop += nn;
+                }
+              else
+                {
+                  /* buffer exhausted: start again at its beginning once it is full */
+                  if(iop == MAX_BLOCK_SIZE)
+                    {
+                      iop = 0;
+                      fillp = 0;
+                    }
+
+                  /* request whole pages, limited to the space left in the buffer */
+                  size_t nn = n;
+                  if(nn % PageSize > 0)
+                    nn = (nn / PageSize + 1) * PageSize;
+
+                  if(nn > MAX_BLOCK_SIZE - fillp)
+                    nn = MAX_BLOCK_SIZE - fillp;
+
+                  /* NOTE(review): a short read is treated as a fatal error here -- confirm
+                   * this is intended for all file systems in use */
+                  if(read(fdint, iobuf_aligned + fillp, nn) != nn)
+                    terminate("read error");
+
+                  fillp += nn;
+                }
+            }
+
+          if(hash) /* to prevent call if we write/load the checksum itself */
+            MD5UpdateLong(&mysum, x, nin);
+
+          if(modus == MODUS_READCHECK || modus == MODUS_CHECK)
+            myfree(x);
+        }
+      else /* write */
+        {
+          unsigned char *ptr = x;
+
+          while(n > 0)
+            {
+              if(iop < MAX_BLOCK_SIZE)
+                {
+                  /* room left in the buffer: copy as much as fits */
+                  size_t nn = n;
+                  if(nn > MAX_BLOCK_SIZE - iop)
+                    nn = MAX_BLOCK_SIZE - iop;
+                  memcpy(iobuf_aligned + iop, ptr, nn);
+
+                  n -= nn;
+                  ptr += nn;
+                  iop += nn;
+                }
+              else
+                {
+                  /* buffer full: write the complete block and start over;
+                   * NOTE(review): a short write is treated as a fatal error -- confirm */
+                  size_t nn = MAX_BLOCK_SIZE;
+                  if(write(fdint, iobuf_aligned, nn) != nn)
+                    terminate("write error");
+
+                  iop = 0;
+                }
+            }
+
+          if(hash) /* to prevent call if we write/load the checksum itself */
+            MD5UpdateLong(&mysum, x, nin);
+        }
+    }
+}
+
+/*! \brief Allocates the IO buffer for reading/writing the restart-file buffer.
+ * + * \return void + */ +void allocate_iobuf(void) +{ + if((MAX_BLOCK_SIZE % PageSize) > 0) + terminate("MAX_BLOCK_SIZE must be a multiple of PageSize"); + + if(!(io_buf = malloc(MAX_BLOCK_SIZE + PageSize))) + terminate("cannot allocated IO buffer"); + + iobuf_aligned = (char *)(((((size_t)io_buf) + (PageSize - 1)) / PageSize) * PageSize); + + fillp = 0; + iop = 0; +} + +/*! \brief Frees the IO buffer for reading/writing the restart-files. + * + * Writes buffer before freeing it if in MODUS_WRITE. + * + * \param[in] modus Read or write. + * + * \return void + */ +void deallocate_iobuf(int modus) +{ + if(modus == MODUS_WRITE) /* write */ + { + if(iop > 0) + { + if(iop % PageSize > 0) + iop = ((iop / PageSize) + 1) * PageSize; + + if(write(fdint, iobuf_aligned, iop) != iop) + terminate("write error"); + } + } + + free(io_buf); +} diff --git a/src/amuse/community/arepo/src/main/allvars.c b/src/amuse/community/arepo/src/main/allvars.c new file mode 100644 index 0000000000..fedd5f6306 --- /dev/null +++ b/src/amuse/community/arepo/src/main/allvars.c @@ -0,0 +1,331 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/main/allvars.c + * \date 05/2018 + * \brief Contains all global variables. + * \details This file contains the global variables used in Arepo. + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../main/allvars.h" + +struct data_nodelist *DataNodeList; /* to be deleted */ + +MyDouble boxSize, boxHalf; + +#ifdef LONG_X +MyDouble boxSize_X, boxHalf_X; +#else /* #ifdef LONG_X */ +#endif /* #ifdef LONG_X #else */ +#ifdef LONG_Y +MyDouble boxSize_Y, boxHalf_Y; +#else /* #ifdef LONG_Y */ +#endif /* #ifdef LONG_Y #else */ +#ifdef LONG_Z +MyDouble boxSize_Z, boxHalf_Z; +#else /* #ifdef LONG_Z */ +#endif /* #ifdef LONG_Z #else */ + +#ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG +MPI_Status mpistat; +#endif /* #ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG */ + +/*********************************************************/ +/* Global variables */ +/*********************************************************/ + +int ThisTask; /*!< the number of the local processor */ +int NTask; /*!< number of processors */ +int PTask; /*!< note: NTask = 2^PTask */ + +int ThisNode; /*!< the rank of the current compute node */ +int NumNodes; /*!< the number of compute nodes used */ +int MinTasksPerNode; /*!< the minimum number of MPI tasks that is found on any of the nodes */ +int MaxTasksPerNode; /*!< the maximum number of MPI tasks that is found on any of the nodes */ +int TasksInThisNode; /*!< number of MPI tasks on current compute node */ +int RankInThisNode; /*!< rank of the MPI task on the current compute node */ +long long MemoryOnNode; +double CPUThisRun; /*!< Sums CPU time of current process */ +int MaxTopNodes; /*!< Maximum number of nodes in the top-level tree used for domain decomposition */ +int RestartFlag; /*!< taken from command line used to start code. 
0 is normal start-up from + initial conditions, 1 is resuming a run from a set of restart files, while 2 + marks a restart from a snapshot file. */ +int RestartSnapNum; +int Argc; +char **Argv; + +size_t AllocatedBytes; +size_t FreeBytes; + +int Nforces; +int *TargetList; +struct thread_data Thread[NUM_THREADS]; + +#ifdef IMPOSE_PINNING +hwloc_cpuset_t cpuset_thread[NUM_THREADS]; +#endif /* #ifdef IMPOSE_PINNING */ + +int *Exportflag, + *ThreadsExportflag[NUM_THREADS]; /*!< Buffer used for flagging whether a particle needs to be exported to another process */ +int *Exportnodecount; +int *Exportindex; + +int *Send_offset, *Send_count, *Recv_count, *Recv_offset; +int *Send_offset_nodes, *Send_count_nodes, *Recv_count_nodes, *Recv_offset_nodes; +int *TasksThatSend, *TasksThatRecv, NSendTasks, NRecvTasks; +struct send_recv_counts *Send, *Recv; + +int Mesh_nimport, Mesh_nexport, *Mesh_Send_offset, *Mesh_Send_count, *Mesh_Recv_count, *Mesh_Recv_offset; +int Force_nimport, Force_nexport, *Force_Send_offset, *Force_Send_count, *Force_Recv_count, *Force_Recv_offset; + +int TakeLevel; +int TagOffset; + +int TimeBinSynchronized[TIMEBINS]; +struct TimeBinData TimeBinsHydro, TimeBinsGravity; + +#ifdef USE_SFR +double TimeBinSfr[TIMEBINS]; +#endif + +#ifdef SUBFIND +int GrNr; +int NumPartGroup; +#endif /* #ifdef SUBFIND */ + +char DumpFlag = 1; +char DumpFlagNextSnap = 1; + +int FlagNyt = 0; + +double CPU_Step[CPU_LAST]; +double CPU_Step_Stored[CPU_LAST]; + +double WallclockTime; /*!< This holds the last wallclock time measurement for timings measurements */ +double StartOfRun; /*!< This stores the time of the start of the run for evaluating the elapsed time */ + +double EgyInjection; + +int NumPart; /*!< number of particles on the LOCAL processor */ +int NumGas; /*!< number of gas particles on the LOCAL processor */ + +gsl_rng *random_generator; /*!< a random number generator */ +gsl_rng *random_generator_aux; /*!< an auxialiary random number generator for use if one doesn't 
want to influence the main code's + random numbers */ + +#ifdef USE_SFR +int Stars_converted; /*!< current number of star particles in gas particle block */ +#endif + +#ifdef TOLERATE_WRITE_ERROR +int WriteErrorFlag; +char AlternativeOutputDir[MAXLEN_PATH]; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + +double TimeOfLastDomainConstruction; /*!< holds what it says */ + +int *Ngblist; /*!< Buffer to hold indices of neighbours retrieved by the neighbour search + routines */ + +double DomainCorner[3], DomainCenter[3], DomainLen, DomainFac; +double DomainInverseLen, DomainBigFac; +int *DomainStartList, *DomainEndList; +double *DomainCost, *TaskCost; +int *DomainCount, *TaskCount; +struct no_list_data *ListNoData; + +int domain_bintolevel[TIMEBINS]; +int domain_refbin[TIMEBINS]; +int domain_grav_weight[TIMEBINS]; +int domain_hydro_weight[TIMEBINS]; +int domain_to_be_balanced[TIMEBINS]; + +int *DomainTask; +int *DomainNewTask; +int *DomainNodeIndex; + +peanokey *Key, *KeySorted; + +struct topnode_data *TopNodes; + +int NTopnodes, NTopleaves; + +/* variables for input/output , usually only used on process 0 */ + +char ParameterFile[MAXLEN_PATH]; /*!< file name of parameterfile used for starting the simulation */ + +FILE *FdInfo, /*!< file handle for info.txt log-file. */ + *FdEnergy, /*!< file handle for energy.txt log-file. */ + *FdTimings, /*!< file handle for timings.txt log-file. */ + *FdDomain, /*!< file handle for domain.txt log-file. */ + *FdBalance, /*!< file handle for balance.txt log-file. */ + *FdMemory, /*!< file handle for memory.txt log-file. */ + *FdTimebin, /*!< file handle for timebins.txt log-file. */ + *FdCPU; /*!< file handle for cpu.txt log-file. 
*/ + +#ifdef DETAILEDTIMINGS +FILE *FdDetailed; +#endif /* #ifdef DETAILEDTIMINGS */ + +#ifdef OUTPUT_CPU_CSV +FILE *FdCPUCSV; +#endif /* #ifdef OUTPUT_CPU_CSV */ + +#ifdef RESTART_DEBUG +FILE *FdRestartTest; +#endif /* #ifdef RESTART_DEBUG */ + +#ifdef USE_SFR +FILE *FdSfr; /*!< file handle for sfr.txt log-file. */ +#endif + +struct pair_data *Pairlist; + +#ifdef FORCETEST +FILE *FdForceTest; /*!< file handle for forcetest.txt log-file. */ +#endif /* #ifdef FORCETEST */ + +int WriteMiscFiles = 1; + +void *CommBuffer; /*!< points to communication buffer, which is used at a few places */ + +/*! This structure contains data which is the SAME for all tasks (mostly code parameters read from the + * parameter file). Holding this data in a structure is convenient for writing/reading the restart file, and + * it allows the introduction of new global variables in a simple way. The only thing to do is to introduce + * them into this structure. + */ +struct global_data_all_processes All; + +/*! This structure holds all the information that is + * stored for each particle of the simulation. + */ +struct particle_data *P, /*!< holds particle data on local processor */ + *DomainPartBuf; /*!< buffer for particle data used in domain decomposition */ + +struct subfind_data *PS; + +/* the following struture holds data that is stored for each SPH particle in addition to the collisionless + * variables. + */ +struct sph_particle_data *SphP, /*!< holds SPH particle data on local processor */ + *DomainSphBuf; /*!< buffer for SPH particle data in domain decomposition */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE +struct special_particle_data *PartSpecialListGlobal; +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +peanokey *DomainKeyBuf; + +/*! global state of system + */ +struct state_of_system SysState, SysStateAtStart, SysStateAtEnd; + +/*! Various structures for communication during the gravity computation. 
+ */ +struct directdata *DirectDataIn, *DirectDataAll; +struct accdata *DirectAccOut, *DirectAccIn; +int ThreadsNexport[NUM_THREADS], ThreadsNexportNodes[NUM_THREADS]; +struct data_partlist *PartList, *ThreadsPartList[NUM_THREADS]; +struct datanodelist *NodeList, *ThreadsNodeList[NUM_THREADS]; +struct potdata_out *PotDataResult, /*!< holds the partial results computed for imported particles. Note: We use GravDataResult = + GravDataGet, such that the result replaces the imported data */ + *PotDataOut; /*!< holds partial results received from other processors. This will overwrite the GravDataIn array */ + +/*! Header for the standard file format. + */ +struct io_header header; /*!< holds header for snapshot files */ +#ifdef NTYPES_ICS +struct io_header_ICs header_ICs; /*!< holds header for IC files */ +#endif /* #ifdef NTYPES_ICS */ +char (*Parameters)[MAXLEN_PARAM_TAG]; +char (*ParametersValue)[MAXLEN_PARAM_VALUE]; +char *ParametersType; + +/*! Variables for gravitational tree + * ------------------ + */ +int Tree_MaxPart; +int Tree_NumNodes; +int Tree_MaxNodes; +int Tree_FirstNonTopLevelNode; +int Tree_NumPartImported; +int Tree_NumPartExported; +int Tree_ImportedNodeOffset; +int Tree_NextFreeNode; +MyDouble *Tree_Pos_list; +unsigned long long *Tree_IntPos_list; +int *Tree_Task_list; +int *Tree_ResultIndexList; + +struct treepoint_data *Tree_Points; +struct resultsactiveimported_data *Tree_ResultsActiveImported; + +int *Nextnode; /*!< gives next node in tree walk (nodes array) */ +int *Father; /*!< gives parent node in tree (Prenodes array) */ + +struct NODE *Nodes; /*!< points to the actual memory allocted for the nodes */ + /*!< this is a pointer used to access the nodes which is shifted such that Nodes[All.MaxPart] + gives the first allocated node */ + +#ifdef MULTIPLE_NODE_SOFTENING +struct ExtNODE *ExtNodes; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + +float *Nodes_GravCost; + +/*! 
Variables for neighbor tree + * ----------------- + */ +int Ngb_MaxPart; +int Ngb_NumNodes; +int Ngb_MaxNodes; +int Ngb_FirstNonTopLevelNode; +int Ngb_NextFreeNode; +int *Ngb_Father; +int *Ngb_Marker; +int Ngb_MarkerValue; + +int *Ngb_DomainNodeIndex; +int *DomainListOfLocalTopleaves; +int *DomainNLocalTopleave; +int *DomainFirstLocTopleave; +int *Ngb_Nextnode; + +/*! The ngb-tree data structure + */ +struct NgbNODE *Ngb_Nodes; +struct ExtNgbNODE *ExtNgb_Nodes; + +#ifdef STATICNFW +double Rs, R200; +double Dc; +double RhoCrit, V200; +double fac; +#endif /* #ifdef STATICNFW */ + +int MaxThreads = 1; + +IO_Field *IO_Fields; +int N_IO_Fields = 0; +int Max_IO_Fields = 0; diff --git a/src/amuse/community/arepo/src/main/allvars.h b/src/amuse/community/arepo/src/main/allvars.h new file mode 100644 index 0000000000..2dc46e56b3 --- /dev/null +++ b/src/amuse/community/arepo/src/main/allvars.h @@ -0,0 +1,1924 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/main/allvars.h + * \date 05/2018 + * \brief All (global) variables. 
+ * \details + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 30.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef ALLVARS_H +#define ALLVARS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "./arepoconfig.h" + +#ifdef IMPOSE_PINNING +#include +#endif /* #ifdef IMPOSE_PINNING */ + +#include "../time_integration/timestep.h" +#include "../utils/dtypes.h" +#include "../utils/tags.h" + +#define AREPO_VERSION "Arepo public 1.0" /* code version string */ + +/* default values for unspecified config options */ + +#if defined(__linux__) && !defined(HOST_MEMORY_REPORTING) +#define HOST_MEMORY_REPORTING +#endif /* #if defined(__linux__) && !defined(HOST_MEMORY_REPORTING) */ + +#ifndef LOAD_TYPES +#define LOAD_TYPES 0xff +#endif /* #ifndef LOAD_TYPES */ + +#if defined(REFINEMENT_SPLIT_CELLS) || defined(REFINEMENT_MERGE_CELLS) +#define REFINEMENT +#else /* #if defined (REFINEMENT_SPLIT_CELLS) || defined (REFINEMENT_MERGE_CELLS) */ +#undef REFINEMENT +#endif /* #if defined (REFINEMENT_SPLIT_CELLS) || defined (REFINEMENT_MERGE_CELLS) #else */ + +#ifndef NTYPES +#define NTYPES 6 +#endif /* #ifndef NTYPES */ + +#ifndef NSOFTTYPES +#define NSOFTTYPES NTYPES +#endif /* #ifndef NSOFTTYPES */ + +#if !defined(OUTPUT_PRESSURE_GRADIENT) && !defined(OUTPUT_DENSITY_GRADIENT) && !defined(OUTPUT_VELOCITY_GRADIENT) && \ + !defined(OUTPUT_BFIELD_GRADIENT) && !defined(OUTPUT_DIVVEL) && !defined(OUTPUT_CURLVEL) && !defined(OUTPUT_VORTICITY) +// only if no gradient output defined, no need to update them directly before output. 
+#else /* #if !defined(OUTPUT_PRESSURE_GRADIENT) && !defined(OUTPUT_DENSITY_GRADIENT) && !defined(OUTPUT_VELOCITY_GRADIENT) && \ + !defined(OUTPUT_BFIELD_GRADIENT) && !defined(OUTPUT_DIVVEL) && !defined(OUTPUT_CURLVEL) && !defined(OUTPUT_VORTICITY) */ +#define UPDATE_GRADIENTS_FOR_OUTPUT +#endif /* #if !defined(OUTPUT_PRESSURE_GRADIENT) && !defined(OUTPUT_DENSITY_GRADIENT) && !defined(OUTPUT_VELOCITY_GRADIENT) && \ + !defined(OUTPUT_BFIELD_GRADIENT) && !defined(OUTPUT_DIVVEL) && !defined(OUTPUT_CURLVEL) && !defined(OUTPUT_VORTICITY) #else \ + */ + +#ifdef ADAPTIVE_HYDRO_SOFTENING +#ifndef NSOFTTYPES_HYDRO +#define NSOFTTYPES_HYDRO 64 +#endif /* #ifndef NSOFTTYPES_HYDRO */ +#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#undef NSOFTTYPES_HYDRO +#define NSOFTTYPES_HYDRO 0 +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */ + +#if defined(SAVE_HSML_IN_SNAPSHOT) +#define SUBFIND_CALC_MORE +#endif /* #if defined(SAVE_HSML_IN_SNAPSHOT) */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE +#define NO_SELFGRAVITY_TYPE \ + EXACT_GRAVITY_FOR_PARTICLE_TYPE // exclude particle type from self-gravity (can be used with exact gravity) +#define NO_GRAVITY_TYPE EXACT_GRAVITY_FOR_PARTICLE_TYPE // disable computation of gravity on particle type +#define EXACT_GRAVITY_REACTION // include reaction to other particle types when using exact gravity +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +/* restrictions on config option combinations */ +#if(NSOFTTYPES + NSOFTTYPES_HYDRO) >= 254 +#error "(NSOFTTYPES + NSOFTTYPES_HYDRO) >= 254" +#endif /* #if (NSOFTTYPES + NSOFTTYPES_HYDRO) >= 254 */ + +#if NSOFTTYPES < 2 +#error "NSOFTTYPES < 2" +#endif /* #if NSOFTTYPES < 2 */ + +#if defined(HOST_MEMORY_REPORTING) && !defined(__linux__) +#error "HOST_MEMORY_REPORTING only works under Linux." +#endif /* #if defined(HOST_MEMORY_REPORTING) && !defined(__linux__) */ + +#if defined(USE_DIRECT_IO_FOR_RESTARTS) && !defined(__linux__) +#error "USE_DIRECT_IO_FOR_RESTARTS only works under Linux." 
+#endif /* #if defined(USE_DIRECT_IO_FOR_RESTARTS) && !defined(__linux__) */ + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING +#if !((INDIVIDUAL_GRAVITY_SOFTENING + 0) >= 1) +#error "set INDIVIDUAL_GRAVITY_SOFTENING to a bitmask of particle types" +#endif /* #if !((INDIVIDUAL_GRAVITY_SOFTENING+0) >= 1) */ +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ + +#ifdef OUTPUTPOTENTIAL +#ifndef EVALPOTENTIAL +#error "the option OUTPUTPOTENTIAL requires EVALPOTENTIAL" +#endif /* #ifndef EVALPOTENTIAL */ +#endif /* #ifdef OUTPUTPOTENTIAL */ + +#if defined(CELL_CENTER_GRAVITY) && defined(SELFGRAVITY) +#ifndef HIERARCHICAL_GRAVITY +#error "the of option CELL_CENTER_GRAVITY requires HIERARCHICAL_GRAVITY" +#endif /* #ifndef HIERARCHICAL_GRAVITY */ +#endif /* #if defined(CELL_CENTER_GRAVITY) && defined(SELFGRAVITY) */ + +#ifdef MHD +#ifndef RIEMANN_HLLD +#error "the of option MHD requires RIEMANN_HLLD" +#endif /* #ifndef RIEMANN_HLLD */ +#endif /* #ifdef MHD */ + +/* optional additional headers based on config options */ + +#include "../utils/timer.h" + +#if defined(COOLING) +#include "../cooling/cooling_vars.h" +#endif /* #if defined(COOLING) */ + +#ifdef ADDBACKGROUNDGRID +#include "../add_backgroundgrid/add_bggrid.h" +#endif /* #ifdef ADDBACKGROUNDGRID */ + +/* function mappings and macros */ + +#ifdef MPI_HYPERCUBE_ALLGATHERV +#define MPI_Allgatherv MPI_hypercube_Allgatherv +#endif /* #ifdef MPI_HYPERCUBE_ALLGATHERV */ + +#ifdef MPISENDRECV_CHECKSUM +#define MPI_Sendrecv MPI_Check_Sendrecv +#endif /* #ifdef MPISENDRECV_CHECKSUM */ + +#define terminate(...) 
\ + { \ + if(FlagNyt == 0) \ + { \ + char termbuf1[1000], termbuf2[1000]; \ + sprintf(termbuf1, "TERMINATE: ******!!!!!****** Code termination on task=%d, function %s(), file %s, line %d", ThisTask, \ + __FUNCTION__, __FILE__, __LINE__); \ + sprintf(termbuf2, __VA_ARGS__); \ + printf("%s: %s\n", termbuf1, termbuf2); \ + fflush(stdout); \ + FlagNyt = 1; \ + MPI_Abort(MPI_COMM_WORLD, 1); \ + } \ + exit(1); \ + } +#define mpi_terminate(...) \ + { \ + if(ThisTask == 0) \ + terminate(__VA_ARGS__); \ + } +#define warn(...) \ + { \ + char termbuf1[1000], termbuf2[1000]; \ + sprintf(termbuf1, "WARNING: Code warning on task=%d, function %s(), file %s, line %d", ThisTask, __FUNCTION__, __FILE__, \ + __LINE__); \ + sprintf(termbuf2, __VA_ARGS__); \ + printf("%s: %s\n", termbuf1, termbuf2); \ + myflush(stdout); \ + FILE *fd = fopen("WARNINGS", "a"); \ + fprintf(fd, "%s: %s\n", termbuf1, termbuf2); \ + fclose(fd); \ + } + +/* define an "assert" macro which outputs MPI task (we do NOT want to + call MPI_Abort, because then the assertion failure isn't caught in + the debugger) */ +#define myassert(cond) \ + if(!(cond)) \ + { \ + char termbuf[1000]; \ + sprintf(termbuf, "Assertion failure!\n\ttask=%d, function %s(), file %s, line %d:\n\t%s\n", ThisTask, __FUNCTION__, __FILE__, \ + __LINE__, #cond); \ + printf("%s", termbuf); \ + myflush(stdout); \ + assert(0); \ + } + +/* memory manager */ +#define mymalloc(x, y) mymalloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__, 0, NULL) +#define mymalloc_g(x, y) mymalloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__, 0, callorigin) +#define mymalloc_clear(x, y) mymalloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__, 1, NULL) +#define mymalloc_movable(x, y, z) mymalloc_movable_fullinfo(x, y, z, __FUNCTION__, __FILE__, __LINE__, NULL) +#define mymalloc_movable_g(x, y, z) mymalloc_movable_fullinfo(x, y, z, __FUNCTION__, __FILE__, __LINE__, callorigin) +#define myrealloc(x, y) myrealloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__) 
+#define myrealloc_movable(x, y) myrealloc_movable_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__) +#define myfree(x) myfree_fullinfo(x, __FUNCTION__, __FILE__, __LINE__) +#define myfree_movable(x) myfree_movable_fullinfo(x, __FUNCTION__, __FILE__, __LINE__) + +#define MAX_FIRST_ELEMENTS_CONSIDERED \ + 5 /* This sets the number of lowest loaded tasks to be considered for assignment of next domain patch */ + +#define NUMBER_OF_MEASUREMENTS_TO_RECORD 6 + +#ifndef GRAVCOSTLEVELS +#define GRAVCOSTLEVELS 6 +#endif /* #ifndef GRAVCOSTLEVELS */ + +#define MODE_LOCAL_NO_EXPORT -1 +#define MODE_LOCAL_PARTICLES 0 +#define MODE_IMPORTED_PARTICLES 1 +#define MODE_FINISHED 2 + +#ifndef DIRECT_SUMMATION_THRESHOLD +#define DIRECT_SUMMATION_THRESHOLD 3000 +#endif /* #ifndef DIRECT_SUMMATION_THRESHOLD */ + +#define MODE_FIRST_HALFSTEP 0 +#define MODE_SECOND_HALFSTEP 1 + +#define FLAG_PARTIAL_TREE 0 +#define FLAG_FULL_TREE 1 + +#ifndef MPI_MESSAGE_SIZELIMIT_IN_MB +#define MPI_MESSAGE_SIZELIMIT_IN_MB 200 +#endif /* #ifndef MPI_MESSAGE_SIZELIMIT_IN_MB */ + +#define MPI_MESSAGE_SIZELIMIT_IN_BYTES ((MPI_MESSAGE_SIZELIMIT_IN_MB)*1024LL * 1024LL) + +#define COMMBUFFERSIZE (32 * 1024LL * 1024LL) + +#define NUM_THREADS 1 /* no OpenMP support in this code! 
*/ + +extern int Nforces; +extern int *TargetList; + +extern struct thread_data +{ + int Nexport __attribute__((__aligned__(64))); /* to align on different cache lines */ + int NexportNodes; + int Interactions; + int dummy; + double Cost; + + double Costtotal; /*!< The total cost of the particles/nodes processed by each thread */ + double Ewaldcount; /*!< The total cost for the Ewald correction per thread */ + int FirstExec; /*!< Keeps track, if a given thread executes the gravity_primary_loop() for the first time */ + + size_t ExportSpace; + size_t InitialSpace; + size_t ItemSize; + + int *P_CostCount; + int *TreePoints_CostCount; + int *Node_CostCount; + + struct data_partlist *PartList; + + int *Ngblist; + double *R2list; + int *Exportflag; + int *toGoDM; + int *toGoSph; + +} Thread[NUM_THREADS]; + +/* If we use a static Voronoi mesh with local timestepping and no rebuild of + * the static mesh, then we need to backup the face areas before calling + * compute_interface_fluxes(), because this function calls face_get_normals() + * which sets some face area to 0 under some circumstances */ +#if defined(VORONOI_STATIC_MESH) && !defined(FORCE_EQUAL_TIMESTEPS) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) +#define VORONOI_BACKUP_RESTORE_FACE_AREAS +#else /* #if defined(VORONOI_STATIC_MESH) && !defined(FORCE_EQUAL_TIMESTEPS) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) \ + */ +#undef VORONOI_BACKUP_RESTORE_FACE_AREAS +#endif /* #if defined(VORONOI_STATIC_MESH) && !defined(FORCE_EQUAL_TIMESTEPS) && \ + !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) #else */ + +#ifdef IMPOSE_PINNING +extern hwloc_cpuset_t cpuset_thread[NUM_THREADS]; +#endif /* #ifdef IMPOSE_PINNING */ + +#ifdef ONEDIMS +#define ALLOC_TOLERANCE 0.3 +#else /* #ifdef ONEDIMS */ +#define ALLOC_TOLERANCE 0.1 +#endif /* #ifdef ONEDIMS #else */ +#define ALLOC_STARBH_ROOM 0.02 + +#ifdef TOLERATE_WRITE_ERROR +#define IO_TRIALS 20 +#define IO_SLEEP_TIME 10 +#endif /* #ifdef 
TOLERATE_WRITE_ERROR */ + +/* calculate appropriate value of MAXSCALARS */ + +#if defined(REFINEMENT_HIGH_RES_GAS) || defined(PASSIVE_SCALARS) + +#ifdef REFINEMENT_HIGH_RES_GAS +#define COUNT_REFINE 1 +#else /* #ifdef REFINEMENT_HIGH_RES_GAS */ +#define COUNT_REFINE 0 +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS #else */ + +#ifdef PASSIVE_SCALARS +#define COUNT_PASSIVE_SCALARS PASSIVE_SCALARS +#else /* #ifdef PASSIVE_SCALARS */ +#define COUNT_PASSIVE_SCALARS 0 +#endif /* #ifdef PASSIVE_SCALARS #else */ + +#define MAXSCALARS (COUNT_REFINE + COUNT_PASSIVE_SCALARS) +#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) || defined(PASSIVE_SCALARS)*/ + +/* calculate appropriate value of MAXGRADIENTS */ + +#define COUNT_GRAD_DEFAULT 5 + +#ifdef MHD +#define COUNT_GRAD_MHD 3 +#else /* #ifdef MHD */ +#define COUNT_GRAD_MHD 0 +#endif /* #ifdef MHD #else */ + +#ifdef MAXSCALARS +#define COUNT_GRAD_SCALARS MAXSCALARS +#else /* #ifdef MAXSCALARS */ +#define COUNT_GRAD_SCALARS 0 +#endif /* #ifdef MAXSCALARS #else*/ + +#define MAXGRADIENTS (COUNT_GRAD_DEFAULT + COUNT_GRAD_MHD + COUNT_GRAD_SCALARS) + +/*************************************/ + +/*! For Peano-Hilbert order. 
+ * Note: Maximum is 10 to fit in 32-bit integer, + * maximum is 21 to fit into 64-bit integer, + * and 42 is the absolute maximum, for which 128-bit integers are needed + */ +#ifndef BITS_PER_DIMENSION +#define BITS_PER_DIMENSION 42 +#endif /* #ifndef BITS_PER_DIMENSION */ +#if(BITS_PER_DIMENSION <= 21) +typedef unsigned long long peanokey; +#else /* #if (BITS_PER_DIMENSION <= 21) */ +typedef __int128 peanokey; +#endif /* #if (BITS_PER_DIMENSION <= 21) #else */ +#if(BITS_PER_DIMENSION <= 31) +typedef unsigned int peano1D; +#else /* #if (BITS_PER_DIMENSION <= 31) */ +#if(BITS_PER_DIMENSION <= 42) +typedef unsigned long long peano1D; +#else /* #if (BITS_PER_DIMENSION <= 42) */ +#error "BITS_PER_DIMENSION can be at most 42" +#endif /* #if (BITS_PER_DIMENSION <= 42) #else */ +#endif /* #if (BITS_PER_DIMENSION <= 31) #else */ + +#define PEANOCELLS (((peanokey)1) << (3 * BITS_PER_DIMENSION)) + +#define MAX_FLOAT_NUMBER 1e37 +#define MIN_FLOAT_NUMBER 1e-37 +#define MAX_DOUBLE_NUMBER 1e306 +#define MIN_DOUBLE_NUMBER 1e-306 + +#ifdef DOUBLEPRECISION +#if(DOUBLEPRECISION == 2) +#define MAX_REAL_NUMBER MAX_FLOAT_NUMBER +#define MIN_REAL_NUMBER MIN_FLOAT_NUMBER +#else /* #if (DOUBLEPRECISION==2) */ +#define MAX_REAL_NUMBER MAX_DOUBLE_NUMBER +#define MIN_REAL_NUMBER MIN_DOUBLE_NUMBER +#endif /* #if (DOUBLEPRECISION==2) #else */ +#else /* #ifdef DOUBLEPRECISION */ +#define MAX_REAL_NUMBER MAX_FLOAT_NUMBER +#define MIN_REAL_NUMBER MIN_FLOAT_NUMBER +#endif /* #ifdef DOUBLEPRECISION #else */ + +#ifndef GAMMA +#define GAMMA (5. / 3.) /*!< adiabatic index of simulated gas */ +#endif /* #ifndef GAMMA */ +#define GAMMA_MINUS1 (GAMMA - 1.) +#define GAMMA_PLUS1 (GAMMA + 1.) + +#define HYDROGEN_MASSFRAC 0.76 /*!< mass fraction of hydrogen, relevant only for radiative cooling */ +#define HE_ABUND ((1. / HYDROGEN_MASSFRAC - 1.) / 4.) + +/* ... 
often used physical constants (cgs units; NIST 2010) */ + +#define GRAVITY 6.6738e-8 +#define SOLAR_MASS 1.989e33 +#define SOLAR_LUM 3.826e33 +#define SOLAR_EFF_TEMP 5.780e3 +#define RAD_CONST 7.5657e-15 +#define AVOGADRO 6.02214e23 +#define BOLTZMANN 1.38065e-16 +#define GAS_CONST 8.31446e7 +#define CLIGHT 2.99792458e10 + +#define PLANCK 6.6260695e-27 +#define PARSEC 3.085678e18 +#define KILOPARSEC 3.085678e21 +#define MEGAPARSEC 3.085678e24 +#define ASTRONOMICAL_UNIT 1.49598e13 +#define PROTONMASS 1.67262178e-24 +#define ELECTRONMASS 9.1093829e-28 +#define THOMPSON 6.65245873e-25 +#define ELECTRONCHARGE 4.8032042e-10 +#define HUBBLE 3.2407789e-18 /* in h/sec */ +#define LYMAN_ALPHA 1215.6e-8 /* 1215.6 Angstroem */ +#define LYMAN_ALPHA_HeII 303.8e-8 /* 303.8 Angstroem */ +#define OSCILLATOR_STRENGTH 0.41615 +#define OSCILLATOR_STRENGTH_HeII 0.41615 +#define ELECTRONVOLT_IN_ERGS 1.60217656e-12 + +#define SEC_PER_GIGAYEAR 3.15576e16 +#define SEC_PER_MEGAYEAR 3.15576e13 +#define SEC_PER_YEAR 3.15576e7 + +#ifndef FOF_PRIMARY_LINK_TYPES +#define FOF_PRIMARY_LINK_TYPES 2 +#endif /* #ifndef FOF_PRIMARY_LINK_TYPES */ + +#ifndef FOF_SECONDARY_LINK_TYPES +#define FOF_SECONDARY_LINK_TYPES 0 +#endif /* #ifndef FOF_SECONDARY_LINK_TYPES */ + +#ifndef ASMTH +/*! ASMTH gives the scale of the short-range/long-range force split in units + * of FFT-mesh cells + */ +#define ASMTH 1.25 +#endif /* #ifndef ASMTH */ + +#ifndef RCUT +/*! RCUT gives the maximum distance (in units of the scale used for the force + * split) out to which short-range forces are evaluated in the short-range + * tree walk. 
+ */ +#define RCUT 4.5 +#endif /* #ifndef RCUT */ + +#define MAXLEN_OUTPUTLIST 1100 /*!< maximum number of entries in output list */ +#define MAXLEN_PATH 256 /*!< maximum length of various filenames (full path) */ +#define MAXLEN_PARAM_TAG 50 /*!< maximum length of the tag of a parameter in the parameter file */ +#define MAXLEN_PARAM_VALUE 200 /*!< maximum length of the value of a parameter in the parameter file */ +#define MAX_PARAMETERS 300 /*!< maximum number of parameters in the parameter file */ +#define DRIFT_TABLE_LENGTH 1000 /*!< length of the lookup table used to hold the drift and kick factors */ + +#define BASENUMBER 100 +#define HIGHRESMASSFAC 0.5 + +#define MAXITER 300000 /*!< Maximum number of iterations before process is terminated */ + +#ifndef FOF_LINKLENGTH +#define FOF_LINKLENGTH 0.2 +#endif /* #ifndef FOF_LINKLENGTH */ + +#ifndef FOF_GROUP_MIN_LEN +#define FOF_GROUP_MIN_LEN 32 +#endif /* #ifndef FOF_GROUP_MIN_LEN */ + +typedef struct +{ + double r; + double mass; +} sort_r2list; + +typedef struct +{ + MyFloat r2; + int index; +} r2type; + +#include "../mesh/mesh.h" +#include "../mesh/voronoi/voronoi.h" + +struct unbind_data +{ + int index; +}; + +#ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG +extern MPI_Status mpistat; +#undef MPI_STATUS_IGNORE +#define MPI_STATUS_IGNORE &mpistat +#endif /* #ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG */ + +#define FLT(x) (x) /*!< identity macro: expands to its argument unchanged */ + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif /* #ifndef M_PI */ + +#define TO_MBYTE_FAC (1.0 / (1024.0 * 1024.0)) /*!< multiply a byte count by this to express it in units of 1024*1024 bytes */ + +#ifdef ONEDIMS +#define NUMDIMS 1 +#define KERNEL_COEFF_1 (4.0 / 3) +#define KERNEL_COEFF_2 (8.0) +#define KERNEL_COEFF_3 (24.0) +#define KERNEL_COEFF_4 (16.0) +#define KERNEL_COEFF_5 (8.0 / 3) +#define KERNEL_COEFF_6 (-8.0) +#define NORM_COEFF 2.0 +#else /* #ifdef ONEDIMS */ +#ifndef TWODIMS +#define NUMDIMS 3 /*!< For 3D-normalized kernel */ +#define KERNEL_COEFF_1 2.546479089470 /*!< Coefficients for SPH spline kernel and its derivative */ +#define
KERNEL_COEFF_2 15.278874536822 +#define KERNEL_COEFF_3 45.836623610466 +#define KERNEL_COEFF_4 30.557749073644 +#define KERNEL_COEFF_5 5.092958178941 +#define KERNEL_COEFF_6 (-15.278874536822) +#define NORM_COEFF 4.188790204786 /*!< Coefficient for kernel normalization. Note: 4.0/3 * PI = 4.188790204786 */ +#else /* #ifndef TWODIMS */ +#define NUMDIMS 2 /*!< For 2D-normalized kernel */ +#define KERNEL_COEFF_1 (5.0 / 7 * 2.546479089470) /*!< Coefficients for SPH spline kernel and its derivative */ +#define KERNEL_COEFF_2 (5.0 / 7 * 15.278874536822) +#define KERNEL_COEFF_3 (5.0 / 7 * 45.836623610466) +#define KERNEL_COEFF_4 (5.0 / 7 * 30.557749073644) +#define KERNEL_COEFF_5 (5.0 / 7 * 5.092958178941) +#define KERNEL_COEFF_6 (5.0 / 7 * (-15.278874536822)) +#define NORM_COEFF M_PI /*!< Coefficient for kernel normalization. */ +#endif /* #ifndef TWODIMS #else */ +#endif /* #ifdef ONEDIMS #else*/ + +#define SOFTFAC1 10.666666666667 /*!< Coefficients for gravitational softening */ +#define SOFTFAC2 32.0 +#define SOFTFAC3 (-38.4) +#define SOFTFAC4 (-2.8) +#define SOFTFAC5 5.333333333333 +#define SOFTFAC6 6.4 +#define SOFTFAC7 (-9.6) +#define SOFTFAC8 21.333333333333 +#define SOFTFAC9 (-48.0) +#define SOFTFAC10 38.4 +#define SOFTFAC11 (-10.666666666667) +#define SOFTFAC12 (-0.066666666667) +#define SOFTFAC13 (-3.2) +#define SOFTFAC14 0.066666666667 +#define SOFTFAC15 (-16.0) +#define SOFTFAC16 9.6 +#define SOFTFAC17 (-2.133333333333) +#define SOFTFAC18 128.0 +#define SOFTFAC19 (-115.2) +#define SOFTFAC20 21.333333333333 +#define SOFTFAC21 (-96.0) +#define SOFTFAC22 115.2 +#define SOFTFAC23 (-42.666666666667) +#define SOFTFAC24 0.1333333333333 + +extern MyDouble boxSize, boxHalf; +#ifdef LONG_X +extern MyDouble boxSize_X, boxHalf_X; +#else /* #ifdef LONG_X */ +#define boxSize_X boxSize +#define boxHalf_X boxHalf +#endif /* #ifdef LONG_X #else */ +#ifdef LONG_Y +extern MyDouble boxSize_Y, boxHalf_Y; +#else /* #ifdef LONG_Y */ +#define boxSize_Y boxSize +#define boxHalf_Y 
boxHalf +#endif /* #ifdef LONG_Y #else */ +#ifdef LONG_Z +extern MyDouble boxSize_Z, boxHalf_Z; +#else /* #ifdef LONG_Z */ +#define boxSize_Z boxSize +#define boxHalf_Z boxHalf +#endif /* #ifdef LONG_Z #else */ + +#if !defined(GRAVITY_NOT_PERIODIC) +#define GRAVITY_NEAREST_X(x) \ + (xtmp = (x), (xtmp > boxHalf_X) ? (xtmp - boxSize_X) : ((xtmp < -boxHalf_X) ? (xtmp + boxSize_X) : (xtmp))) +#define GRAVITY_NEAREST_Y(x) \ + (ytmp = (x), (ytmp > boxHalf_Y) ? (ytmp - boxSize_Y) : ((ytmp < -boxHalf_Y) ? (ytmp + boxSize_Y) : (ytmp))) +#define GRAVITY_NEAREST_Z(x) \ + (ztmp = (x), (ztmp > boxHalf_Z) ? (ztmp - boxSize_Z) : ((ztmp < -boxHalf_Z) ? (ztmp + boxSize_Z) : (ztmp))) +#else /* #if !defined(GRAVITY_NOT_PERIODIC) */ +#define GRAVITY_NEAREST_X(x) (x) +#define GRAVITY_NEAREST_Y(x) (x) +#define GRAVITY_NEAREST_Z(x) (x) +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) #else */ + +#if !defined(GRAVITY_NOT_PERIODIC) +#define FOF_NEAREST_LONG_X(x) (xtmp = fabs(x), (xtmp > boxHalf_X) ? (boxSize_X - xtmp) : xtmp) +#define FOF_NEAREST_LONG_Y(x) (ytmp = fabs(x), (ytmp > boxHalf_Y) ? (boxSize_Y - ytmp) : ytmp) +#define FOF_NEAREST_LONG_Z(x) (ztmp = fabs(x), (ztmp > boxHalf_Z) ? (boxSize_Z - ztmp) : ztmp) +#else /* #if !defined(GRAVITY_NOT_PERIODIC) */ +#define FOF_NEAREST_LONG_X(x) fabs(x) +#define FOF_NEAREST_LONG_Y(x) fabs(x) +#define FOF_NEAREST_LONG_Z(x) fabs(x) +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) #else */ + +/* periodicity of gas */ +#ifndef REFLECTIVE_X +#define NGB_PERIODIC_LONG_X(x) (xtmp = fabs(x), (xtmp > boxHalf_X) ? (boxSize_X - xtmp) : xtmp) +#define NEAREST_X(x) (xtmp = (x), (xtmp > boxHalf_X) ? (xtmp - boxSize_X) : ((xtmp < -boxHalf_X) ? (xtmp + boxSize_X) : (xtmp))) +#define WRAP_X(x) (xtmp = (x), (xtmp > boxSize_X) ? (xtmp - boxSize_X) : ((xtmp < 0) ? 
(xtmp + boxSize_X) : (xtmp))) +#else /* #ifndef REFLECTIVE_X */ +#define NGB_PERIODIC_LONG_X(x) fabs(x) +#define NEAREST_X(x) (x) +#define WRAP_X(x) (x) +#endif /* #ifndef REFLECTIVE_X #else */ + +#ifndef REFLECTIVE_Y +#define NGB_PERIODIC_LONG_Y(x) (ytmp = fabs(x), (ytmp > boxHalf_Y) ? (boxSize_Y - ytmp) : ytmp) +#define NEAREST_Y(x) (ytmp = (x), (ytmp > boxHalf_Y) ? (ytmp - boxSize_Y) : ((ytmp < -boxHalf_Y) ? (ytmp + boxSize_Y) : (ytmp))) +#define WRAP_Y(x) (ytmp = (x), (ytmp > boxSize_Y) ? (ytmp - boxSize_Y) : ((ytmp < 0) ? (ytmp + boxSize_Y) : (ytmp))) +#else /* #ifndef REFLECTIVE_Y */ +#define NGB_PERIODIC_LONG_Y(x) fabs(x) +#define NEAREST_Y(x) (x) +#define WRAP_Y(x) (x) +#endif /* #ifndef REFLECTIVE_Y #else */ + +#ifndef REFLECTIVE_Z +#define NGB_PERIODIC_LONG_Z(x) (ztmp = fabs(x), (ztmp > boxHalf_Z) ? (boxSize_Z - ztmp) : ztmp) +#define NEAREST_Z(x) (ztmp = (x), (ztmp > boxHalf_Z) ? (ztmp - boxSize_Z) : ((ztmp < -boxHalf_Z) ? (ztmp + boxSize_Z) : (ztmp))) +#define WRAP_Z(x) (ztmp = (x), (ztmp > boxSize_Z) ? (ztmp - boxSize_Z) : ((ztmp < 0) ? 
(ztmp + boxSize_Z) : (ztmp))) +#else /* #ifndef REFLECTIVE_Z */ +#define NGB_PERIODIC_LONG_Z(x) fabs(x) +#define NEAREST_Z(x) (x) +#define WRAP_Z(x) (x) +#endif /* #ifndef REFLECTIVE_Z #else */ + +#define FACT1 0.366025403785 /* FACT1 = 0.5 * (sqrt(3)-1) */ +#define FAC_TWO_TO_TWO_THIRDS 1.5874011 + +/*********************************************************/ +/* Global variables */ +/*********************************************************/ + +extern int TimeBinSynchronized[TIMEBINS]; +extern struct TimeBinData TimeBinsHydro, TimeBinsGravity; + +#ifdef USE_SFR +extern double TimeBinSfr[TIMEBINS]; +#endif /* #ifdef USE_SFR */ + +extern int ThisTask; /*!< the number of the local processor */ +extern int NTask; /*!< number of processors */ +extern int PTask; /*!< note: NTask = 2^PTask */ + +extern int ThisNode; /*!< the rank of the current compute node */ +extern int NumNodes; /*!< the number of compute nodes used */ +extern int MinTasksPerNode; /*!< the minimum number of MPI tasks that is found on any of the nodes */ +extern int MaxTasksPerNode; /*!< the maximum number of MPI tasks that is found on any of the nodes */ +extern int TasksInThisNode; /*!< number of MPI tasks on current compute node */ +extern int RankInThisNode; /*!< rank of the MPI task on the current compute node */ +extern long long MemoryOnNode; + +extern double CPUThisRun; /*!< Sums CPU time of current process */ + +extern int MaxTopNodes; /*!< Maximum number of nodes in the top-level tree used for domain decomposition */ + +extern int RestartFlag; /*!< taken from command line used to start code. 0 is normal start-up from + initial conditions, 1 is resuming a run from a set of restart files, while 2 + marks a restart from a snapshot file. 
*/ +extern int RestartSnapNum; +extern int TakeLevel; +extern int TagOffset; + +extern int Argc; +extern char **Argv; + +extern double CPU_Step[CPU_LAST]; +extern double CPU_Step_Stored[CPU_LAST]; + +extern double WallclockTime; /*!< This holds the last wallclock time measurement for timing measurements */ +extern double StartOfRun; /*!< This stores the time of the start of the run for evaluating the elapsed time */ + +extern size_t AllocatedBytes; +extern size_t FreeBytes; + +extern char DumpFlag; +extern char DumpFlagNextSnap; + +extern int FlagNyt; /*!< set to 1 by the terminate() macro after it has printed its message, so that its print-and-MPI_Abort branch runs at most once per task */ + +extern int NumPart; /*!< number of particles on the LOCAL processor */ +extern int NumGas; /*!< number of gas particles on the LOCAL processor */ + +extern gsl_rng *random_generator; /*!< a random number generator */ +extern gsl_rng *random_generator_aux; /*!< an auxiliary random number generator for use if one doesn't want to influence the main + code's random numbers */ + +#ifdef USE_SFR +extern int Stars_converted; /*!< current number of star particles in gas particle block */ +#endif /* #ifdef USE_SFR */ + +#ifdef TOLERATE_WRITE_ERROR +extern int WriteErrorFlag; +extern char AlternativeOutputDir[MAXLEN_PATH]; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + +extern double EgyInjection; + +extern double TimeOfLastDomainConstruction; /*!< holds what it says */ + +extern double DomainCorner[3], DomainCenter[3], DomainLen, DomainFac; +extern double DomainInverseLen, DomainBigFac; +extern int *DomainStartList, *DomainEndList; +extern double *DomainCost, *TaskCost; +extern int *DomainCount, *TaskCount; +extern struct no_list_data +{ + int task; + int no; + int domainCount; + double domainCost; +} * ListNoData; + +extern int domain_bintolevel[TIMEBINS]; +extern int domain_refbin[TIMEBINS]; +extern int domain_grav_weight[TIMEBINS]; +extern int domain_hydro_weight[TIMEBINS]; +extern int domain_to_be_balanced[TIMEBINS]; + +/*! Array of task numbers holding the respective top-level nodes.
For + the topnodes entries, it is indexed by the Leaf member, for + pseudoparticles it is indexed by the node + number-MaxPart-MaxNodes. */ +extern int *DomainTask; +extern int *DomainNewTask; + +/*! Array of indices of the main tree nodes that are identical to the + * top-level nodes. For the topnodes entries, it is indexed by the + * Leaf member, for pseudoparticles it is indexed by the node + * number-MaxPart-MaxNodes. + */ +extern int *DomainNodeIndex; + +extern peanokey *Key, *KeySorted; + +/*! The top node structure is an octree used for encoding the domain + * decomposition. Its leaf nodes are the units into which the domain + * is decomposed. + */ +extern struct topnode_data +{ + peanokey Size; + peanokey StartKey; + long long Count; + /*! The index of the first daughter node. The remaining 7 follow + sequentially, I think. */ + int Daughter; + /*! The index of this topnode in the DomainTask etc arrays. Is this + only valid for topnodes that have daughter=-1, i.e. the actual + leaves? */ + int Leaf; + unsigned char MortonToPeanoSubnode[8]; +} * TopNodes; + +extern int NTopnodes, NTopleaves; + +/*! 
Variables for gravitational tree */ +extern int Tree_MaxPart; +extern int Tree_NumNodes; +extern int Tree_MaxNodes; +extern int Tree_FirstNonTopLevelNode; +extern int Tree_NumPartImported; +extern int Tree_NumPartExported; +extern int Tree_ImportedNodeOffset; +extern int Tree_NextFreeNode; + +extern int *Tree_ResultIndexList; +extern int *Tree_Task_list; +extern MyDouble *Tree_Pos_list; +extern unsigned long long *Tree_IntPos_list; + +extern struct treepoint_data +{ + MyDouble Pos[3]; + unsigned long long IntPos[3]; + MyDouble Mass; + float OldAcc; + int index; + int th; + unsigned char level; + unsigned char Type; + unsigned char SofteningType : 7; +#ifndef HIERARCHICAL_GRAVITY + unsigned char ActiveFlag : 1; +#endif /* #ifndef HIERARCHICAL_GRAVITY */ + +#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) + MyFloat GroupRad; + int GrNr; +#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */ +} * Tree_Points; + +extern struct resultsactiveimported_data +{ + MyFloat GravAccel[3]; +#ifdef EVALPOTENTIAL + MyFloat Potential; +#endif /* #ifdef EVALPOTENTIAL */ + int index; +} * Tree_ResultsActiveImported; + +extern char ParameterFile[MAXLEN_PATH]; /*!< file name of parameterfile used for starting the simulation */ + +extern FILE *FdInfo, /*!< file handle for info.txt log-file. */ + *FdEnergy, /*!< file handle for energy.txt log-file. */ + *FdTimings, /*!< file handle for timings.txt log-file. */ + *FdBalance, /*!< file handle for balance.txt log-file. */ + *FdTimebin, /*!< file handle for timebins.txt log-file. */ + *FdDomain, /*!< file handle for domain.txt log-file. */ + *FdMemory, /*!< file handle for memory.txt log-file. */ + *FdCPU; /*!< file handle for cpu.txt log-file. */ + +#ifdef DETAILEDTIMINGS +extern FILE *FdDetailed; +#endif /* #ifdef DETAILEDTIMINGS */ + +#ifdef OUTPUT_CPU_CSV +extern FILE *FdCPUCSV; /**< file handle for cpu.csv log-file. Used if the cpu log is printed in csv format as well. 
*/ +#endif /* #ifdef OUTPUT_CPU_CSV */ + +#ifdef RESTART_DEBUG +extern FILE *FdRestartTest; +#endif /* #ifdef RESTART_DEBUG */ + +#ifdef USE_SFR +extern FILE *FdSfr; /**< file handle for sfr.txt log-file. */ +#endif /* #ifdef USE_SFR */ + +#ifdef FORCETEST +extern FILE *FdForceTest; /*!< file handle for forcetest.txt log-file. */ +#endif /* #ifdef FORCETEST */ + +/*! Determines whether various dump files are written. Normally true, + set to false by Sunrise to avoid creating them. */ +extern int WriteMiscFiles; + +extern void *CommBuffer; /*!< points to communication buffer, which is used at a few places */ + +/*! \brief Global simulation data. + * + * Data which is the SAME for all tasks (mostly code parameters read + * from the parameter file). Holding this data in a structure is + * convenient for writing/reading the restart file, and it allows the + * introduction of new global variables in a simple way. The only + * thing to do is to introduce them into this structure. + */ +extern struct global_data_all_processes +{ + long long TotNumPart; /*!< total particle numbers (global value) */ + long long TotNumGas; /*!< total gas particle number (global value) */ + + int MaxPart; /*!< This gives the maxmimum number of particles that can be stored on one + processor. */ + int MaxPartSph; /*!< This gives the maxmimum number of SPH particles that can be stored on one + processor. 
*/ + +#if defined(COOLING) + char TreecoolFile[MAXLEN_PATH]; +#endif /* #if defined(COOLING) */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + int TotPartSpecial, MaxPartSpecial; +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +#if defined(REFINEMENT) + double ReferenceGasPartMass; +#endif /* #if defined(REFINEMENT) */ + +#ifdef REFINEMENT + double TargetGasMass; + double TargetGasMassFactor; + int RefinementCriterion; + int DerefinementCriterion; +#endif /* #ifdef REFINEMENT */ + + double TotGravCost; + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING + double AvgType1Mass; +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ + + double MeanVolume; + + int MultipleDomains; + double TopNodeFactor; + + int ICFormat; /*!< selects different versions of IC file-format */ + + int SnapFormat; /*!< selects different versions of snapshot file-formats */ + + int NumFilesPerSnapshot; /*!< number of files in multi-file snapshot dumps */ + int NumFilesWrittenInParallel; /*!< maximum number of files that may be written/read simultaneously when + writing/reading restart-files, or when writing snapshot files */ + + double TreeAllocFactor; /*!< Each processor allocates a number of nodes which is TreeAllocFactor times + the maximum(!) number of particles. Note: A typical local tree for N + particles needs usually about ~0.65*N nodes. */ + + double TopNodeAllocFactor; /*!< NOTE(review): the description previously given here was a verbatim copy of the TreeAllocFactor text. + Presumably this factor sizes the allocation of the top-level tree used for the domain + decomposition (cf. MaxTopNodes) - confirm against the domain code. */ + + double NgbTreeAllocFactor; /*!< Each processor allocates a number of nodes for the neighbor search which is NgbTreeAllocFactor times + the maximum(!) number of gas particles. Note: A typical local tree for N + particles needs usually about ~0.65*N nodes.
*/ + + int MaxMemSize; /*!< size of maximum memory consumption in MB */ + + /* some SPH parameters */ + + int DesNumNgb; /*!< Desired number of SPH neighbours */ + +#ifdef SUBFIND + int DesLinkNgb; + double ErrTolThetaSubfind; +#endif /* #ifdef SUBFIND */ + + double TotCountReducedFluxes; + double TotCountFluxes; + + double DtDisplacement; + + double MaxNumNgbDeviation; /*!< Maximum allowed deviation neighbour number */ + + double InitGasTemp; /*!< may be used to set the temperature in the IC's */ + double InitGasU; /*!< the same, but converted to thermal energy per unit mass */ + double MinGasTemp; /*!< may be used to set a floor for the gas temperature */ + double MinEgySpec; /*!< the minimum allowed temperature expressed as energy per unit mass; code will inject energy if a cell falls + below this limit */ + + double MinimumDensityOnStartUp; + + double GasSoftFactor; + + double LimitUBelowThisDensity; + double LimitUBelowCertainDensityToThisValue; + + /* some force counters */ + long long TotNumOfForces; /*!< counts total number of force computations */ + +#ifdef MULTIPLE_RESTARTS + int RestartFileCount; +#endif /* #ifdef MULTIPLE_RESTARTS */ + + /* various cosmological factors that are only a function of the current scale factor, and in non-comoving runs are set to 1 */ + double cf_atime, cf_a2inv, cf_a3inv, cf_afac1, cf_afac2, cf_afac3, cf_hubble_a, cf_time_hubble_a, cf_redshift; + /* Hubble rate at the current time, valid both for comoving and non-comoving integration */ + double cf_H; + /* Hubble expansion rate, but in non-comoving integration set to zero */ + double cf_Hrate; + + /* system of units */ + double UnitTime_in_s, /*!< factor to convert internal time unit to seconds/h */ + UnitMass_in_g, /*!< factor to convert internal mass unit to grams/h */ + UnitVelocity_in_cm_per_s, /*!< factor to convert internal velocity unit to cm/sec */ + UnitLength_in_cm, /*!< factor to convert internal length unit to cm/h */ + UnitPressure_in_cgs, /*!< factor to convert 
internal pressure unit to cgs units (little 'h' still + around!) */ + UnitDensity_in_cgs, /*!< factor to convert internal mass density unit to g/cm^3*h^2 */ + UnitCoolingRate_in_cgs, /*!< factor to convert internal cooling rate to cgs units */ + UnitEnergy_in_cgs, /*!< factor to convert internal energy to cgs units */ + UnitTime_in_Megayears, /*!< factor to convert internal time to megayears/h */ + GravityConstantInternal, /*!< If set to zero in the parameterfile, the internal value of the + gravitational constant is set to the Newtonian value based on the system of + units specified. Otherwise the value provided is taken as internal gravity + constant G. */ + G; /*!< Gravity-constant in internal units */ + + /* Cosmology */ + + double Hubble; /*!< Hubble-constant in internal units */ + double Omega0, /*!< matter density in units of the critical density (at z=0) */ + OmegaLambda, /*!< vacuum energy density relative to critical density (at z=0) */ + OmegaBaryon, /*!< baryon density in units of the critical density (at z=0) */ + HubbleParam; /*!< little `h', i.e. Hubble constant in units of 100 km/s/Mpc.
Only needed to get absolute + * physical values for cooling physics + */ + + double BoxSize; /*!< Boxsize in case periodic boundary conditions are used */ + + /* Code options */ + + int ComovingIntegrationOn; /*!< flags that comoving integration is enabled */ + int PeriodicBoundariesOn; /*!< flags that periodic boundaries are enabled for gravity */ + int ResubmitOn; /*!< flags that automatic resubmission of job to queue system is enabled */ + int TypeOfOpeningCriterion; /*!< determines tree cell-opening criterion: 0 for Barnes-Hut, 1 for relative + criterion */ + int TypeOfTimestepCriterion; /*!< gives type of timestep criterion (only 0 supported right now - unlike + gadget-1.1) */ + int OutputListOn; /*!< flags that output times are listed in a specified file */ + int CoolingOn; /*!< flags that cooling is enabled */ + int StarformationOn; /*!< flags that star formation is enabled */ + + int NParameters; + + int LowestActiveTimeBin; + int HighestActiveTimeBin; + int LowestOccupiedTimeBin; + int HighestOccupiedTimeBin; + int LowestOccupiedGravTimeBin; + int HighestOccupiedGravTimeBin; + int HighestSynchronizedTimeBin; + int SmallestTimeBinWithDomainDecomposition; + double ActivePartFracForNewDomainDecomp; + + /* parameters determining output frequency */ + + int SnapshotFileCount; /*!< number of snapshot that is written next */ + double TimeBetSnapshot, /*!< simulation time interval between snapshot files */ + TimeOfFirstSnapshot, /*!< simulation time of first snapshot files */ + CpuTimeBetRestartFile, /*!< cpu-time between regularly generated restart files */ + TimeLastRestartFile, /*!< cpu-time when last restart-file was written */ + TimeBetStatistics, /*!< simulation time interval between computations of energy statistics */ + TimeLastStatistics; /*!< simulation time when the energy statistics was computed the last time */ + int NumCurrentTiStep; /*!< counts the number of system steps taken up to this point */ + + /* Current time of the simulation, global step, 
and end of simulation */ + + double Time, /*!< current time of the simulation */ + TimeBegin, /*!< time of initial conditions of the simulation */ + TimeStep, /*!< difference between current times of previous and current timestep */ + TimeMax; /*!< marks the point of time until the simulation is to be evolved */ + + /* variables for organizing discrete timeline */ + + double Timebase_interval; /*!< factor to convert from floating point time interval to integer timeline */ + integertime Ti_Current; /*!< current time on integer timeline */ + integertime Previous_Ti_Current; + integertime Ti_nextoutput; /*!< next output time on integer timeline */ + integertime Ti_lastoutput; + + integertime Ti_begstep[TIMEBINS]; /*!< marks start of current step of each timebin on integer timeline */ + +#ifdef PMGRID + integertime PM_Ti_endstep, PM_Ti_begstep; + double Asmth[2], Rcut[2]; + double Corner[2][3], UpperCorner[2][3], Xmintot[2][3], Xmaxtot[2][3]; + double TotalMeshSize[2]; +#if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) + double MassPMregions[2]; +#endif /* #if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) */ +#endif /* #ifdef PMGRID */ + + long long GlobalNSynchronizedHydro; + long long GlobalNSynchronizedGravity; + + int LevelToTimeBin[GRAVCOSTLEVELS]; + int LevelHasBeenMeasured[GRAVCOSTLEVELS]; + + /* variables that keep track of cumulative CPU consumption */ + + double TimeLimitCPU; + double CPU_Sum[CPU_LAST]; /*!< sums wallclock time/CPU consumption in whole run */ + + /* tree code opening criterion */ + + double ErrTolTheta; /*!< BH tree opening angle */ + double ErrTolForceAcc; /*!< parameter for relative opening criterion in tree walk */ + + /* adjusts accuracy of time-integration */ + + double ErrTolIntAccuracy; /*!< accuracy tolerance parameter \f$ \eta \f$ for timestep criterion. 
The + timesteps is \f$ \Delta t = \sqrt{\frac{2 \eta eps}{a}} \f$ */ + + double MinSizeTimestep, /*!< minimum allowed timestep. Normally, the simulation terminates if the + timestep determined by the timestep criteria falls below this limit. */ + MaxSizeTimestep; /*!< maximum allowed timestep */ + +#ifdef TIMESTEP_OUTPUT_LIMIT + double TimestepOutputLimit; +#endif /* #ifdef TIMESTEP_OUTPUT_LIMIT */ + +#ifdef FORCE_EQUAL_TIMESTEPS + integertime GlobalTimeStep; +#endif /* #ifdef FORCE_EQUAL_TIMESTEPS */ + + double IsoSoundSpeed; + + double CourantFac; /*!< Hydrodynamics-Courant factor */ + +#ifdef REGULARIZE_MESH_FACE_ANGLE + double CellMaxAngleFactor; +#else /* #ifdef REGULARIZE_MESH_FACE_ANGLE */ + double CellShapingFactor; +#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */ + double CellShapingSpeed; + + int CPU_TimeBinCountMeasurements[TIMEBINS]; + double CPU_TimeBinMeasurements[TIMEBINS][NUMBER_OF_MEASUREMENTS_TO_RECORD]; + + /* gravitational and hydrodynamical softening lengths (given in terms of an `equivalent' Plummer softening + * length) + * + */ + + int SofteningTypeOfPartType[NTYPES]; + + double SofteningComoving[NSOFTTYPES]; /*!< comoving gravitational softening lengths for each softening type */ + double SofteningMaxPhys[NSOFTTYPES]; /*!< maximum physical gravitational softening lengths for each softening type */ + + double + SofteningTable[NSOFTTYPES + NSOFTTYPES_HYDRO]; /*!< current (comoving) gravitational softening lengths for each softening type */ + double ForceSoftening[NSOFTTYPES + NSOFTTYPES_HYDRO + 1]; /*!< current (comoving) gravitational softening lengths, multiplied by a + factor 2.8 - at that scale the force is Newtonian */ + + /*!
If particle masses are all equal for one type, the corresponding entry in MassTable is set to this + * value, * allowing the size of the snapshot files to be reduced + */ + double MassTable[NTYPES]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + double MinimumComovingHydroSoftening; + double AdaptiveHydroSofteningSpacing; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + + /* some filenames */ + char InitCondFile[MAXLEN_PATH], OutputDir[MAXLEN_PATH], SnapshotFileBase[MAXLEN_PATH], ResubmitCommand[MAXLEN_PATH], + OutputListFilename[MAXLEN_PATH]; + + /*! table with desired output times */ + double OutputListTimes[MAXLEN_OUTPUTLIST]; + char OutputListFlag[MAXLEN_OUTPUTLIST]; + int OutputListLength; /*!< number of times stored in table of desired output times */ + +#ifdef USE_SFR /* enable Springel & Hernquist model */ + double OverDensThresh; + double CritOverDensity; + double TemperatureThresh; + double CritPhysDensity; + double PhysDensThresh; + double EgySpecSN; + double EgySpecCold; + double FactorEVP; + double TempSupernova; + double TempClouds; + double MaxSfrTimescale; + double FactorSN; +#endif /* #ifdef USE_SFR */ + +#ifdef MHD_POWELL + double Powell_Momentum[3]; + double Powell_Angular_Momentum[3]; + double Powell_Energy; +#endif /* #ifdef MHD_POWELL */ + +#ifdef MHD_SEEDFIELD + int B_dir; /* flags for direction: x = 1, y = 2, z = 4 */ + double B_value; /* value for the chosen component(s) of the magnetic field */ +#endif /* #ifdef MHD_SEEDFIELD */ + + MyIDType MaxID; + +#ifdef REFINEMENT_VOLUME_LIMIT + double MaxVolumeDiff; + double MinVolume; + double MaxVolume; +#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */ + +#ifdef REDUCE_FLUSH + double FlushCpuTimeDiff; + double FlushLast; +#endif /* #ifdef REDUCE_FLUSH */ + +#ifdef TILE_ICS + int TileICsFactor; +#endif /* #ifdef TILE_ICS */ + +#ifdef ADDBACKGROUNDGRID + int GridSize; +#endif /* #ifdef ADDBACKGROUNDGRID */ + +#ifdef ONEDIMS_SPHERICAL + double CoreMass; + double CoreRadius; +#endif /* #ifdef ONEDIMS_SPHERICAL */ + + 
double GlobalDisplacementVector[3];
+} All;
+
+/*****************************************************************************
+ ** particle data ************************************************************
+ ****************************************************************************/
+
+/*! \brief This structure holds all the information that is
+ *         stored for each particle of the simulation.
+ *
+ *  Two pointers to arrays of this struct are declared below: P and
+ *  DomainPartBuf. Several members only exist when the corresponding
+ *  compile-time option is defined, so the size of the struct depends on
+ *  the build configuration.
+ */
+extern struct particle_data
+{
+  MyDouble Pos[3];       /*!< particle position at its current time */
+  MyDouble Mass;         /*!< particle mass */
+  MyFloat Vel[3];        /*!< particle velocity at its current time */
+  MySingle GravAccel[3]; /*!< particle acceleration due to gravity */
+
+#ifdef EXTERNALGRAVITY
+  MySingle dGravAccel; /*!< norm of spatial derivatives tensor of gravity accelerations due to external force */
+#endif /* #ifdef EXTERNALGRAVITY */
+
+#ifdef PMGRID
+  MySingle GravPM[3]; /*!< particle acceleration due to long-range PM gravity force */
+#endif /* #ifdef PMGRID */
+
+#ifdef FORCETEST
+  MyFloat GravAccelDirect[3]; /*!< particle acceleration calculated by direct summation */
+  MyFloat PotentialDirect;    /*!< potential computed with direct summation */
+  MyFloat DistToID1;          /*!< NOTE(review): presumably the distance to the particle with ID 1 -- confirm against the force test code */
+#ifdef PMGRID
+  MyFloat GravAccelShortRange[3]; /*!< short range component of gravitational acceleration */
+  MyFloat GravAccelLongRange[3];  /*!< long range component of gravitational acceleration */
+  MyFloat PotentialShortRange;    /*!< potential due to short-range forces */
+  MyFloat PotentialLongRange;     /*!< potential due to long-range forces */
+#endif /* #ifdef PMGRID */
+#endif /* #ifdef FORCETEST */
+
+#if defined(EVALPOTENTIAL) || defined(OUTPUTPOTENTIAL)
+  MySingle Potential; /*!< gravitational potential */
+#if defined(PMGRID)
+  MySingle PM_Potential; /*!< gravitational potential in Particle-Mesh */
+#endif /* #if defined(PMGRID) */
+#endif /* #if defined(EVALPOTENTIAL) || defined (OUTPUTPOTENTIAL) */
+
+#ifdef OUTPUTGRAVINTERACTIONS
+  int GravInteractions; /*!< number of gravitational interactions calculated */
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+
+#ifdef EXTERNALGRAVITY
+  MyFloat ExtPotential; /*!< value of external potential */
+#endif /* #ifdef EXTERNALGRAVITY */
+
+  MyIDType ID; /*!< unique ID of particle */
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+  MyIDType FileOrder; /*!< NOTE(review): presumably the particle's position in the snapshot file -- confirm */
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+
+  integertime Ti_Current; /*!< current time on integer timeline */
+
+  float OldAcc; /*!< magnitude of old gravitational force. Used in relative opening criterion */
+
+  float GravCost[GRAVCOSTLEVELS]; /*!< weight factors used for balancing the work-load */
+
+  unsigned char Type; /*!< flags particle type. 0=gas, 1=halo, 2=disk, 3=bulge, 4=stars, 5=bndry */
+  unsigned char SofteningType; /*!< NOTE(review): presumably an index selecting the gravitational softening class -- confirm */
+  signed char TimeBinGrav;     /*!< NOTE(review): presumably the time bin used for the gravity time step -- confirm */
+  signed char TimeBinHydro;    /*!< NOTE(review): presumably the time bin used for the hydro time step -- confirm */
+} * P,              /*!< holds particle data on local processor */
+    *DomainPartBuf; /*!< buffer for particle data used in domain decomposition */
+
+/*****************************************************************************
+ ** (sub)halo data ***********************************************************
+ ****************************************************************************/
+
+/*! \brief Per-entry (sub)halo bookkeeping data; PS points to an array of
+ *         these.
+ *
+ *  Only the origin/target index and task fields and GrNr are always
+ *  present; everything else is compiled in only when SUBFIND is defined.
+ *  NOTE(review): presumably PS is indexed in parallel to P -- confirm.
+ */
+extern struct subfind_data
+{
+  int OriginIndex, OriginTask;
+  int TargetIndex, TargetTask;
+  int GrNr; /* NOTE(review): presumably the group number assigned by the group finder -- confirm */
+
+#ifdef SUBFIND
+  int SubNr;
+  int OldIndex;
+  int submark;
+  int originindex, origintask;
+  MyFloat Utherm;
+  MyFloat Density;
+  MyFloat Potential;
+  MyFloat Hsml;
+  MyFloat BindingEnergy;
+
+#ifdef CELL_CENTER_GRAVITY
+  MyDouble Center[3];
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+
+#ifdef SUBFIND_CALC_MORE
+  MyFloat SubfindHsml;
+  MyFloat SubfindDensity;   /* total matter density */
+  MyFloat SubfindDMDensity; /* dark matter density */
+  MyFloat SubfindVelDisp;   /* 3D DM velocity dispersion */
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+#endif /* #ifdef SUBFIND */
+} * PS;
+
+/*****************************************************************************
+ ** cell data
****************************************************************
+ ****************************************************************************/
+
+/*! \brief Holds data that is stored for each hydro mesh cell in addition to
+ *         the collisionless variables.
+ *
+ *  Two pointers to arrays of this struct are declared below: SphP and
+ *  DomainSphBuf. Many members are only present when the matching
+ *  compile-time option is defined.
+ */
+extern struct sph_particle_data
+{
+  /* conserved variables */
+  MyFloat Energy;
+  MyFloat Momentum[3];
+  MyFloat Volume;
+  MyFloat OldMass;
+
+  /* primitive variables */
+  MyFloat Density;
+  MyFloat Pressure; /*!< current pressure */
+  MySingle Utherm;
+
+#ifdef HIERARCHICAL_GRAVITY
+  MySingle FullGravAccel[3];
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+
+  /* variables for mesh */
+  MyDouble Center[3];    /*!< center of mass of cell */
+  MySingle VelVertex[3]; /*!< current vertex velocity (primitive variable) */
+
+  MySingle MaxDelaunayRadius;
+  MySingle Hsml; /* auxiliary search radius for points around a delaunay triangle */
+  MySingle SurfaceArea;
+
+#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE)
+  MySingle MaxFaceAngle;
+#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */
+
+  MySingle ActiveArea;
+
+#if defined(OUTPUT_DIVVEL)
+  MyFloat DivVel; /*!< divergence of the velocity field */
+#endif /* #if defined(OUTPUT_DIVVEL) */
+
+#if defined(REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED) || defined(OUTPUT_CURLVEL)
+  MySingle CurlVel; /*!< magnitude of the curl of the velocity field */
+#endif /* #if defined(REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED) || defined(OUTPUT_CURLVEL) */
+
+#ifdef TREE_BASED_TIMESTEPS
+  MySingle CurrentMaxTiStep;
+  MySingle Csnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+#if defined(REFINEMENT_HIGH_RES_GAS)
+  MyFloat HighResMass;
+  MyFloat HighResDensity;
+#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */
+
+#ifdef MHD
+  MyFloat B[3];
+  MyFloat BConserved[3];
+  MyFloat DivB;
+  MyFloat CurlB[3];
+#endif /* #ifdef MHD */
+
+#ifdef PASSIVE_SCALARS
+  /* PASSIVE_SCALARS doubles as the array length, so it must expand to a positive integer */
+  MyFloat PScalars[PASSIVE_SCALARS];
+  MyFloat PConservedScalars[PASSIVE_SCALARS];
+#endif /* #ifdef PASSIVE_SCALARS */
+
+#ifdef OUTPUT_SURFACE_AREA
+  int CountFaces;
+#endif /* #ifdef OUTPUT_SURFACE_AREA */
+
+#if defined(REFINEMENT_SPLIT_CELLS)
+  MySingle MinimumEdgeDistance;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */
+
+#if defined(COOLING)
+  MyFloat Ne; /* electron fraction, expressed as local electron number
+                 density normalized to the hydrogen number density. Gives
+                 indirectly ionization state and mean molecular weight. */
+#endif /* #if defined(COOLING) */
+
+#ifdef USE_SFR
+  MySingle Sfr;
+#endif /* #ifdef USE_SFR */
+
+#ifdef OUTPUT_COOLHEAT
+  MyFloat CoolHeat;
+#endif /* #ifdef OUTPUT_COOLHEAT */
+
+  struct grad_data Grad;
+
+  int first_connection;
+  int last_connection;
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+  int AllowRefinement;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+#ifdef REFINEMENT_SPLIT_CELLS
+  MySingle SepVector[3];
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+  MyFloat MinNgbVolume;
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+
+  double TimeLastPrimUpdate;
+
+#ifdef ADDBACKGROUNDGRID
+  MyFloat Weight;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+} * SphP,          /*!< holds SPH particle data on local processor */
+    *DomainSphBuf; /*!< buffer for SPH particle data in domain decomposition */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+/*! \brief ID, position and mass of one particle; PartSpecialListGlobal
+ *         points to an array of these.
+ *
+ *  Only compiled in when EXACT_GRAVITY_FOR_PARTICLE_TYPE is defined.
+ */
+extern struct special_particle_data
+{
+  MyIDType ID;
+  double pos[3];
+  double mass;
+} * PartSpecialListGlobal;
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+extern peanokey *DomainKeyBuf;
+
+/*! global state of system
+ *
+ *  Totals are stored in the un-suffixed members; the *Comp members hold
+ *  the same quantities once per particle type (NTYPES entries).
+ *  NOTE(review): the vector quantities have 4 components; presumably the
+ *  fourth one holds the magnitude -- confirm against the code that fills
+ *  SysState.
+ */
+extern struct state_of_system
+{
+  double Mass, EnergyKin, EnergyPot, EnergyInt, EnergyTot, Momentum[4], AngMomentum[4], CenterOfMass[4], MassComp[NTYPES],
+      EnergyKinComp[NTYPES], EnergyPotComp[NTYPES], EnergyIntComp[NTYPES], EnergyTotComp[NTYPES], MomentumComp[NTYPES][4],
+      AngMomentumComp[NTYPES][4], CenterOfMassComp[NTYPES][4];
+} SysState, SysStateAtStart, SysStateAtEnd;
+
+/*!
\brief Struct used for passing the parameters during the mesh cell search.
+ */
+typedef struct
+{
+  MyDouble Pos[3];
+  int Task;
+  union
+  {
+    int Index;
+    float hsmlguess;
+  } u; /* Index and hsmlguess share storage: only one of them is valid at a time */
+
+} mesh_search_data;
+
+/*! \brief Struct used for sending positions to other tasks during the
+ *         mesh cell search.
+ */
+typedef struct
+{
+  MyDouble Pos[3];
+  MyFloat Distance;
+} mesh_search_request;
+
+/*! \brief Struct used for receiving the results from other tasks during the
+ *         mesh cell search.
+ */
+typedef struct
+{
+  MyDouble Distance;
+  int Task;
+  int Index;
+} mesh_search_response;
+
+/*! \brief One entry of the particle export list; PartList points to an
+ *         array of these.
+ */
+extern struct data_partlist
+{
+  int Task;  /*!< The task the item was exported to. */
+  int Index; /*!< The particle index of the item on the sending task. */
+} * PartList;
+
+/*! \brief One entry of the node export list; NodeList points to an array
+ *         of these.
+ */
+extern struct datanodelist
+{
+  int Task;  /*!< target process */
+  int Index; /*!< local index that wants to open this node */
+  int Node;  /*!< node to be opened on foreign process */
+} * NodeList;
+
+#define FAC_AVG_NODES_PER_EXPORT 4.0 /*!< default choice for estimated average number of exported nodes per exported particle */
+
+/*! \brief Position, mass, type and softening type of one particle;
+ *         DirectDataIn and DirectDataAll point to arrays of these.
+ *
+ *  NOTE(review): the "Direct" prefix suggests use in direct-summation
+ *  gravity -- confirm against the code that fills these arrays.
+ */
+extern struct directdata
+{
+  MyDouble Pos[3];
+  MyDouble Mass;
+  unsigned char Type;
+  unsigned char SofteningType;
+} * DirectDataIn, *DirectDataAll;
+
+/*! \brief Acceleration (and, with EVALPOTENTIAL, potential) of one
+ *         particle; DirectAccOut and DirectAccIn point to arrays of these.
+ */
+extern struct accdata
+{
+  MyFloat Acc[3];
+#ifdef EVALPOTENTIAL
+  MyFloat Potential;
+#endif /* #ifdef EVALPOTENTIAL */
+} * DirectAccOut, *DirectAccIn;
+
+#if defined(EVALPOTENTIAL) || defined(OUTPUTPOTENTIAL) || defined(SUBFIND)
+/* NOTE(review): the two comments below mention GravDataResult, GravDataGet
+ * and GravDataIn, none of which is declared next to this struct; they look
+ * like they were carried over from another array pair and should
+ * presumably refer to PotDataResult / PotDataOut -- confirm. */
+extern struct potdata_out
+{
+  MyFloat Potential;
+}
+    /*! \brief Holds the partial results computed for imported particles. Note:
+     *         We use GravDataResult = GravDataGet, such that the result replaces
+     *         the imported data
+     */
+    * PotDataResult,
+    /*! \brief Holds partial results received from other processors. This will
+     *         overwrite the GravDataIn array
+     */
+    *PotDataOut;
+#endif /* #if defined (EVALPOTENTIAL) || defined (OUTPUTPOTENTIAL) || defined(SUBFIND) */
+
+/*!
\brief Buffer of size NTask used for flagging whether a particle needs to + * be exported to the other tasks. + */ +extern int *Exportflag; +/*! \brief Buffer of size NTask used for counting how many nodes are to be + * exported to the other tasks? + */ +extern int *Exportnodecount; +/*! \brief Buffer of size NTask used for holding the index into the + * DataIndexTable. + */ +extern int *Exportindex; +/*! \brief Array of NTask size of the offset into the send array where the + * objects to be sent to the specified task starts. + */ +extern int *Send_offset, + /*! \brief Array of NTask size of the number of objects to send to the + * tasks. + */ + *Send_count, + /*! \brief Array of NTask size of the number of objects to receive from the + * tasks. + */ + *Recv_count, + /*! \brief Array of NTask size of the offset into the receive array where the + * objects from the specified task starts. + */ + *Recv_offset; + +extern int *TasksThatSend, *TasksThatRecv, NSendTasks, NRecvTasks; + +extern struct send_recv_counts +{ + int Count; + int CountNodes; +} * Send, *Recv; + +extern int *Send_offset_nodes, *Send_count_nodes, *Recv_count_nodes, *Recv_offset_nodes; + +extern int Mesh_nimport, Mesh_nexport, *Mesh_Send_offset, *Mesh_Send_count, *Mesh_Recv_count, *Mesh_Recv_offset; + +extern int Force_nimport, Force_nexport, *Force_Send_offset, *Force_Send_count, *Force_Recv_count, *Force_Recv_offset; + +/*! \brief Header for the standard file format. + */ +#if(NTYPES == 7 || NTYPES == 8) +#define NTYPES_INT_HEADER 8 +#else /* #if (NTYPES==7 || NTYPES==8) */ +#define NTYPES_INT_HEADER NTYPES +#endif /* #if (NTYPES==7 || NTYPES==8) #else */ +extern struct io_header +{ + int npart[NTYPES_INT_HEADER]; /*!< number of particles of each type in this file */ + double mass[NTYPES]; /*!< mass of particles of each type. 
If 0, then the masses are explicitly + stored in the mass-block of the snapshot file, otherwise they are omitted */ + double time; /*!< time of snapshot file */ + double redshift; /*!< redshift of snapshot file */ + int flag_sfr; /*!< flags whether the simulation was including star formation */ + int flag_feedback; /*!< flags whether feedback was included (obsolete) */ + unsigned int npartTotal[NTYPES_INT_HEADER]; /*!< total number of particles of each type in this snapshot. This can be + different from npart if one is dealing with a multi-file snapshot. */ + int flag_cooling; /*!< flags whether cooling was included */ + int num_files; /*!< number of files in multi-file snapshot */ + double BoxSize; /*!< box-size of simulation in case periodic boundaries were used */ + double Omega0; /*!< matter density in units of critical density */ + double OmegaLambda; /*!< cosmological constant parameter */ + double HubbleParam; /*!< Hubble parameter in units of 100 km/sec/Mpc */ + int flag_stellarage; /*!< flags whether the file contains formation times of star particles */ + int flag_metals; /*!< flags whether the file contains metallicity values for gas and star + particles */ + unsigned int npartTotalHighWord[NTYPES_INT_HEADER]; /*!< High word of the total number of particles of each type */ + int flag_entropy_instead_u; /*!< flags that IC-file contains entropy instead of u */ + int flag_doubleprecision; /*!< flags that snapshot contains double-precision instead of single precision */ + + int flag_lpt_ics; /*!< flag to signal that IC file contains 2lpt initial conditions */ + float lpt_scalingfactor; /*!< scaling factor for 2lpt initial conditions */ + + int flag_tracer_field; /*!< flags presence of a tracer field */ + + int composition_vector_length; /*!< specifies the length of the composition vector (0 if not present) */ + +#if(NTYPES == 6) + char fill[40]; /*!< fills to 256 Bytes */ +#elif(NTYPES == 7) /* #if (NTYPES==6) */ + char fill[8]; /*!< fills to 256 Bytes */ 
+#endif /* #elif (NTYPES==7) */ +} header; /*!< holds header for snapshot files */ + +/*! \brief Header for the ICs file format, if NTYPES does not match. + */ +#ifdef NTYPES_ICS +extern struct io_header_ICs +{ + int npart[NTYPES_ICS]; /*!< number of particles of each type in this file */ + double mass[NTYPES_ICS]; /*!< mass of particles of each type. If 0, then the masses are explicitly + stored in the mass-block of the snapshot file, otherwise they are omitted */ + double time; /*!< time of snapshot file */ + double redshift; /*!< redshift of snapshot file */ + int flag_sfr; /*!< flags whether the simulation was including star formation */ + int flag_feedback; /*!< flags whether feedback was included (obsolete) */ + unsigned int npartTotal[NTYPES_ICS]; /*!< total number of particles of each type in this snapshot. This can be + different from npart if one is dealing with a multi-file snapshot. */ + int flag_cooling; /*!< flags whether cooling was included */ + int num_files; /*!< number of files in multi-file snapshot */ + double BoxSize; /*!< box-size of simulation in case periodic boundaries were used */ + double Omega0; /*!< matter density in units of critical density */ + double OmegaLambda; /*!< cosmological constant parameter */ + double HubbleParam; /*!< Hubble parameter in units of 100 km/sec/Mpc */ + int flag_stellarage; /*!< flags whether the file contains formation times of star particles */ + int flag_metals; /*!< flags whether the file contains metallicity values for gas and star + particles */ + unsigned int npartTotalHighWord[NTYPES_ICS]; /*!< High word of the total number of particles of each type */ + int flag_entropy_instead_u; /*!< flags that IC-file contains entropy instead of u */ + int flag_doubleprecision; /*!< flags that snapshot contains double-precision instead of single precision */ + + int flag_lpt_ics; /*!< flag to signal that IC file contains 2lpt initial conditions */ + float lpt_scalingfactor; /*!< scaling factor for 2lpt initial 
conditions */ + + int flag_tracer_field; /*!< flags presence of a tracer field */ + + int composition_vector_length; /*!< specifies the length of the composition vector (0 if not present) */ + +#if(NTYPES_ICS == 6) + char fill[40]; /*!< fills to 256 Bytes */ +#else /* #if (NTYPES_ICS==6) */ + terminate("NTYPES_ICS != 6") +#endif /* #if (NTYPES_ICS==6) #else */ +} header_ICs; /*!< holds header for IC files */ +#endif /* #ifdef NTYPES_ICS */ + +enum iofields +{ + IO_POS, + IO_VEL, + IO_ID, + IO_MASS, + IO_U, + IO_RHO, + IO_VORT, + IO_VOL, + IO_CM, + IO_VERTEXVEL, + IO_FACEANGLE, + IO_SAREA, + IO_NFACES, + + IO_HIGHRESMASS, + IO_PRESSURE, + IO_CSND, + IO_NE, + IO_NH, + IO_SFR, + + IO_POT, + IO_ACCEL, + IO_GRADP, + IO_GRADR, + IO_GRADV, + IO_GRADB, + + IO_POT_MINI, + IO_POS_MINI, + + IO_HI, + IO_TSTP, + IO_BFLD, + IO_DIVB, + IO_COOLRATE, + IO_ALLOWREFINEMENT, + + IO_DIVVEL, + IO_CURLVEL, + IO_COOLHEAT, + IO_PASS, + + IO_SUBFINDHSML, + IO_SUBFINDDENSITY, + IO_SUBFINDDMDENSITY, + IO_SUBFINDVELDISP, + IO_GROUPNR, + + IO_SOFTENING, + IO_TASK, + IO_TIMEBIN_HYDRO, + + IO_LASTENTRY /* This should be kept - it signals the end of the list */ +}; + +enum arrays +{ + A_NONE, + A_SPHP, + A_P, + A_PS +}; + +enum types_in_file +{ + FILE_NONE = -1, + FILE_INT = 0, + FILE_MY_ID_TYPE = 2, + FILE_MY_IO_FLOAT = 1, + FILE_DOUBLE = 3, + FILE_FLOAT = 4 +}; + +enum types_in_memory +{ + MEM_INT, + MEM_MY_ID_TYPE, + MEM_FLOAT, + MEM_DOUBLE, + MEM_MY_SINGLE, + MEM_MY_FLOAT, + MEM_MY_DOUBLE, + MEM_NONE +}; + +enum e_typelist +{ + GAS_ONLY = 1, + STARS_ONLY = 16, + GAS_AND_STARS = 17, + BHS_ONLY = 32, + ALL_TYPES = ((1 << NTYPES) - 1), + SET_IN_GET_PARTICLES_IN_BLOCK = 0 +}; + +enum sn_type +{ + SN_FULL = 0, + SN_MINI = 1, + SN_MINI_ONLY = 2, + SN_NO_SUBBOX = 3 +}; + +typedef struct +{ + enum iofields field; + enum types_in_memory type_in_memory; + enum types_in_file type_in_file_input; + enum types_in_file type_in_file_output; + int values_per_block; + char label[4]; + char datasetname[256]; + 
void (*io_func)(int, int, void *, int); + int typelist; + enum arrays array; + size_t offset; + enum sn_type snap_type; + + char hasunit; + double a; + double h; + double L; + double M; + double V; + double c; +} IO_Field; + +extern IO_Field *IO_Fields; +extern int N_IO_Fields; +extern int Max_IO_Fields; + +extern char (*Parameters)[MAXLEN_PARAM_TAG]; +extern char (*ParametersValue)[MAXLEN_PARAM_VALUE]; +extern char *ParametersType; + +/*! \brief The tree data structure. + * + * Nodes points to the actual memory + * allocated for the internal nodes, but is shifted such that + * Nodes[All.MaxPart] gives the first allocated node. Note that node + * numbers less than All.MaxPart are the leaf nodes that contain a + * single particle, and node numbers >= MaxPart+MaxNodes are "pseudo + * particles" that hang off the toplevel leaf nodes belonging to + * other tasks. These are not represented by this structure. Instead, + * the tree traversal for these are saved in the Nextnode, Prevnode + * and Father arrays, indexed with the node number in the case of + * real particles and by nodenumber-MaxNodes for pseudo + * particles. + */ +extern struct NODE +{ + union + { + int suns[8]; /*!< temporary pointers to daughter nodes */ + struct + { + MyDouble s[3]; /*!< center of mass of node */ + MyDouble mass; /*!< mass of node */ + /*! The next node in the tree walk in case the current node does + * not need to be opened. This means that it traverses the 8 + * subnodes of a node in a breadth-first fashion, and then goes + * to father->sibling. + */ + int sibling; + /*! The next node in case the current node needs to be + * opened. Applying nextnode repeatedly results in a pure + * depth-first traversal of the tree. + */ + int nextnode; + /*! The parent node of the node. (Is -1 for the root node.) 
+ */ + int father; +#if(NSOFTTYPES > 1) + unsigned char maxsofttype; /**< hold the maximum gravitational softening of particles */ +#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) + unsigned char maxhydrosofttype; + unsigned char minhydrosofttype; +#endif /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */ +#endif /* #if (NSOFTTYPES > 1) */ + } d; + } u; + + MyDouble center[3]; /*!< geometrical center of node */ + MyFloat len; /*!< sidelength of treenode */ + +} * Nodes; + +#ifdef MULTIPLE_NODE_SOFTENING +extern struct ExtNODE +{ + MyDouble mass_per_type[NSOFTTYPES]; +} * ExtNodes; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + +/*! Gives next node in tree walk for the "particle" nodes. Entries 0 + * -- MaxPart-1 are the real particles, and the "pseudoparticles" are + * indexed by the node number-MaxNodes. + */ +extern int *Nextnode; + +/*! Gives previous node in tree walk for the leaf (particle) + * nodes. Entries 0 -- MaxPart-1 are the real particles, and the + * "pseudoparticles" are indexed by the node number-MaxNodes. + */ +extern int *Father; + +/*! Variables for neighbor tree */ +extern int Ngb_MaxPart; +extern int Ngb_NumNodes; +extern int Ngb_MaxNodes; +extern int Ngb_FirstNonTopLevelNode; +extern int Ngb_NextFreeNode; +extern int *Ngb_Father; +extern int *Ngb_Marker; +extern int Ngb_MarkerValue; + +extern int *Ngb_DomainNodeIndex; +extern int *DomainListOfLocalTopleaves; +extern int *DomainNLocalTopleave; +extern int *DomainFirstLocTopleave; +extern int *Ngb_Nextnode; + +/*! 
The ngb-tree data structure
+ *
+ *  Ngb_Nodes points to an array of these. While u.suns is in use the
+ *  members of u.d are not valid, because both share the same storage.
+ */
+extern struct NgbNODE
+{
+  union
+  {
+    int suns[8]; /*!< temporary pointers to daughter nodes */
+    struct
+    {
+      int sibling;
+      int nextnode;
+      MyNgbTreeFloat range_min[3];
+      MyNgbTreeFloat range_max[3];
+    } d;
+  } u;
+
+  /* NOTE(review): presumably the per-axis minimum/maximum vertex velocity
+   * of the cells below this node -- confirm against the ngb-tree update code */
+  MyNgbTreeFloat vertex_vmin[3];
+  MyNgbTreeFloat vertex_vmax[3];
+
+  int father;
+
+  integertime Ti_Current;
+
+} * Ngb_Nodes;
+
+/*! \brief Extra per-node data of the ngb-tree (velocity bounds and a
+ *         maximum value named MaxCsnd); ExtNgb_Nodes points to an array
+ *         of these.
+ */
+extern struct ExtNgbNODE
+{
+  float vmin[3];
+  float vmax[3];
+  float MaxCsnd;
+} * ExtNgb_Nodes;
+
+#ifdef STATICNFW
+extern double Rs, R200;
+extern double Dc;
+extern double RhoCrit, V200;
+extern double fac;
+#endif /* #ifdef STATICNFW */
+
+extern int MaxThreads;
+
+#endif /* #define ALLVARS_H */
diff --git a/src/amuse/community/arepo/src/main/main.c b/src/amuse/community/arepo/src/main/main.c
new file mode 100644
index 0000000000..f1ae80be6a
--- /dev/null
+++ b/src/amuse/community/arepo/src/main/main.c
@@ -0,0 +1,296 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/main/main.c
+ * \date        05/2018
+ * \brief       Start of the program.
+ * \details contains functions: + * int main(int argc, char **argv) + * void endrun() + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +// #ifdef HAVE_HDF5 +// #include +// #endif /* #ifdef HAVE_HDF5 */ + +/*! \brief The entry point of the program. + * + * This function initializes the MPI communication packages, and sets + * cpu-time counters to 0. Then begrun1() is called, which sets up + * the simulation. Then either IC's or restart files are loaded. In + * case of IC's init() is called which prepares the IC's for the run. + * A call to begrun2() finishes the initialization. Finally, run() is + * started, the main simulation loop, which iterates over the timesteps. + * + * \param[in] argc Argument count from command line. + * \param[in] argv Argument vector from command line. + * + * \return status of exit; 0 for normal exit. + */ +int main(int argc, char **argv) +{ +// #ifdef IMPOSE_PINNING +// detect_topology(); +// get_core_set(); +// #endif /* #ifdef IMPOSE_PINNING */ + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); + MPI_Comm_size(MPI_COMM_WORLD, &NTask); + + /* output a welcome message */ + hello(); + + /* initialize CPU-time/Wallclock-time measurement */ + init_cpu_log(); + + determine_compute_nodes(); + +// #ifdef IMPOSE_PINNING +// /* pin the MPI ranks to the available core set */ +// pin_to_core_set(); +// report_pinning(); +// #endif /* #ifdef IMPOSE_PINNING */ + +// #ifdef HOST_MEMORY_REPORTING +// mpi_report_committable_memory(); +// #endif /* #ifdef HOST_MEMORY_REPORTING */ + + for(PTask = 0; NTask > (1 << PTask); PTask++) + ; + + begrun0(); + + // if(argc < 2) + // { + // if(ThisTask == 0) + // { + // printf("\nParameters are missing. 
\n"); + // printf("Call with [] [] []\n"); + // printf("\n"); + // printf(" RestartFlag Action\n"); + // printf(" 0 Read initial conditions and start simulation\n"); + // printf(" 1 Read restart files and resume simulation\n"); + // printf(" 2 Restart from specified snapshot dump and resume simulation\n"); + // printf(" 3 Run FOF and optionally SUBFIND: [ for SUBBOX_SNAPSHOTS]\n"); + // printf( + // " 6 Convert snapshot file to different format [input=ICFormat output=SnapFormat NOTE: derived " + // "quantities have round-off errors!\n"); + // printf(" 14 Write out the Voronoi mesh: \n"); + // printf(" 17 Write out snapshot dump with measured gradients\n"); + // printf(" 18 Recalculate gravitational potential values for specified snaphot dump: \n"); + // printf("\n"); + // } + // endrun(); + // } + + strcpy(ParameterFile, "param.txt"); /* Removing command line parsing. argv[1] replaced with "param.txt". */ + + // if(argc >= 3) + // RestartFlag = atoi(argv[2]); + // else + RestartFlag = 0; + + // if(argc >= 4) + // RestartSnapNum = atoi(argv[3]); + // else + // RestartSnapNum = -1; + + // Do minimal validation of arguments here rather than in random places in the code + // if((RestartFlag == 3 || RestartFlag == 6 || RestartFlag == 14 || RestartFlag == 17 || RestartFlag == 18) && RestartSnapNum < 0) + // { + // mpi_printf("Need to give the snapshot number\n"); + // return (0); + // } + +// #ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT +// if(RestartFlag == 18) +// { +// mpi_printf("Need RECOMPUTE_POTENTIAL_IN_SNAPSHOT for this option\n"); +// return (0); +// } +// #endif /* #ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT */ + +// #ifdef RUNNING_SAFETY_FILE +// /* do not run if 'running' safety file exists */ +// int runningflag = 0; +// if(ThisTask == 0) +// { +// FILE *fd; +// char runningfname[MAXLEN_PATH]; + +// sprintf(runningfname, "./running"); +// if((fd = fopen(runningfname, "r"))) /* Is the running-file present? If yes, interrupt the run. 
*/ +// { +// fclose(fd); +// printf("running-file detected. stopping.\n"); +// runningflag = 1; +// } +// } +// MPI_Bcast(&runningflag, 1, MPI_INT, 0, MPI_COMM_WORLD); +// if(runningflag) +// { +// MPI_Finalize(); /* do not call endrun() */ +// return 0; +// } +// else +// { +// /* touch a running safety file */ +// if(ThisTask == 0) +// { +// FILE *fd; +// char runningfname[MAXLEN_PATH]; + +// sprintf(runningfname, "./running"); +// if((fd = fopen(runningfname, "w"))) +// { +// fclose(fd); +// printf("touching a running-file: %s \n", runningfname); +// } +// else +// terminate("could not touch a running-file: %s\n", runningfname); +// } +// } +// #endif /* #ifdef RUNNING_SAFETY_FILE */ + + begrun1(); /* set-up run */ + + /* see if we are loading a restart file or an IC file */ + // if(RestartFlag == 1) + // loadrestart(); + // else + // { + /* We're reading an IC file. Is it a snapshot or really an IC? */ + char fname[MAXLEN_PATH]; + + // if(RestartFlag >= 2 && RestartSnapNum >= 0) + // { + // if(All.NumFilesPerSnapshot > 1) + // sprintf(fname, "%s/snapdir_%03d/%s_%03d", All.OutputDir, RestartSnapNum, All.SnapshotFileBase, RestartSnapNum); + // else + // sprintf(fname, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, RestartSnapNum); + // } + // else + strcpy(fname, All.InitCondFile); + + /* now we can load the file */ + +#ifdef READ_DM_AS_GAS + read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES); +#else /* #ifdef READ_DM_AS_GAS */ + read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES); +#endif /* #ifdef READ_DM_AS_GAS #else */ + + /* If we are supposed to just convert the file, write and exit here. 
*/ + // if(RestartFlag == 6) + // { + // /* important for proper functioning of FOF+SUBFIND */ + // if(All.ComovingIntegrationOn) /* change to new velocity variable */ + // { + // int i, j; + // for(i = 0; i < NumPart; i++) + // for(j = 0; j < 3; j++) + // P[i].Vel[j] *= sqrt(All.Time) * All.Time; + // } + // set_softenings(); + // All.TopNodeAllocFactor = 0.08; + // All.TreeAllocFactor = 0.7; + // All.NgbTreeAllocFactor = 0.7; + + // sprintf(All.SnapshotFileBase, "%s_converted", All.SnapshotFileBase); + // mpi_printf("Start writing file %s\nRestartSnapNum %d\n", All.SnapshotFileBase, RestartSnapNum); + // savepositions(RestartSnapNum, 0); + // endrun(); + // } + + /* init returns a status code, where a value of >=0 means that endrun() should be called. */ + int status = init(); + + if(status >= 0) + { + if(status > 0) + mpi_printf("init() returned with %d\n", status); + + endrun(); + } + // } + + begrun2(); + + run(); /* main simulation loop */ + + endrun(); /* clean up & finalize MPI */ + + return 0; +} + +/*! \brief This function ends the simulations in case of no error. + * + * This method has to be called by all processes. It should be used only + * if the simulation ends without a errors. + * Otherwise terminate() should be used instead. + * + * \return void + */ +void endrun() +{ + mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); + mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); + fflush(stdout); + +#ifdef HAVE_HDF5 + /*The hdf5 library will sometimes register an atexit() handler that calls its + * error handler. In AREPO this is set to my_hdf_error_handler, which calls + * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed. 
+ * Hence unset the HDF error handler here + */ + H5Eset_auto(NULL, NULL); +#endif /* #ifdef HAVE_HDF5 */ + +// #ifdef RUNNING_SAFETY_FILE +// if(All.Ti_Current < TIMEBASE) /* simulation has not reached the final time */ +// { +// char running_fname[MAXLEN_PATH], running_done_fname[MAXLEN_PATH]; +// sprintf(running_fname, "./running"); +// sprintf(running_done_fname, "./running_done"); +// rename(running_fname, running_done_fname); +// mpi_printf("moved ./running file to ./running_done, job can now restart.\n"); +// } +// else +// mpi_printf("leaving ./running file in place since run is complete to prevent any restarts.\n"); +// #endif /* #ifdef RUNNING_SAFETY_FILE */ + + MPI_Finalize(); + exit(0); +} diff --git a/src/amuse/community/arepo/src/main/main_original.c b/src/amuse/community/arepo/src/main/main_original.c new file mode 100644 index 0000000000..629e988526 --- /dev/null +++ b/src/amuse/community/arepo/src/main/main_original.c @@ -0,0 +1,299 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/main/main.c + * \date 05/2018 + * \brief Start of the program. 
+ * \details contains functions: + * int main(int argc, char **argv) + * void endrun() + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef HAVE_HDF5 +#include +#endif /* #ifdef HAVE_HDF5 */ + +/*! \brief The entry point of the program. + * + * This function initializes the MPI communication packages, and sets + * cpu-time counters to 0. Then begrun1() is called, which sets up + * the simulation. Then either ICs or restart files are loaded. In + * case of ICs init() is called which prepares the ICs for the run. + * A call to begrun2() finishes the initialization. Finally, run() is + * started, the main simulation loop, which iterates over the timesteps. + * + * \param[in] argc Argument count from command line. + * \param[in] argv Argument vector from command line. + * + * \return status of exit; 0 for normal exit. + */ +int main(int argc, char **argv) +{ +#ifdef IMPOSE_PINNING + detect_topology(); + get_core_set(); +#endif /* #ifdef IMPOSE_PINNING */ + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); + MPI_Comm_size(MPI_COMM_WORLD, &NTask); + + /* output a welcome message */ + hello(); + + /* initialize CPU-time/Wallclock-time measurement */ + init_cpu_log(); + + determine_compute_nodes(); + +#ifdef IMPOSE_PINNING + /* pin the MPI ranks to the available core set */ + pin_to_core_set(); + report_pinning(); +#endif /* #ifdef IMPOSE_PINNING */ + +#ifdef HOST_MEMORY_REPORTING + mpi_report_committable_memory(); +#endif /* #ifdef HOST_MEMORY_REPORTING */ + + Argc = argc; + Argv = argv; + + for(PTask = 0; NTask > (1 << PTask); PTask++) + ; /* empty body: the loop leaves PTask as the smallest value with 2^PTask >= NTask */ + + begrun0(); + + if(argc < 2) + { + if(ThisTask == 0) + { + printf("\nParameters are missing.
\n"); + printf("Call with [] [] []\n"); + printf("\n"); + printf(" RestartFlag Action\n"); + printf(" 0 Read initial conditions and start simulation\n"); + printf(" 1 Read restart files and resume simulation\n"); + printf(" 2 Restart from specified snapshot dump and resume simulation\n"); + printf(" 3 Run FOF and optionally SUBFIND: [ for SUBBOX_SNAPSHOTS]\n"); + printf( + " 6 Convert snapshot file to different format [input=ICFormat output=SnapFormat NOTE: derived " + "quantities have round-off errors!\n"); + printf(" 14 Write out the Voronoi mesh: \n"); + printf(" 17 Write out snapshot dump with measured gradients\n"); + printf(" 18 Recalculate gravitational potential values for specified snaphot dump: \n"); + printf("\n"); + } + endrun(); + } + + strcpy(ParameterFile, argv[1]); /* NOTE(review): unbounded copy; the size of ParameterFile is not visible here -- confirm argv[1] always fits */ + + if(argc >= 3) + RestartFlag = atoi(argv[2]); + else + RestartFlag = 0; + + if(argc >= 4) + RestartSnapNum = atoi(argv[3]); + else + RestartSnapNum = -1; + + // Do minimal validation of arguments here rather than in random places in the code + if((RestartFlag == 3 || RestartFlag == 6 || RestartFlag == 14 || RestartFlag == 17 || RestartFlag == 18) && RestartSnapNum < 0) + { + mpi_printf("Need to give the snapshot number\n"); + return (0); /* NOTE(review): returns after MPI_Init() without calling MPI_Finalize() -- confirm this is intended */ + } + +#ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT + if(RestartFlag == 18) + { + mpi_printf("Need RECOMPUTE_POTENTIAL_IN_SNAPSHOT for this option\n"); + return (0); /* NOTE(review): same early return without MPI_Finalize() as above */ + } +#endif /* #ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT */ + +#ifdef RUNNING_SAFETY_FILE + /* do not run if 'running' safety file exists */ + int runningflag = 0; + if(ThisTask == 0) + { + FILE *fd; + char runningfname[MAXLEN_PATH]; + + sprintf(runningfname, "./running"); + if((fd = fopen(runningfname, "r"))) /* Is the running-file present? If yes, interrupt the run. */ + { + fclose(fd); + printf("running-file detected.
stopping.\n"); + runningflag = 1; + } + } + MPI_Bcast(&runningflag, 1, MPI_INT, 0, MPI_COMM_WORLD); + if(runningflag) + { + MPI_Finalize(); /* do not call endrun(): it could rename the ./running file that was just detected */ + return 0; + } + else + { + /* touch a running safety file */ + if(ThisTask == 0) + { + FILE *fd; + char runningfname[MAXLEN_PATH]; + + sprintf(runningfname, "./running"); + if((fd = fopen(runningfname, "w"))) + { + fclose(fd); + printf("touching a running-file: %s \n", runningfname); + } + else + terminate("could not touch a running-file: %s\n", runningfname); + } + } +#endif /* #ifdef RUNNING_SAFETY_FILE */ + + begrun1(); /* set-up run */ + + /* see if we are loading a restart file or an IC file */ + if(RestartFlag == 1) + loadrestart(); + else + { + /* We're reading an IC file. Is it a snapshot or really an IC? */ + char fname[MAXLEN_PATH]; + + if(RestartFlag >= 2 && RestartSnapNum >= 0) + { + if(All.NumFilesPerSnapshot > 1) + sprintf(fname, "%s/snapdir_%03d/%s_%03d", All.OutputDir, RestartSnapNum, All.SnapshotFileBase, RestartSnapNum); + else + sprintf(fname, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, RestartSnapNum); + } + else + strcpy(fname, All.InitCondFile); + + /* now we can load the file */ + +#ifdef READ_DM_AS_GAS + read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES); +#else /* #ifdef READ_DM_AS_GAS */ + read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES); +#endif /* #ifdef READ_DM_AS_GAS #else */ + + /* If we are supposed to just convert the file, write and exit here.
*/ + if(RestartFlag == 6) + { + /* important for proper functioning of FOF+SUBFIND */ + if(All.ComovingIntegrationOn) /* change to new velocity variable */ + { + int i, j; + for(i = 0; i < NumPart; i++) + for(j = 0; j < 3; j++) + P[i].Vel[j] *= sqrt(All.Time) * All.Time; + } + set_softenings(); + All.TopNodeAllocFactor = 0.08; + All.TreeAllocFactor = 0.7; + All.NgbTreeAllocFactor = 0.7; + + sprintf(All.SnapshotFileBase, "%s_converted", All.SnapshotFileBase); /* NOTE(review): the destination buffer is also passed as a source argument; sprintf() with overlapping objects is undefined behaviour -- consider a temporary buffer */ + mpi_printf("Start writing file %s\nRestartSnapNum %d\n", All.SnapshotFileBase, RestartSnapNum); + savepositions(RestartSnapNum, 0); + endrun(); + } + + /* init returns a status code, where a value of >=0 means that endrun() should be called. */ + int status = init(); + + if(status >= 0) + { + if(status > 0) + mpi_printf("init() returned with %d\n", status); + + endrun(); + } + } + + begrun2(); + + run(); /* main simulation loop */ + + endrun(); /* clean up & finalize MPI */ + + return 0; /* not reached: endrun() ends with exit(0) */ +} + +/*! \brief This function ends the simulation in case of no error. + * + * This method has to be called by all processes. It should be used only + * if the simulation ends without errors. + * Otherwise terminate() should be used instead. + * + * \return void (never returns to the caller: ends with MPI_Finalize() and exit(0)) + */ +void endrun() +{ + mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); + mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); + fflush(stdout); + +#ifdef HAVE_HDF5 + /* The hdf5 library will sometimes register an atexit() handler that calls its + * error handler. In AREPO this is set to my_hdf_error_handler, which calls + * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed.
+ * Hence unset the HDF error handler here. + */ + H5Eset_auto(NULL, NULL); +#endif /* #ifdef HAVE_HDF5 */ + +#ifdef RUNNING_SAFETY_FILE + if(All.Ti_Current < TIMEBASE) /* simulation has not reached the final time */ + { + char running_fname[MAXLEN_PATH], running_done_fname[MAXLEN_PATH]; + sprintf(running_fname, "./running"); + sprintf(running_done_fname, "./running_done"); + rename(running_fname, running_done_fname); + mpi_printf("moved ./running file to ./running_done, job can now restart.\n"); + } + else + mpi_printf("leaving ./running file in place since run is complete to prevent any restarts.\n"); +#endif /* #ifdef RUNNING_SAFETY_FILE */ + + MPI_Finalize(); + exit(0); +} diff --git a/src/amuse/community/arepo/src/main/main_reduced.c b/src/amuse/community/arepo/src/main/main_reduced.c new file mode 100644 index 0000000000..1e7eec7ba7 --- /dev/null +++ b/src/amuse/community/arepo/src/main/main_reduced.c @@ -0,0 +1,135 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/main/main_reduced.c + * \date 05/2018 + * \brief Start of the program.
+ * \details contains functions: + * int main(int argc, char **argv) + * void endrun() + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief The entry point of the program. + * + * This function initializes MPI and the cpu-time measurement. The parameter + * file name is fixed to "param.txt" and RestartFlag to 0; begrun1() then + * sets up the run. The initial conditions named by All.InitCondFile are + * read with read_ic() and init() is called on them before the run. + * A call to begrun2() finishes the initialization. Finally, run() is + * started, the main simulation loop, which iterates over the timesteps. + * + * \param[in] argc Argument count; only forwarded to MPI_Init(). + * \param[in] argv Argument vector; only forwarded to MPI_Init(). + * + * \return status of exit; 0 for normal exit. + */ +int main(int argc, char **argv) +{ + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); + MPI_Comm_size(MPI_COMM_WORLD, &NTask); + + /* output a welcome message */ + hello(); + + /* initialize CPU-time/Wallclock-time measurement */ + init_cpu_log(); + + determine_compute_nodes(); + + for(PTask = 0; NTask > (1 << PTask); PTask++) + ; /* empty body: the loop leaves PTask as the smallest value with 2^PTask >= NTask */ + + begrun0(); + + strcpy(ParameterFile, "param.txt"); /* Command line parsing was removed: argv[1] is replaced with the fixed name "param.txt". */ + RestartFlag = 0; + + begrun1(); /* set-up run */ + + char fname[MAXLEN_PATH]; + strcpy(fname, All.InitCondFile); + + /* now we can load the file */ + +#ifdef READ_DM_AS_GAS + read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES); +#else /* #ifdef READ_DM_AS_GAS */ + read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES); /* NOTE(review): RestartFlag was set to 0 above, so presumably LOAD_TYPES is always selected -- confirm begrun1() does not change it */ +#endif /* #ifdef READ_DM_AS_GAS #else */ + + /* init returns a status code, where a value of >=0 means that endrun() should be called.
*/ + int status = init(); + + if(status >= 0) + { + if(status > 0) + mpi_printf("init() returned with %d\n", status); + + endrun(); + } + + begrun2(); + run(); /* main simulation loop */ + endrun(); /* clean up & finalize MPI */ + + return 0; /* not reached: endrun() ends with exit(0) */ +} + +/*! \brief This function ends the simulation in case of no error. + * + * This method has to be called by all processes. It should be used only + * if the simulation ends without errors. + * Otherwise terminate() should be used instead. + * + * \return void (never returns to the caller: ends with MPI_Finalize() and exit(0)) + */ +void endrun() +{ + mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); + mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); + fflush(stdout); + +#ifdef HAVE_HDF5 + /* The hdf5 library will sometimes register an atexit() handler that calls its + * error handler. In AREPO this is set to my_hdf_error_handler, which calls + * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed. + * Hence unset the HDF error handler here. + */ + H5Eset_auto(NULL, NULL); +#endif /* #ifdef HAVE_HDF5 */ + + MPI_Finalize(); + exit(0); +} diff --git a/src/amuse/community/arepo/src/main/proto.h b/src/amuse/community/arepo/src/main/proto.h new file mode 100644 index 0000000000..15a346f1bc --- /dev/null +++ b/src/amuse/community/arepo/src/main/proto.h @@ -0,0 +1,665 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/main/proto.h + * \date 05/2018 + * \brief Function declarations. + * \details No particular order. + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 29.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef PROTO_H +#define PROTO_H + +#include "../gravity/forcetree.h" +#include "../main/allvars.h" +#include "../utils/timer.h" + +#include +#include +#include + +#ifdef IMPOSE_PINNING +#ifndef __USE_GNU +#define __USE_GNU +#endif /* #ifndef __USE_GNU */ +#include +#endif /* #ifdef IMPOSE_PINNING */ + +#ifdef HAVE_HDF5 +#include +#endif /* #ifdef HAVE_HDF5 */ + +#if defined(COOLING) +#include "../cooling/cooling_proto.h" +#endif /* #if defined(COOLING) */ + +void sfr_init(); +void sfr_create_star_particles(void); +void ngb_finish_rangebounds_update(int nchanged, int *nodelist); +void ngb_update_rangebounds(int i, int *nchanged, int *nodelist); +int ngb_treefind_variable(MyDouble searchcenter[3], MyFloat hsml, int target, int *startnode, int mode, int *nexport, + int *nsend_local); +int ngb_treebuild(int npart); +void ngb_treeallocate(void); +void ngb_treefree(void); +int ngb_treefind_export_node_threads(int no, int target, int thread_id, int image_flag); +int ngb_treefind_variable_threads(MyDouble searchcenter[3], MyFloat hsml, int target, int mode, int thread_id, int numnodes, + int *firstnode); + +void drift_node(struct NgbNODE *current, integertime time1); +void drift_all_particles(void); +double get_desired_softening_from_mass(double mass); +void log_restart_debug(void); +int get_thread_num(void); +void report_pinning(void); 
+void detect_topology(void); +void pin_to_core_set(void); +void get_core_set(void); +int derefine_should_this_cell_be_merged(int i, int flag); + +void gravity_external(void); +void gravity(int timebin, int fullflag); +int my_ffsll(peanokey i); +void set_cosmo_factors_for_current_time(void); +void calc_exact_gravity_for_particle_type(void); +void calculate_non_standard_physics_with_valid_gravity_tree(void); +void calculate_non_standard_physics_with_valid_gravity_tree_always(void); +int get_softeningtype_for_hydro_cell(int i); +void gravity_forcetest_testforcelaw(void); +void *myfree_query_last_block(void); + +void subdivide_evenly(int N, int pieces, int index, int *first, int *count); +void force_evaluate_direct(int target, int result_idx, int nimport); +void gravity_direct(int timebin); +double dabs(double a); +double dmax(double a, double b); +double dmin(double a, double b); +double max_array(double *a, int num_elements); +int imax(int a, int b); +int imin(int a, int b); +double mysort(void *base, size_t nel, size_t width, int (*compar)(const void *, const void *)); + +int myflush(FILE *fstream); +int flush_everything(void); +void gravity_force_finalize(int timebin); +void permutate_chunks_in_list(int ncount, int *list); +double get_default_softening_of_particletype(int type); +double get_random_number_aux(void); +void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm); +void ngb_update_velocities(void); +void hello(void); +void find_long_range_step_constraint(void); + +void ngb_treemodifylength(int delta_NgbMaxPart); +void domain_resize_storage(int count_get, int count_get_sph, int option_flag); +void init_individual_softenings(void); +void do_derefinements_and_refinements(); +void mark_active_timebins(void); +void voronoi_test(void); +void execute_resubmit_command(void); +void output_compile_time_options(void); +void init_io_fields(); +void produce_dump(void); + +void create_snapshot_if_desired(void); +void output_log_messages(void); +void 
mpi_report_committable_memory(void); +long long report_comittable_memory(long long *MemTotal, long long *Committed_AS, long long *SwapTotal, long long *SwapFree); +int check_for_interruption_of_run(void); +void set_non_standard_physics_for_current_time(void); +void calculate_non_standard_physics_prior_mesh_construction(void); +void calculate_non_standard_physics_end_of_step(void); +void compute_statistics(void); +void face_limit_fluxes(struct state *st_L, struct state *st_R, struct state *st_center_L, struct state *st_center_R, + struct fluxes *flux, double dt, double *count, double *count_reduced); + +double get_sound_speed(int p); +void set_pressure_of_cell(int i); +void gradient_init(MyFloat *addr, MyFloat *addr_exch, MySingle *addr_grad, int type); +void limit_vel_gradient(double *d, MySingle *grad_vx, MySingle *grad_vy, MySingle *grad_vz, double csnd); +void subfind_density_hsml_guess(void); +void peano_hilbert_key_inverse(peanokey key, int bits, peano1D *x, peano1D *y, peano1D *z); +void find_nearest_meshpoint_global(mesh_search_data *searchdata, int n, int hsmlguess, int verbose); +void reorder_DP(void); +void peano_hilbert_order_DP(void); +void validate_vertex_velocities(void); + +double get_cell_radius(int i); +double nearest_x(double d); +double nearest_y(double d); +double nearest_z(double d); +int voronoi_get_connected_particles(tessellation *T); +void voronoi_init_connectivity(tessellation *T); +void voronoi_update_connectivity(tessellation *T); +int compare_foreign_connection(const void *a, const void *b); +void voronoi_remove_connection(int i); +int pmforce_is_particle_high_res(int type, MyDouble *pos); + +void cooling_only(void); +void report_VmRSS(void); +void tree_based_timesteps_setsoundspeeds(void); +void voronoi_update_ghost_velvertex(void); +int should_this_cell_be_split(int i); +int do_refinements(void); +int should_this_cell_be_merged(int i, int flag); +int do_derefinements(void); +void move_collisionless_particle(int new_i, int old_i); 
+void dump_memory_table(void); + +void report_detailed_memory_usage_of_largest_task(void); +void calculate_vertex_velocity_divergence(void); +void make_list_of_active_particles(void); +void find_gravity_timesteps_and_do_gravity_step_first_half(void); +void do_gravity_step_second_half(void); +void voronoi_1D_reorder_gas(void); +int voronoi_1D_compare_key(const void *a, const void *b); +void voronoi_1D_order(void); +void pm2d_init_periodic(void); +void pm2d_init_periodic_allocate(void); + +void pm2d_init_periodic_free(void); +void pm2d_force_periodic(int mode); +int pm2d_periodic_compare_sortindex(const void *a, const void *b); +void pm2d_mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)); +int timestep_evaluate(int target, int mode, int threadid); +void tree_based_timesteps(void); +int MPI_Check_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbufreal, int recvcount, + MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status); +int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, + MPI_Datatype recvtype, MPI_Comm comm); +double parallel_sort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)); + +double parallel_sort_comm(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *), MPI_Comm comm); +int compare_IDs(const void *a, const void *b); +void test_id_uniqueness(void); +void drift_particle(int i, integertime time1); +void put_symbol(char *string, double t0, double t1, char c); +void write_cpu_log(void); +void *mymalloc_fullinfo(const char *varname, size_t n, const char *func, const char *file, int linenr, int clear_flag, char *origin); +void *mymalloc_movable_fullinfo(void *ptr, const char *varname, size_t n, const char *func, const char *file, int line, char *origin); +void *myrealloc_fullinfo(void *p, size_t n, const char *func, 
const char *file, int line); +void *myrealloc_movable_fullinfo(void *p, size_t n, const char *func, const char *file, int line); + +void myfree_fullinfo(void *p, const char *func, const char *file, int line); +void myfree_movable_fullinfo(void *p, const char *func, const char *file, int line); +void mymalloc_init(void); +void calculate_maxid(void); +void determine_compute_nodes(void); +double INLINE_FUNC hubble_function(double a); +void fof_fof(int num); +double fof_find_groups(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask); +void fof_compile_catalogue(void); +void fof_save_groups(int num); + +double fof_periodic(double x); +double fof_periodic_wrap(double x); +double fof_find_nearest_dmparticle(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask); +void fof_compute_group_properties(int gr, int start, int len); +int fof_compare_FOF_PList_MinID(const void *a, const void *b); +int fof_compare_FOF_GList_MinID(const void *a, const void *b); +int fof_compare_FOF_GList_MinIDTask(const void *a, const void *b); +int fof_compare_FOF_GList_MinIDTask_MinID(const void *a, const void *b); +int fof_compare_FOF_GList_LocCountTaskDiffMinID(const void *a, const void *b); +int fof_compare_FOF_GList_ExtCountMinID(const void *a, const void *b); + +int fof_compare_Group_GrNr(const void *a, const void *b); +int fof_compare_Group_MinIDTask(const void *a, const void *b); +int fof_compare_Group_MinID(const void *a, const void *b); +int fof_compare_ID_list_GrNrID(const void *a, const void *b); +int fof_compare_Group_MinIDTask_MinID(const void *a, const void *b); +int fof_compare_Group_Len(const void *a, const void *b); +int fof_compare_aux_sort_Type(const void *a, const void *b); +int fof_compare_aux_sort_GrNr(const void *a, const void *b); +int fof_compare_aux_sort_OriginTask_OriginIndex(const void *a, const void *b); +int fof_compare_aux_sort_FileOrder(const void *a, const void *b); + +int 
fof_compare_local_sort_data_targetindex(const void *a, const void *b); +void fof_subfind_exchange(MPI_Comm Communicator); +void fof_prepare_output_order(void); +void fof_compute_group_properties(int gr, int start, int len); +void fof_exchange_group_data(void); +void fof_finish_group_properties(void); +double fof_get_comoving_linking_length(void); +void fof_assign_group_numbers(void); +void fof_reorder_PS(int *Id, int Nstart, int N); +void fof_subfind_write_file(char *fname, int writeTask, int lastTask); + +void fof_subfind_prepare_ID_list(void); +int subfind_compare_procassign_GrNr(const void *a, const void *b); +double subfind_so_potegy(double *egypot); +void subfind_distlinklist_get_two_heads(long long ngb_index1, long long ngb_index2, long long *head, long long *head_attach); +void fof_check_for_full_nodes_recursive(int no); +int fof_return_a_particle_in_cell_recursive(int no); +void subfind_loctree_copyExtent(void); +int subfind_distlinklist_get_tail_set_tail_increaselen(long long index, long long *tail, long long newtail); +void subfind_reorder_according_to_submp(void); +int subfind_compare_submp_OldIndex(const void *a, const void *b); + +int subfind_compare_submp_GrNr_DM_Density(const void *a, const void *b); +double subfind_exchange(void); +void subfind_coll_domain_decomposition(void); +void subfind_coll_domain_combine_topleaves_to_domains(int ncpu, int ndomain); +void subfind_coll_domain_free(void); +void subfind_coll_domain_allocate(void); +int subfind_coll_domain_determineTopTree(void); +void subfind(int num); +double subfind_density(int mode); +double subfind_overdensity(void); + +void subfind_save_final(int num); +void subfind_process_group_collectively(int nsubgroups_cat); +void subfind_coll_findExtent(void); +void subfind_reorder_PS(int *Id, int Nstart, int N); +void subfind_reorder_P(int *Id, int Nstart, int N); +void subfind_distribute_particles(MPI_Comm Communicator); +void subfind_coll_domain_walktoptree(int no); +int 
subfind_compare_densities(const void *a, const void *b); +int subfind_compare_binding_energy(const void *a, const void *b); +int subfind_compare_dist_rotcurve(const void *a, const void *b); + +int subfind_compare_coll_candidates_rank(const void *a, const void *b); +int subfind_compare_coll_candidates_boundlength(const void *a, const void *b); +int subfind_compare_coll_candidates_nsubs(const void *a, const void *b); +int subfind_compare_coll_candidates_subnr(const void *a, const void *b); +void subfind_col_find_coll_candidates(int totgrouplen); +void subfind_unbind_independent_ones(int count); +void subfind_distribute_groups(void); +void subfind_potential_compute(int num, struct unbind_data *d, int phase, double weakly_bound_limit); +int subfind_col_unbind(struct unbind_data *d, int num, int *num_non_gas); +void subfind_find_linkngb(void); + +int subfind_loctree_treebuild(int npart, struct unbind_data **mp); +void subfind_loctree_update_node_recursive(int no, int sib, int father); +double subfind_loctree_treeevaluate_potential(int target); +void subfind_loctree_copyExtent(void); +double subfind_locngb_treefind(MyDouble xyz[3], int desngb, double hguess); +void subfind_loctree_findExtent(int npart, struct unbind_data *mp); +int subfind_locngb_treefind_variable(MyDouble searchcenter[3], double hguess); +size_t subfind_loctree_treeallocate(int maxnodes, int maxpart); +void subfind_loctree_treefree(void); +void subfind_find_nearesttwo(void); + +int subfind_process_group_serial(int gr, int offset, int nsubgroups_cat); +int subfind_unbind(struct unbind_data *ud, int len, int *len_non_gas); +int subfind_locngb_compare_key(const void *a, const void *b); +int subfind_compare_serial_candidates_subnr(const void *a, const void *b); +int subfind_compare_serial_candidates_rank(const void *a, const void *b); +int subfind_compare_dens(const void *a, const void *b); +int subfind_compare_serial_candidates_boundlength(const void *a, const void *b); +int 
subfind_compare_dist_rotcurve(const void *a, const void *b); +int subfind_compare_binding_energy(const void *a, const void *b); +int subfind_compare_densities(const void *a, const void *b); + +int subfind_compare_ID_list(const void *a, const void *b); +int subfind_compare_SubGroup_GrNr_SubNr(const void *a, const void *b); +void subfind_poll_for_requests(void); +long long subfind_distlinklist_setrank_and_get_next(long long index, long long *rank); +long long subfind_distlinklist_get_rank(long long index); +void subfind_distlinklist_set_next(long long index, long long next); +void subfind_distlinklist_add_particle(long long index); +void subfind_distlinklist_add_bound_particles(long long index, int nsub); +void subfind_distlinklist_mark_particle(long long index, int target, int submark); +long long subfind_distlinklist_get_next(long long index); + +long long subfind_distlinklist_get_head(long long index); +void subfind_distlinklist_set_headandnext(long long index, long long head, long long next); +void subfind_distlinklist_set_tailandlen(long long index, long long tail, int len); +void subfind_distlinklist_get_tailandlen(long long index, long long *tail, int *len); +void subfind_distlinklist_set_all(long long index, long long head, long long tail, int len, long long next); +long long subfind_distlinklist_set_head_get_next(long long index, long long head); +int subfind_compare_dist_rotcurve(const void *a, const void *b); +void subfind_coll_treeallocate(int maxpart, int maxindex); +void subfind_coll_treefree(void); +void subfind_coll_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z); + +void subfind_coll_exchange_topleafdata(void); +void subfind_coll_update_node_recursive(int no, int sib, int father, int *last); +void subfind_coll_insert_pseudo_particles(void); +int subfind_coll_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z, unsigned long long xc, unsigned long long yc, + unsigned long long zc, unsigned long long ilen); 
+int subfind_coll_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char levels); +int subfind_coll_treebuild_construct(int npart, struct unbind_data *mp); +int subfind_coll_treebuild(int npart, struct unbind_data *mp); +double subfind_get_particle_balance(void); +int subfind_fof_compare_ID(const void *a, const void *b); +void write_file(char *fname, int readTask, int lastTask, int subbox_flag); + +void distribute_file(int nfiles, int firstfile, int firsttask, int lasttask, int *filenr, int *master, int *last); +int get_values_per_blockelement(enum iofields blocknr); +int get_datatype_in_block(enum iofields blocknr, int mode); +void get_dataset_name(enum iofields blocknr, char *buf); +int blockpresent(enum iofields blocknr, int write); +void fill_write_buffer(void *buffer, enum iofields blocknr, int *pindex, int pc, int type, int subbox_flag); +void empty_read_buffer(enum iofields blocknr, int offset, int pc, int type); +int get_particles_in_block(enum iofields blocknr, int *typelist); +int get_bytes_per_blockelement(enum iofields blocknr, int mode); +void read_file(const char *fname, int filenr, int readTask, int lastTask, int); + +void get_Tab_IO_Label(enum iofields blocknr, char *label); +void long_range_init_regionsize(void); +int find_files(const char *fname); +double get_random_number(void); +int peano_compare_key(const void *a, const void *b); +void mysort_domain(void *b, size_t n, size_t s); +void mysort_peano(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)); +int density_isactive(int n); +size_t sizemax(size_t a, size_t b); +void my_gsl_error_handler(const char *reason, const char *file, int line, int gsl_errno); + +void reconstruct_timebins(void); +peanokey peano_hilbert_key(peano1D x, peano1D y, peano1D z, int bits); +void enable_core_dumps_and_fpu_exceptions(void); +void find_next_sync_point(void); +void set_units_sfr(void); +void gravity_forcetest(void); +void allocate_memory(void); +void 
begrun0(void); +void begrun1(void); +void begrun2(void); + +int init(void); +void loadrestart(void); +void reread_params_after_loading_restart(void); +void check_omega(void); +void close_logfiles(void); +void compute_grav_accelerations(int timebin, int fullflag); +void compute_global_quantities_of_system(void); +void cooling_and_starformation(void); +void density(void); +void do_box_wrapping(void); + +void domain_Decomposition(void); +double enclosed_mass(double R); +void endrun(void); +void energy_statistics(void); +void ewald_corr(double dx, double dy, double dz, double *fper); +void ewald_force(double x, double y, double z, double force[3]); +int my_fls(int x); +void ewald_init(void); +double ewald_psi(double x, double y, double z); +double ewald_pot_corr(double dx, double dy, double dz); + +integertime find_next_outputtime(integertime time); +void minimum_large_ints(int n, long long *src, long long *res); +double get_starformation_rate(int i); +double calc_egyeff(int i, double gasdens, double *ne, double *x, double *tsfr, double *factorEVP); +void gravity_tree(int timebin); +void init_clouds(void); +void integrate_sfr(void); +size_t my_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); +size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *stream); +void open_logfiles(void); + +void peano_hilbert_order(void); +void predict(double time); +void read_ic(const char *fname, int); +void read_header_attributes(FILE *fd); +MyIDType determine_ids_offset(void); +int read_outputlist(char *fname); +void read_parameter_file(char *fname); +void check_parameters(); +void reorder_gas(int *Id); +void reorder_particles(int *Id); + +void restart(int mod); +void run(void); +void savepositions(int num, int subbox_flag); +void mpi_printf(const char *fmt, ...); +void mpi_fprintf(FILE *stream, const char *fmt, ...); +void mpi_printf_each(const char *fmt, ...); +FILE *open_file(char *); +double second(void); +void set_softenings(void); +void set_units(void); + +void 
setup_smoothinglengths(void); +void sumup_large_ints(int n, int *src, long long *res); +void sumup_longs(int n, long long *src, long long *res); +void statistics(void); +double timediff(double t0, double t1); +void veldisp(void); +double get_hydrokick_factor(integertime time0, integertime time1); +double get_gravkick_factor(integertime time0, integertime time1); +double drift_integ(double a, void *param); +double gravkick_integ(double a, void *param); + +double hydrokick_integ(double a, void *param); +void init_drift_table(void); +double get_drift_factor(integertime time0, integertime time1); +double measure_time(void); +void long_range_init(void); +void long_range_force(void); +void pm_init_periodic(void); +void pmforce_periodic(int mode, int *typelist); +void pm_init_regionsize(void); +void pm_init_nonperiodic(void); + +int pmforce_nonperiodic(int grnr); +void readjust_timebase(double TimeMax_old, double TimeMax_new); +void pm_setup_nonperiodic_kernel(void); +void init_gradients(); +void init_scalars(); +void print_particle_info(int i); +void print_state_info(struct state *st); +void print_state_face_info(struct state_face *st); +void face_set_scalar_states_and_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux); +void face_turn_momentum_flux(struct fluxes *flux, struct geometry *geom); + +void face_clear_fluxes(struct fluxes *flux); +int face_check_responsibility_of_this_task(tessellation *T, int p1, int p2, struct state *st_L, struct state *st_R); +int face_get_normals(tessellation *T, int i, struct geometry *geom); +int face_get_state(tessellation *T, int p, int i, struct state *st); +void face_boundary_check(point *p, double *velx, double *vely, double *velz); +void face_boundary_check_vertex(tessellation *T, int p, MyFloat *velx, MyFloat *vely, MyFloat *velz); +double face_timestep(struct state *state_L, struct state *state_R, double *hubble_a, double *atime); +void state_convert_to_local_frame(struct state *st, 
double *vel_face, double hubble_a, double atime); +void face_do_time_extrapolation(struct state *delta, struct state *st, double atime); +void face_do_spatial_extrapolation(struct state *delta, struct state *st, struct state *st_other); + +void face_do_spatial_extrapolation_single_quantity(double *delta, double st, double st_other, MySingle *grad, double *dx, double *r); +void face_add_extrapolations(struct state *st_face, struct state *delta_time, struct state *delta_space, struct fvs_stat *stat); +void face_add_extrapolation(struct state *st_face, struct state *delta, struct fvs_stat *stat); +void face_turn_velocities(struct state *st, struct geometry *geom); +void solve_advection(struct state *st_L, struct state *st_R, struct state_face *st_face, struct geometry *geom, double *vel_face); +void face_turnback_velocities(struct state_face *st_face, struct geometry *geom); +void face_get_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux, struct geometry *geom, + double *vel_face); +void face_add_fluxes_advection(struct state_face *st_face, struct fluxes *flux, struct geometry *geom, double *vel_face); +double godunov_flux_3d(struct state *st_L, struct state *st_R, struct state_face *st_face); +void sample_solution_vacuum_left_3d(double S, struct state *st_R, struct state_face *st_face); + +void sample_solution_vacuum_right_3d(double S, struct state *st_L, struct state_face *st_face); +void sample_solution_vacuum_generate_3d(double S, struct state *st_L, struct state *st_R, struct state_face *st_face); +void get_mach_numbers(struct state *st_L, struct state *st_R, double Press); +void sample_solution_3d(double S, struct state *st_L, struct state *st_R, double Press, double Vel, struct state_face *st_face); +int riemann(struct state *st_L, struct state *st_R, double *Press, double *Vel); +void pressure_function(double P, struct state *st, double *F, double *FD); +double guess_for_pressure(struct state *st_L, struct state 
*st_R); +void riemann_isotherm(struct state *st_L, struct state *st_R, double *Rho, double *Vel, double csnd); +void isothermal_function(double rhostar, double rho, double *F, double *FD); +void sample_solution_isothermal3d(double S, struct state *st_L, struct state *st_R, double Rho, double Vel, struct state_face *st_face, + double csnd); + +void apply_flux_list(void); +int flux_list_data_compare(const void *a, const void *b); +void set_vertex_velocities(void); +int scalar_init(MyFloat *addr, MyFloat *addr_mass, int type); +void compute_interface_fluxes(tessellation *T); +void update_primitive_variables(void); +void set_pressure_of_cell_internal(struct particle_data *P, struct sph_particle_data *SphP, int i); +void do_validity_checks(struct particle_data *P, struct sph_particle_data *SphP, int i, struct pv_update_data *pvd); +void update_primitive_variables_single(struct particle_data *P, struct sph_particle_data *SphP, int i, struct pv_update_data *pvd); + +void update_internal_energy(struct particle_data *P, struct sph_particle_data *SphP, int i, struct pv_update_data *pvd); +void mpi_exchange_buffers(void *send_buf, int *send_count, int *send_offset, void *recv_buf, int *recv_count, int *recv_offset, + int item_size, int commtag, int include_self); +int mpi_calculate_offsets(int *send_count, int *send_offset, int *recv_count, int *recv_offset, int send_identical); +void *sort_based_on_mesh_search(mesh_search_data *search, void *data, int n_items, int item_size); +void *sort_based_on_field(void *data, int field_offset, int n_items, int item_size); +void mpi_distribute_items_from_search(mesh_search_data *search, void *data, int *n_items, int *max_n, int item_size, int commtag, + int task_offset, int cell_offset); +void mpi_distribute_items_to_tasks(void *data, int task_offset, int *n_items, int *max_n, int item_size, int commtag); +void tile_ics(void); +void reallocate_memory_maxpart(void); +void reallocate_memory_maxpartsph(void); + +void 
share_particle_number_in_file(const char *fname, int filenr, int readTask, int lastTask, int readTypes); +int dump_memory_table_buffer(char *p); +void calc_memory_checksum(void *base, size_t bytes); +void allreduce_sparse_double_sum(double *loc, double *glob, int N); +void allreduce_sparse_imin(int *loc, int *glob, int N); +void myMPI_Alltoallv(void *sendb, size_t *sendcounts, size_t *sdispls, void *recvb, size_t *recvcounts, size_t *rdispls, int len, + int big_flag, MPI_Comm comm); +int myMPI_Sendrecv(void *sendb, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvb, size_t recvcount, + MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status); +size_t roundup_to_multiple_of_cacheline_size(size_t n); +void init_cpu_log(void); + +void write_error(int check, size_t nwritten, size_t nmemb); +size_t smax(size_t a, size_t b); +void init_field(enum iofields field, const char *label, const char *datasetname, enum types_in_memory type_in_memory, + enum types_in_file type_in_file_output, enum types_in_file type_in_file_input, int values_per_block, enum arrays array, + void *pointer_to_field, void (*io_func)(int, int, void *, int), int typelist_bitmask); +void init_units(enum iofields field, double a, double h, double L, double M, double V, double c); +void init_snapshot_type(enum iofields field, enum sn_type type); + +void swap_Nbyte(char *data, int n, int m); +void swap_header(void); + +#if defined(COOLING) +void cool_cell(int i); +#endif /* #if defined(COOLING) */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE +void special_particle_create_list(); +void special_particle_update_list(); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +#ifdef HAVE_HDF5 + +hid_t my_H5Fcreate(const char *fname, unsigned flags, hid_t fcpl_id, hid_t fapl_id); +hid_t my_H5Gcreate(hid_t loc_id, const char *groupname, size_t size_hint); +hid_t my_H5Dcreate(hid_t loc_id, const char *datasetname, hid_t type_id, hid_t space_id, hid_t dcpl_id); +hid_t 
my_H5Acreate(hid_t loc_id, const char *attr_name, hid_t type_id, hid_t space_id, hid_t acpl_id); +hid_t my_H5Screate(H5S_class_t type); +hid_t my_H5Screate_simple(int rank, const hsize_t *current_dims, const hsize_t *maximum_dims); +herr_t my_H5Dwrite(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, const void *buf, + const char *datasetname); +herr_t my_H5Awrite(hid_t attr_id, hid_t mem_type_id, const void *buf, const char *attr_name); +hid_t my_H5Fopen(const char *fname, unsigned int flags, hid_t fapl_id); +hid_t my_H5Dopen(hid_t file_id, const char *datasetname); + +hid_t my_H5Dopen_if_existing(hid_t file_id, const char *datasetname); +herr_t my_H5Dread(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, void *buf, + const char *datasetname); +hid_t my_H5Gopen(hid_t loc_id, const char *groupname); +hid_t my_H5Aopen_name(hid_t loc_id, const char *attr_name); +herr_t my_H5Aread(hid_t attr_id, hid_t mem_type_id, void *buf, const char *attr_name, hssize_t size); +herr_t my_H5Aclose(hid_t attr_id, const char *attr_name); +herr_t my_H5Dclose(hid_t dataset_id, const char *datasetname); +herr_t my_H5Gclose(hid_t group_id, const char *groupname); +herr_t my_H5Fclose(hid_t file_id, const char *fname); +herr_t my_H5Sclose(hid_t dataspace_id, H5S_class_t type); + +hid_t my_H5Tcopy(hid_t type_id); +herr_t my_H5Tclose(hid_t type_id); +herr_t my_H5Sselect_hyperslab(hid_t space_id, H5S_seloper_t op, const hsize_t *start, const hsize_t *stride, const hsize_t *count, + const hsize_t *block); +size_t my_H5Tget_size(hid_t datatype_id); +herr_t my_H5Tset_size(hid_t datatype_id, size_t size); +herr_t my_H5Sset_extent_simple(hid_t space_id, int rank, const hsize_t *current_size, const hsize_t *maximum_size, + const char *attr_name); +hid_t my_H5Dget_space(hid_t dataset_id, const char *datasetname); + +#ifdef HDF5_FILTERS +htri_t my_H5Pall_filters_avail(hid_t plist_id); +hid_t 
my_H5Pcreate(hid_t class_id); +herr_t my_H5Pclose(hid_t plist); +herr_t my_H5Pset_chunk(hid_t plist, int ndims, const hsize_t *dim); +herr_t my_H5Pset_shuffle(hid_t plist_id); +herr_t my_H5Pset_deflate(hid_t plist_id, uint level); +herr_t my_H5Pset_fletcher32(hid_t plist_id); +#endif /* #ifdef HDF5_FILTERS */ + +#endif /* #ifdef HAVE_HDF5 */ + +#ifdef HOST_MEMORY_REPORTING +void check_maxmemsize_setting(void); +#endif /* #ifdef HOST_MEMORY_REPORTING */ + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING +int get_softening_type_from_mass(double mass); +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ + +#ifdef MHD +void do_mhd_source_terms_first_half(void); +void do_mhd_source_terms_second_half(void); +#endif /* #ifdef MHD */ + +#ifdef ONEDIMS_SPHERICAL +void gravity_monopole_1d_spherical(); +#endif /* #ifdef ONEDIMS_SPHERICAL */ + +#if defined(PMGRID) +void my_slab_based_fft(fft_plan *plan, void *data, void *workspace, int forward); +void my_slab_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward); +void my_slab_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ); +void my_slab_transposeA(fft_plan *plan, fft_real *field, fft_real *scratch); +void my_slab_transposeB(fft_plan *plan, fft_real *field, fft_real *scratch); +void my_column_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ); +void my_column_based_fft_init_c2c(fft_plan *plan, int NgridX, int NgridY, int NgridZ); +void my_column_based_fft(fft_plan *plan, void *data, void *workspace, int forward); +void my_column_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward); +void my_fft_swap23(fft_plan *plan, fft_real *data, fft_real *out); + +void my_fft_swap13(fft_plan *plan, fft_real *data, fft_real *out); +void my_fft_swap23back(fft_plan *plan, fft_real *data, fft_real *out); +void my_fft_swap13back(fft_plan *plan, fft_real *data, fft_real *out); +#endif /* #if defined(PMGRID) */ + +#ifdef RIEMANN_HLLC +double godunov_flux_3d_hllc(struct state *st_L, struct 
state *st_R, struct state_face *st_face, struct fluxes *flux); +#endif /* #ifdef RIEMANN_HLLC */ + +#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) +void flux_convert_to_lab_frame(struct state *st_L, struct state *st_R, double *vel_face, struct fluxes *flux); +#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */ + +#ifdef RIEMANN_HLLD +double godunov_flux_3d_hlld(struct state *st_L, struct state *st_R, double *vel_face, struct state_face *st_face, struct fluxes *flux); +#endif /* #ifdef RIEMANN_HLLD */ + +#ifdef SUBFIND_EXTENDED_PROPERTIES +void subfind_fof_calc_am_collective(int snapnr, int ngroups_cat); +int subfind_fof_calc_am_serial(int gr, int Offs, int snapnr, int ngroups_cat); +void subfind_add_grp_props_calc_fof_angular_momentum(int num, int ngroups_cat); +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef USE_SFR +void convert_cell_into_star(int i, double birthtime); +void spawn_star_from_cell(int igas, double birthtime, int istar, MyDouble mass_of_star); +void make_star(int idx, int i, double prob, MyDouble mass_of_star, double *sum_mass_stars); +#endif /* #ifdef USE_SFR */ + +#endif /* #ifndef PROTO_H */ diff --git a/src/amuse/community/arepo/src/main/run.c b/src/amuse/community/arepo/src/main/run.c new file mode 100644 index 0000000000..0bdca04354 --- /dev/null +++ b/src/amuse/community/arepo/src/main/run.c @@ -0,0 +1,660 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/main/run.c + * \date 05/2018 + * \brief The main simulation loop. + * \details contains functions: + * void run(void) + * void do_second_order_source_terms_first_half(void) + * void do_second_order_source_terms_second_half(void) + * void set_non_standard_physics_for_current_time(void) + * void calculate_non_standard_physics_with_valid_gravity_tree(void) + * void calculate_non_standard_physics_with_valid_gravity_tree_always(void) + * void calculate_non_standard_physics_prior_mesh_construction(void) + * void calculate_non_standard_physics_end_of_step(void) + * int check_for_interruption_of_run(void) + * integertime find_next_outputtime(integertime ti_curr) + * static void create_end_file(void) + * void execute_resubmit_command(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../mesh/voronoi/voronoi.h" + +static void do_second_order_source_terms_first_half(void); +static void do_second_order_source_terms_second_half(void); +static void create_end_file(void); + +/*! \brief Contains the main simulation loop that iterates over + * single timesteps. + * + * The loop terminates when the cpu-time limit is + * reached, when a `stop' file is found in the output directory, or + * when the simulation ends because we arrived at TimeMax.
+ * + * If the simulation is started from initial conditions, a domain + * decomposition is performed, the gravitational forces are computed and the + * Voronoi mesh is constructed. + * + * The main loop is structured as follows: + * - find new timesteps: find_timesteps_without_gravity() + * - first gravitational half kick: find_gravity_timesteps_and_do_gravity_step_first_half() + * - gradients are calculated: calculate_gradients() + * - vertex velocities are assigned: set_vertex_velocities() + * - computation of the hydro flux: compute_interface_fluxes() (first half) + * - (de)refinement of hydro cells: do_derefinements_and_refinements() + * - drifting particles to next sync point: find_next_sync_point() + * (Afterwards the timebins are updated, so different particles might + * now be active than before) + * - (if needed) a new domain decomposition: domain_Decomposition() + * - construction of the Voronoi mesh: create_mesh() + * - computation of the hydro flux: compute_interface_fluxes() (second half) + * - update of primitive variables: update_primitive_variables() + * - computation of gravitational forces: in do_gravity_step_second_half() + * - second gravitational half kick: do_gravity_step_second_half() + * + * \return void + */ +void run(void) +{ + CPU_Step[CPU_MISC] += measure_time(); + + if(RestartFlag != 1) /* if we have restarted from restart files, no need to do the setup sequence */ + { + mark_active_timebins(); + + output_log_messages(); + + set_non_standard_physics_for_current_time(); + + ngb_treefree(); + domain_free(); + domain_Decomposition(); /* do domain decomposition if needed */ + + ngb_treeallocate(); + ngb_treebuild(NumGas); + + calculate_non_standard_physics_prior_mesh_construction(); + + create_mesh(); + + mesh_setup_exchange(); + + update_primitive_variables(); + + calculate_non_standard_physics_end_of_step(); + + exchange_primitive_variables(); + + calculate_gradients(); + + set_vertex_velocities(); /* determine the speed of the mesh-generating vertices */ + + ngb_update_velocities();
/* update the neighbor tree with the new vertex and cell velocities */ + + do_second_order_source_terms_second_half(); + + do_gravity_step_second_half(); + } + +#if defined(VORONOI_STATIC_MESH) + if(RestartFlag == 1) /* after a restart, rebuild the static mesh with all gas cells temporarily marked active, then restore the saved time bins and active list */ + { + int n_hydro_backup = TimeBinsHydro.NActiveParticles; + int *time_bin_hydro = (int *)malloc(NumGas * sizeof(int)); + int *hydro_particles = (int *)malloc(n_hydro_backup * sizeof(int)); + for(int j = 0; j < TimeBinsHydro.NActiveParticles; j++) + hydro_particles[j] = TimeBinsHydro.ActiveParticleList[j]; + + for(int j = 0; j < NumGas; j++) + { + time_bin_hydro[j] = P[j].TimeBinHydro; + P[j].TimeBinHydro = All.HighestActiveTimeBin; + TimeBinsHydro.ActiveParticleList[j] = j; + } + TimeBinsHydro.NActiveParticles = NumGas; + + create_mesh(); + mesh_setup_exchange(); + + for(int j = 0; j < NumGas; j++) + P[j].TimeBinHydro = time_bin_hydro[j]; + + TimeBinsHydro.NActiveParticles = n_hydro_backup; + for(int j = 0; j < TimeBinsHydro.NActiveParticles; j++) + TimeBinsHydro.ActiveParticleList[j] = hydro_particles[j]; + + free(time_bin_hydro); + free(hydro_particles); + } +#endif /* #if defined(VORONOI_STATIC_MESH) */ + + while(1) /* main loop */ + { + if(RestartFlag != + 1) /* if we are starting from restart files, skip in the first iteration the parts until the restart files were written */ + { + compute_statistics(); + + flush_everything(); + + create_snapshot_if_desired(); + + if(All.Ti_Current >= TIMEBASE) /* we reached the final time */ + { + mpi_printf("\nFinal time=%g reached.
Simulation ends.\n", All.TimeMax); + + if(All.Ti_lastoutput != All.Ti_Current) /* make a snapshot at the final time in case none has been produced at this time */ + produce_dump(); /* this will be overwritten if All.TimeMax is increased and the run is continued */ + + create_end_file(); // create empty file called end in output directory + + break; + } + + find_timesteps_without_gravity(); /* find-timesteps */ + + find_gravity_timesteps_and_do_gravity_step_first_half(); /* gravity half-step for hydrodynamics */ + /* kicks collisionless particles by half a step */ + +#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) + update_timesteps_from_gravity(); +#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \ + */ + + do_second_order_source_terms_first_half(); + + exchange_primitive_variables(); + + /* let's reconstruct gradients for every cell using Green-Gauss gradient estimation */ + calculate_gradients(); + + /* determine the speed of the mesh-generating vertices */ + set_vertex_velocities(); + + /* update the neighbor tree with the new vertex and cell velocities */ + ngb_update_velocities(); + + exchange_primitive_variables_and_gradients(); + + /* compute intercell flux with Riemann solver and update the cells with the fluxes */ + compute_interface_fluxes(&Mesh); + +#ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT +#ifndef VORONOI_STATIC_MESH + free_mesh_structures_not_needed_for_derefinement_refinement(); +#endif /* #ifndef VORONOI_STATIC_MESH */ +#endif /* #ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */ + +#ifdef REFINEMENT + do_derefinements_and_refinements(); +#endif /* #ifdef REFINEMENT */ + + write_cpu_log(); /* output some CPU usage log-info (accounts for everything needed up to completion of the current + sync-point) */ + + find_next_sync_point(); /* find next synchronization time */ + + make_list_of_active_particles(); +
+ output_log_messages(); /* write some info to log-files */ + +#if !defined(VORONOI_STATIC_MESH) +#ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT + free_all_remaining_mesh_structures(); +#else /* #ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */ + free_mesh(); +#endif /* #ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT #else */ +#endif /* #if !defined(VORONOI_STATIC_MESH) */ + /* Check whether we should write a restart file. + * Note that at this place neither the mesh nor the gravity tree needs to be stored. + */ + if(check_for_interruption_of_run()) + return; + } + else + RestartFlag = 0; /* the part above is skipped only once; later iterations run the full loop */ + + set_non_standard_physics_for_current_time(); + +#if defined(VORONOI_STATIC_MESH) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) /* may only be used if there is no gravity \ + */ +#else /* #if defined(VORONOI_STATIC_MESH) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) */ + + if(All.HighestActiveTimeBin >= All.SmallestTimeBinWithDomainDecomposition) /* only do this for sufficiently large steps */ + { +#ifdef VORONOI_STATIC_MESH + free_mesh(); +#endif /* #ifdef VORONOI_STATIC_MESH */ + + ngb_treefree(); + domain_free(); + + drift_all_particles(); + + domain_Decomposition(); /* do new domain decomposition, will also make a new chained-list of synchronized particles */ + + ngb_treeallocate(); + ngb_treebuild(NumGas); + +#if defined(VORONOI_STATIC_MESH) + create_mesh(); + mesh_setup_exchange(); +#endif /* #if defined(VORONOI_STATIC_MESH) */ + } +#endif /* #if defined(VORONOI_STATIC_MESH) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) #else */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + special_particle_update_list(); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + + calculate_non_standard_physics_prior_mesh_construction(); + +#if !defined(VORONOI_STATIC_MESH) + create_mesh(); + mesh_setup_exchange(); +#endif /* #if !defined(VORONOI_STATIC_MESH) */ + + exchange_primitive_variables_and_gradients(); + + compute_interface_fluxes(&Mesh); + +
update_primitive_variables(); /* this effectively closes off the hydro step */ + + /* the masses and positions are updated, let's get new forces and potentials */ + + do_second_order_source_terms_second_half(); + + do_gravity_step_second_half(); /* this closes off the gravity half-step */ + + /* do any extra physics, Strang-split (update both primitive and conserved variables as needed) */ + calculate_non_standard_physics_end_of_step(); + } + + restart(0); /* write a restart file at final time - can be used to continue simulation beyond final time */ + + write_cpu_log(); /* output final cpu measurements */ +} + +/*! \brief Source terms before hydrodynamics timestep. + * + * \return void + */ +void do_second_order_source_terms_first_half(void) +{ +#ifdef MHD + do_mhd_source_terms_first_half(); +#endif /* #ifdef MHD */ +} + +/*! \brief Source terms after hydrodynamics timestep. + * + * If there are multiple source terms, the second half source terms + * should be applied in the inverse order of the source terms in + * do_second_order_source_terms_first_half(). + * + * \return void + */ +void do_second_order_source_terms_second_half(void) +{ +#ifdef MHD + do_mhd_source_terms_second_half(); +#endif /* #ifdef MHD */ +} + +/*! \brief Calls extra modules after drift operator. + * + * This routine is called after the active particles are drifted + * to the next syncpoint, but before a new domain decomposition + * is performed. + * + * \return void + */ +void set_non_standard_physics_for_current_time(void) +{ +#if defined(COOLING) + IonizeParams(); /* set UV background for the current time */ +#endif /* #if defined(COOLING) */ +} + +/*! \brief Calls extra modules after the gravitational force is recomputed. + * + * Only called if full gravity tree is present.
+ * *** NOTICE *** if HIERARCHICAL_GRAVITY is adopted, this function is carried + * out once per synchronization time, with in general only a partial tree that + * does not necessarily contain all particles. The tree is complete only for + * steps where the highest timesteps are active ("full timesteps"). + * + * \return void + */ +void calculate_non_standard_physics_with_valid_gravity_tree(void) {} + +/*! \brief Calls extra modules after the gravitational force is recomputed. + * + * This is for runs which have the full tree at each time step; + * no HIERARCHICAL_GRAVITY + * + * \return void + */ +void calculate_non_standard_physics_with_valid_gravity_tree_always(void) {} + +/*! \brief Calls extra modules before the Voronoi mesh is built. + * + * \return void + */ +void calculate_non_standard_physics_prior_mesh_construction(void) +{ +#if defined(COOLING) && defined(USE_SFR) + sfr_create_star_particles(); +#endif /* #if defined(COOLING) && defined(USE_SFR) */ +} + +/*! \brief Calls extra modules at the end of the run loop. + * + * The second gravitational half kick is already applied to the + * particles and the Voronoi mesh is updated. + * + * \return void + */ +void calculate_non_standard_physics_end_of_step(void) +{ +#ifdef COOLING +#ifdef USE_SFR + cooling_and_starformation(); +#else /* #ifdef USE_SFR */ + cooling_only(); +#endif /* #ifdef USE_SFR #else */ +#endif /* #ifdef COOLING */ +} + +/*! \brief Checks whether the run must be interrupted. + * + * The run is interrupted either if the stop file is present or + * if 85% of the CPU time limit has been used. This routine also handles the + * regular writing of restart files. Restart files are also + * written on request if a file named `restart' is present. + * + * \return 1 if the run has to be interrupted, 0 otherwise.
+ */ +int check_for_interruption_of_run(void) +{ + /* Check whether we need to interrupt the run. Only task 0 inspects the trigger files and the CPU time; its decision is broadcast to all tasks below. stopflag values: 1 = stop-file found, 2 = CPU-time limit reached, 3 = write restart files but keep running. */ + int stopflag = 0; + if(ThisTask == 0) + { + FILE *fd; + char stopfname[MAXLEN_PATH]; + + snprintf(stopfname, sizeof(stopfname), "%sstop", All.OutputDir); /* bounded: directory plus suffix must not overrun the buffer */ + if((fd = fopen(stopfname, "r"))) /* Is the stop-file present? If yes, interrupt the run. */ + { + fclose(fd); + printf("stop-file detected. stopping.\n"); + stopflag = 1; + unlink(stopfname); + } + + snprintf(stopfname, sizeof(stopfname), "%srestart", All.OutputDir); + if((fd = fopen(stopfname, "r"))) /* Is the restart-file present? If yes, write a user-requested restart file. */ + { + fclose(fd); + printf("restart-file detected. writing restart files.\n"); + stopflag = stopflag ? stopflag : 3; /* keep a stop request detected above; restart files are written in either case */ + unlink(stopfname); + } + + if(CPUThisRun > 0.85 * All.TimeLimitCPU) /* are we running out of CPU-time ? If yes, interrupt run. */ + { + printf("reaching time-limit. stopping.\n"); + stopflag = 2; + } + } + + MPI_Bcast(&stopflag, 1, MPI_INT, 0, MPI_COMM_WORLD); + + if(stopflag) + { + restart(0); /* write restart file */ + + MPI_Barrier(MPI_COMM_WORLD); + + if(stopflag == 3) /* user-requested restart files were written; the run continues */ + return 0; + + if(stopflag == 2 && ThisTask == 0) /* time limit: leave a `cont' marker file and optionally resubmit */ + { + FILE *fd; + char contfname[MAXLEN_PATH]; + snprintf(contfname, sizeof(contfname), "%scont", All.OutputDir); + if((fd = fopen(contfname, "w"))) + fclose(fd); + + if(All.ResubmitOn) + execute_resubmit_command(); + } + return 1; + } + + /* is it time to write a regular restart-file? (for security) */ + if(ThisTask == 0) + { + if((CPUThisRun - All.TimeLastRestartFile) >= All.CpuTimeBetRestartFile) + { + All.TimeLastRestartFile = CPUThisRun; + stopflag = 3; + } + else + stopflag = 0; + } + + MPI_Bcast(&stopflag, 1, MPI_INT, 0, MPI_COMM_WORLD); + + if(stopflag == 3) + { + restart(0); /* write an occasional restart file */ + stopflag = 0; + } + return 0; +} + +/*! \brief Returns the next output time that is equal to or larger than + * ti_curr. + * + * \param[in] ti_curr Current simulation time (on the integer timeline). + * + * \return Next output time.
+ */ +integertime find_next_outputtime(integertime ti_curr) +{ + int i, iter = 0; + integertime ti, ti_next; + double next, time; + + DumpFlagNextSnap = 1; /* default; replaced by the flag of the selected output-list entry, if any */ + ti_next = -1; /* -1 means: no suitable output time found yet */ + + if(All.OutputListOn) + { + for(i = 0; i < All.OutputListLength; i++) + { + time = All.OutputListTimes[i]; + + if(time >= All.TimeBegin && time <= All.TimeMax) + { + if(All.ComovingIntegrationOn) + ti = (integertime)(log(time / All.TimeBegin) / All.Timebase_interval); + else + ti = (integertime)((time - All.TimeBegin) / All.Timebase_interval); + +#ifdef PROCESS_TIMES_OF_OUTPUTLIST + /* first, determine maximum output interval based on All.MaxSizeTimestep */ + integertime timax = (integertime)(All.MaxSizeTimestep / All.Timebase_interval); + + /* make it a power-of-two subdivision of TIMEBASE */ + integertime ti_min = TIMEBASE; + while(ti_min > timax) + ti_min >>= 1; + timax = ti_min; + + double multiplier = ti / ((double)timax); + + /* now round this to the nearest multiple of timax */ + ti = ((integertime)(multiplier + 0.5)) * timax; +#endif /* #ifdef PROCESS_TIMES_OF_OUTPUTLIST */ + if(ti >= ti_curr) /* keep the earliest list entry that is not before ti_curr */ + { + if(ti_next == -1) + { + ti_next = ti; + DumpFlagNextSnap = All.OutputListFlag[i]; + } + + if(ti_next > ti) + { + ti_next = ti; + DumpFlagNextSnap = All.OutputListFlag[i]; + } + } + } + } + } + else + { + if(All.ComovingIntegrationOn) + { + if(All.TimeBetSnapshot <= 1.0) + terminate("TimeBetSnapshot > 1.0 required for your simulation.\n"); + } + else + { + if(All.TimeBetSnapshot <= 0.0) + terminate("TimeBetSnapshot > 0.0 required for your simulation.\n"); + } + + time = All.TimeOfFirstSnapshot; + iter = 0; + + while(time < All.TimeBegin) /* advance to the first regular output time not before TimeBegin */ + { + if(All.ComovingIntegrationOn) + time *= All.TimeBetSnapshot; + else + time += All.TimeBetSnapshot; + + iter++; + + if(iter > 1000000) /* safety limit on the number of steps */ + terminate("Can't determine next output time.\n"); + } + + while(time <= All.TimeMax) /* step through the regular output times until one is not before ti_curr */ + { + if(All.ComovingIntegrationOn) + ti = (integertime)(log(time / All.TimeBegin) / All.Timebase_interval); + else + ti = (integertime)((time -
All.TimeBegin) / All.Timebase_interval); + + if(ti >= ti_curr) + { + ti_next = ti; + break; + } + + if(All.ComovingIntegrationOn) + time *= All.TimeBetSnapshot; + else + time += All.TimeBetSnapshot; + + iter++; + + if(iter > 1000000) /* safety limit on the number of steps */ + terminate("Can't determine next output time.\n"); + } + } + + if(ti_next == -1) + { + ti_next = 2 * TIMEBASE; /* this will prevent any further output */ + + mpi_printf("\nRUN: There is no valid time for a further snapshot file.\n"); + } + else + { + if(All.ComovingIntegrationOn) + next = All.TimeBegin * exp(ti_next * All.Timebase_interval); + else + next = All.TimeBegin + ti_next * All.Timebase_interval; + +#ifdef TIMESTEP_OUTPUT_LIMIT + mpi_printf("\nRUN: Limiting timestep to %g to fulfill output frequency", 0.1 * (next - All.Time)); + All.TimestepOutputLimit = 0.1 * (next - All.Time); +#endif /* #ifdef TIMESTEP_OUTPUT_LIMIT */ + + mpi_printf("\nRUN: Setting next time for snapshot file to Time_next= %g (DumpFlag=%d)\n\n", next, DumpFlagNextSnap); + } + + return ti_next; +} + +/*! \brief Creates an empty file called 'end' in the output directory. + * + * The existence of this file can be used e.g. for analysis scripts to + * verify that the simulation has run up to its final time and ended without + * error. Note that the end-file is completely passive. + * + * \return void + */ +static void create_end_file(void) +{ + FILE *fd; + char contfname[MAXLEN_PATH]; + snprintf(contfname, sizeof(contfname), "%send", All.OutputDir); /* bounded: directory plus suffix must not overrun the buffer */ + if((fd = fopen(contfname, "w"))) + fclose(fd); +} + +/*! \brief Executes the resubmit command.
+ * + * \return void + */ +void execute_resubmit_command(void) +{ + char buf[1000]; /* local, bounded copy of the configured command */ + snprintf(buf, sizeof(buf), "%s", All.ResubmitCommand); +#ifndef NOCALLSOFSYSTEM + system(buf); /* hand the command to the shell; the exit status is not checked */ +#endif /* #ifndef NOCALLSOFSYSTEM */ +} diff --git a/src/amuse/community/arepo/src/mesh/criterion_derefinement.c b/src/amuse/community/arepo/src/mesh/criterion_derefinement.c new file mode 100644 index 0000000000..7108310fc2 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/criterion_derefinement.c @@ -0,0 +1,181 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/criterion_derefinement.c + * \date 05/2018 + * \brief Criteria for the de-refinement of a cell. + * \details Routines which are checking whether a cell should be + * de-refined.
+ * contains functions: + * int derefine_should_this_cell_be_merged(int i, int flag) + * static int derefine_criterion_default(int i) + * static int derefine_criterion_jeans_ref(int i) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#if defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS) +static int derefine_criterion_jeans_ref(int i); +static int derefine_criterion_default(int i); +static int jeans_derefinement_criteria(int i); + +/*! \brief Should this cell be dissolved? + * + * This function signals whether a cell should be dissolved. This needs to be + * adjusted according to the needs of the simulation in question. One may also + * set the SphP[].Flag variable beforehand; these cells will also be + * dissolved. + * + * \param[in] i Index of cell in P and SphP arrays. + * \param[in] flag If nonzero, it is returned unless an earlier check decides. + * + * \return Flag if this cell should be dissolved. + */ +int derefine_should_this_cell_be_merged(int i, int flag) +{ +#ifdef REFINEMENT_HIGH_RES_GAS + if(SphP[i].AllowRefinement == 0) + return 0; +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + +#ifdef NODEREFINE_BACKGROUND_GRID + /* Keep in mind that this is used in cosmological zoom simulations. + * I.e. this enforces no derefinement for cells in low-res region, while not + * affecting the high-res region.
+ */ + if(SphP[i].Volume > 0.1 * All.MeanVolume) + return 0; +#endif /* #ifdef NODEREFINE_BACKGROUND_GRID */ + +#if defined(REFINEMENT_VOLUME_LIMIT) + double maxvolume = All.MaxVolume; + double minvolume = All.MinVolume; + + if(SphP[i].Volume > 0.5 * maxvolume) /* cells larger than half the maximum volume are never merged */ + return 0; + + if(SphP[i].Volume < 0.5 * minvolume) /* cells smaller than half the minimum volume are always merged */ + return 1; + + if(All.MaxVolumeDiff > 0 && SphP[i].Volume > 0.3 * All.MaxVolumeDiff * SphP[i].MinNgbVolume) + return 0; +#endif /* #if defined(REFINEMENT_VOLUME_LIMIT) */ + + if(flag) /* a caller-supplied flag is honoured only after the compile-time checks above */ + return flag; + + switch(All.DerefinementCriterion) + { + case 0: + return 0; + break; + + case 1: + return derefine_criterion_default(i); + break; + + case 2: + return derefine_criterion_jeans_ref(i); + break; + + default: + terminate("invalid derefinement criterion specified"); + break; + } + + return 0; +} + +/* + * static functions; i.e. functions that are only called within this file + */ + +/*! \brief Default de-refinement criterion. + * + * Flags the cell if its mass is below half the target gas mass. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be dissolved. + */ +static int derefine_criterion_default(int i) +{ +#if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) + + if(P[i].Mass < 0.5 * All.TargetGasMass) + return 1; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) */ + + return 0; +} + +/*! \brief Wrapper for Jeans de-refinement criterion. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be dissolved. + */ +static int derefine_criterion_jeans_ref(int i) +{ +#ifdef JEANS_REFINEMENT + return jeans_derefinement_criteria(i); +#endif /* #ifdef JEANS_REFINEMENT */ + return 0; /* reached only when JEANS_REFINEMENT is not defined */ +} + +/*! \brief De-refinement criterion according to Jeans stability of a cell. + * + * The cell can only be de-refined if the Jeans length is resolved by + * 1.5 * JEANS_REFINEMENT cells.
Otherwise, no de-refinement is possible even + * if the cell has a low mass. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be dissolved. + */ +static int jeans_derefinement_criteria(int i) +{ + if(P[i].Mass < 0.5 * All.TargetGasMass) + return 1; + +#ifdef JEANS_REFINEMENT + double jeans_number, jeans_length, sound_speed, dx; + sound_speed = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density); + jeans_length = sqrt(M_PI / All.G / SphP[i].Density) * sound_speed; + dx = 2.0 * get_cell_radius(i); + jeans_number = jeans_length / dx; + + if(jeans_number > 1.5 * JEANS_REFINEMENT && P[i].Mass < 0.5 * All.TargetGasMass) + return 1; +#endif /* #ifdef JEANS_REFINEMENT */ + return 0; +} + +#endif /* #if defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/criterion_refinement.c b/src/amuse/community/arepo/src/mesh/criterion_refinement.c new file mode 100644 index 0000000000..5b0334972a --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/criterion_refinement.c @@ -0,0 +1,267 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ *
+ * \file        src/mesh/criterion_refinement.c
+ * \date        05/2018
+ * \brief       Criteria for the refinement of a cell.
+ * \details     Routines which are checking whether a cell should be refined.
+ *              contains functions:
+ *                int should_this_cell_be_split(int i)
+ *                static int can_this_cell_be_split(int i)
+ *                static int refine_criterion_default(int i)
+ *                static int refine_criterion_jeans_ref(int i)
+ *                static int jeans_refinement_criteria(int i)
+ *                static int refine_criterion_volume(int i)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include /* NOTE(review): the header names of these four system includes are missing here -- restore them */
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#if defined(REFINEMENT_SPLIT_CELLS) && !defined(ONEDIMS)
+static int can_this_cell_be_split(int i);
+static int refine_criterion_default(int i);
+static int refine_criterion_jeans_ref(int i);
+static int jeans_refinement_criteria(int i);
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+static int refine_criterion_volume(int i);
+#endif
+
+#ifdef REFINEMENT_MERGE_CELLS
+extern char *FlagDoNotRefine; /* defined and allocated in refinement.c; only declared here to avoid a second external definition */
+#endif /* #ifdef REFINEMENT_MERGE_CELLS */
+
+/*! \brief Should this cell be refined?
+ *
+ *  This function signals whether a cell needs further refinement. This needs
+ *  to be adjusted according to the needs of the simulation in question.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return 1 if this cell should be split, 0 otherwise.
+ */
+int should_this_cell_be_split(int i)
+{
+#ifdef REFINEMENT_MERGE_CELLS
+  if(FlagDoNotRefine[i])
+    return 0;
+#endif /* #ifdef REFINEMENT_MERGE_CELLS */
+
+  if(P[i].Mass == 0 && P[i].ID == 0) /* skip cells that have been swallowed or dissolved */
+    return 0;
+
+#if defined(REFINEMENT_VOLUME_LIMIT)
+  double maxvolume = All.MaxVolume;
+  double minvolume = All.MinVolume;
+
+  if(SphP[i].Volume > 2. * maxvolume)
+    if(can_this_cell_be_split(i))
+      return 1;
+
+  if(SphP[i].Volume < 2. * minvolume) /* splitting would take the cell below the minimum volume */
+    return 0;
+
+  if(refine_criterion_volume(i))
+    if(can_this_cell_be_split(i))
+      return 1;
+#endif /* #if defined(REFINEMENT_VOLUME_LIMIT) */
+
+  switch(All.RefinementCriterion) /* select the function that evaluates the refinement criterion */
+    {
+      case 0:
+        return 0;
+        break;
+
+      case 1:
+        return refine_criterion_default(i);
+        break;
+
+      case 2:
+        return refine_criterion_jeans_ref(i);
+        break;
+
+      default:
+        terminate("invalid refinement criterion specified");
+        break;
+    }
+
+  return 0;
+}
+
+/*
+ * static functions; i.e. functions that are only called within this file
+ */
+
+/*! \brief Is cell round enough to be refined?
+ *
+ *  This function signals whether a cell is allowed refinement. A cell that
+ *  is supposed to be refined needs to match certain roundness criteria, which
+ *  are specified in this function.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return 1 if this cell is allowed to be refined, 0 otherwise.
+ */
+static int can_this_cell_be_split(int i)
+{
+#ifdef REGULARIZE_MESH_FACE_ANGLE
+  if(SphP[i].MaxFaceAngle < 1.5 * All.CellMaxAngleFactor)
+    return 1;
+
+#else  /* #ifdef REGULARIZE_MESH_FACE_ANGLE */
+  double dx      = nearest_x(P[i].Pos[0] - SphP[i].Center[0]);
+  double dy      = nearest_y(P[i].Pos[1] - SphP[i].Center[1]);
+  double dz      = nearest_z(P[i].Pos[2] - SphP[i].Center[2]);
+  double d       = sqrt(dx * dx + dy * dy + dz * dz); /* distance between P[i].Pos and SphP[i].Center */
+  double cellrad = get_cell_radius(i);
+
+  if(d < 2.0 * All.CellShapingFactor * cellrad) /* only refine cells which are reasonably 'round' */
+    return 1;
+#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */
+
+  return 0;
+}
+
+/*! \brief Default refinement criterion.
+ *
+ *  Checks if cell is within a factor of 2 of the target gas mass.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return Flag if this cell should be refined.
+ */
+static int refine_criterion_default(int i)
+{
+#ifdef REFINEMENT_HIGH_RES_GAS
+  if(SphP[i].AllowRefinement != 0) /* guards the following if-statement when REFINEMENT_HIGH_RES_GAS is defined */
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+    if(can_this_cell_be_split(i) && P[i].Mass > 2.0 * All.TargetGasMass)
+      return 1;
+
+  return 0; /* default is not to refine */
+}
+
+/*! \brief Jeans refinement criterion combined with the target mass criterion.
+ *
+ *  Resolving the Jeans length is an additional criterion, apart from obeying
+ *  the usual factor of 2 within a target mass criterion.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return 1 if this cell should be refined, 0 otherwise.
+ */
+static int refine_criterion_jeans_ref(int i)
+{
+#ifdef REFINEMENT_HIGH_RES_GAS
+  if(SphP[i].AllowRefinement != 0) /* guards the following if-statement when REFINEMENT_HIGH_RES_GAS is defined */
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+    if(can_this_cell_be_split(i))
+      {
+        if(P[i].Mass > 2.0 * All.TargetGasMass)
+          return 1;
+
+#ifdef JEANS_REFINEMENT
+        return jeans_refinement_criteria(i);
+#else  /* #ifdef JEANS_REFINEMENT */
+        return 0;
+#endif /* #ifdef JEANS_REFINEMENT #else */
+      }
+
+  return 0;
+}
+
+/*! \brief Refinement criterion according to Jeans stability of a cell.
+ *
+ *  The cell will be refined if the Jeans length is not resolved by
+ *  JEANS_REFINEMENT cells.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return 1 if this cell should be refined, 0 otherwise (always 0 if JEANS_REFINEMENT is not defined).
+ */
+static int jeans_refinement_criteria(int i)
+{
+#ifdef JEANS_REFINEMENT
+  if(can_this_cell_be_split(i))
+    {
+      double jeans_number, jeans_length, sound_speed, dx;
+
+      sound_speed  = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density);
+      jeans_length = sqrt(M_PI / All.G / SphP[i].Density) * sound_speed;
+      dx           = 2.0 * get_cell_radius(i); /* cell diameter */
+      jeans_number = jeans_length / dx;        /* number of cell diameters per Jeans length */
+
+      if(jeans_number < JEANS_REFINEMENT)
+        {
+          return 1;
+        }
+    }
+#endif /* #ifdef JEANS_REFINEMENT */
+
+  return 0;
+}
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+/*! \brief Refinement criterion based on the minimum volume of a
+ *         neighboring cell.
+ *
+ *  This criterion is supposed to avoid sudden jumps in resolution which lead
+ *  to an inaccurate result. Each cell that has a volume larger than
+ *  All.MaxVolumeDiff times the minimum volume of all neighboring cells and that passes the roundness test will be
+ *  refined. The global absolute minimum and maximum volume are checked by the caller, should_this_cell_be_split().
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return 1 if this cell should be refined, 0 otherwise.
+ */
+static int refine_criterion_volume(int i)
+{
+  if(All.MaxVolumeDiff > 0 && SphP[i].Volume > All.MaxVolumeDiff * SphP[i].MinNgbVolume)
+    {
+#ifdef REGULARIZE_MESH_FACE_ANGLE
+      if(SphP[i].MaxFaceAngle < 1.5 * All.CellMaxAngleFactor)
+        return 1;
+#else  /* #ifdef REGULARIZE_MESH_FACE_ANGLE */
+
+      double dx      = nearest_x(P[i].Pos[0] - SphP[i].Center[0]);
+      double dy      = nearest_y(P[i].Pos[1] - SphP[i].Center[1]);
+      double dz      = nearest_z(P[i].Pos[2] - SphP[i].Center[2]);
+      double d       = sqrt(dx * dx + dy * dy + dz * dz); /* distance between P[i].Pos and SphP[i].Center */
+      double cellrad = get_cell_radius(i);
+
+      if(d < 2.0 * All.CellShapingFactor * cellrad) /* only refine cells which are reasonably 'round' */
+        return 1;
+#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */
+    }
+
+  return 0;
+}
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) && !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/mesh.h b/src/amuse/community/arepo/src/mesh/mesh.h
new file mode 100644
index 0000000000..654555ebf6
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/mesh.h
@@ -0,0 +1,268 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/mesh.h
+ * \date        05/2018
+ * \brief       Header for mesh structures.
+ * \details     Declares the gradient, exchange, face, state and flux structures; everything sits inside the MESH_H include guard.
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 29.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef MESH_H
+#define MESH_H
+
+#define SCALAR_TYPE_PASSIVE 0   /*!< only advection */
+#define SCALAR_TYPE_SPECIES 1   /*!< species are normalised to guarantee sum{species}=1 */
+#define SCALAR_TYPE_NORMALIZE 2 /*!< the same normalisation factor as for species is applied, but no contribution to sum{species} */
+
+#define REFL_X_FLAGS 115043766
+#define REFL_Y_FLAGS 132379128
+#define REFL_Z_FLAGS 134217216
+
+#define OUTFLOW_X (1 << 27)
+#define OUTFLOW_Y (1 << 28)
+#define OUTFLOW_Z (1 << 29)
+
+#if defined MAXSCALARS
+extern struct scalar_elements
+{
+  int type;           /*!< scalar type, determines whether a normalization is applied */
+  size_t offset;      /*!< offset of the primitive quantity in the SphP struct */
+  size_t offset_mass; /*!< offset of the conserved quantity in the SphP struct */
+} scalar_elements[MAXSCALARS];
+
+extern struct scalar_index
+{
+#ifdef REFINEMENT_HIGH_RES_GAS
+  int HighResMass;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+} ScalarIndex;
+
+extern int N_Scalar; /*!< number of registered scalars */
+#endif               /* #if defined MAXSCALARS */
+
+#define GRADIENT_TYPE_NORMAL 0
+#define GRADIENT_TYPE_VELX 1
+#define GRADIENT_TYPE_VELY 2
+#define GRADIENT_TYPE_VELZ 3
+#define GRADIENT_TYPE_DENSITY 4
+#define GRADIENT_TYPE_PRESSURE 5
+#define GRADIENT_TYPE_UTHERM 6
+#define GRADIENT_TYPE_AX 7
+#define GRADIENT_TYPE_AY 8
+#define GRADIENT_TYPE_AZ 9
+#define GRADIENT_TYPE_FLD 10
+#define GRADIENT_TYPE_RTF 11
+
+extern struct grad_elements
+{
+  int type;           /*!< gradient type, ensures special treatment for velocities and speed of sound */
+  size_t offset;      /*!< offset of the quantity in the SphP struct */
+  size_t offset_exch; /*!< offset of the quantity in the PrimExch struct */
+  size_t offset_grad; /*!< offset in the grad_data struct */
+  double *min_value, *max_value;
+  double value0, value1;
+} grad_elements[MAXGRADIENTS], *GDensity, *GVelx, *GVely, *GVelz, *GPressure, *GUtherm;
+
+extern int N_Grad; /*!< number of gradients to be calculated */
+
+extern struct grad_data
+{
+  MySingle drho[3];
+
+  MySingle dvel[3][3];
+  MySingle dpress[3];
+
+#ifdef MHD
+  MySingle dB[3][3];
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  MySingle dscalars[MAXSCALARS][3];
+#endif /* #ifdef MAXSCALARS */
+} * GradExch;
+
+extern struct primexch
+{
+  double Volume;
+  MyFloat Density;
+
+  MyFloat VelGas[3];
+  MyFloat VelVertex[3];
+
+#ifdef MHD
+  MyFloat B[3];
+
+#ifdef MHD_POWELL
+  MyFloat DivB;
+#endif /* #ifdef MHD_POWELL */
+
+  MyFloat CurlB[3];
+#endif /* #ifdef MHD */
+  MyFloat Pressure;
+
+#ifdef MAXSCALARS
+  MyFloat Scalars[MAXSCALARS];
+#endif /* #ifdef MAXSCALARS */
+
+  double TimeLastPrimUpdate;
+
+  MyDouble Center[3];
+  MyFloat OldMass;
+  MySingle Csnd;
+  MySingle SurfaceArea;
+  MySingle ActiveArea;
+  /*  int task, index; */
+  short int TimeBinHydro;
+} * PrimExch;
+
+#ifdef REFINEMENT
+extern struct refdata
+{
+#ifdef REFINEMENT_VOLUME_LIMIT
+  double Volume;
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+  short int TimeBinHydro;
+} * RefExch;
+#endif /* #ifdef REFINEMENT */
+
+typedef struct face_data
+{
+  int p1, p2;
+#ifdef REFINEMENT_MERGE_CELLS
+  int t, nr; /* delaunay tetra and edge number that generated this face */
+#endif       /* #ifdef REFINEMENT_MERGE_CELLS */
+
+#ifdef OPTIMIZE_MEMORY_USAGE
+  MyFloat area;
+  MyFloat cx, cy, cz; /* center-of-mass of face */
+#else                 /* #ifdef OPTIMIZE_MEMORY_USAGE */
+  double area;
+  double cx, cy, cz; /* center-of-mass of face */
+#endif                /* #ifdef OPTIMIZE_MEMORY_USAGE #else */
+
+#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS
+  double area_backup;
+#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */
+#ifdef TETRA_INDEX_IN_FACE
+  int dt_index;
+#endif /* #ifdef TETRA_INDEX_IN_FACE */
+} face;
+
+/*! left or right state of a face */
+struct state
+{
+  double dx, dy, dz;
+  double dt_half;
+  short int timeBin;
+
+  double rho;
+  double velx, vely, velz;
+  double press;
+  double oldmass;
+  double surfacearea;
+  double activearea;
+  double volume;
+
+  MyFloat velGas[3];
+  MyFloat velVertex[3];
+  struct grad_data *grad;
+
+  double csnd;
+  double Energy;
+#ifdef MHD
+  double Bx, By, Bz;
+#ifdef MHD_POWELL
+  double divB;
+#endif /* #ifdef MHD_POWELL */
+  double CurlB[3];
+#endif /* #ifdef MHD */
+
+#if defined(GODUNOV_STATS)
+  double mach;
+#endif /* #if defined(GODUNOV_STATS) */
+
+#ifdef MAXSCALARS
+  double scalars[MAXSCALARS];
+#endif /* #ifdef MAXSCALARS */
+  MyIDType ID;
+
+#ifdef ONEDIMS_SPHERICAL
+  double radius;
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+  double dtExtrapolation;
+};
+
+/*! state on a face determined by riemann solver */
+extern struct state_face
+{
+  double rho;
+  double velx, vely, velz;
+  double press;
+#ifdef MHD
+  double Bx, By, Bz;
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  double *scalars;
+#endif /* #ifdef MAXSCALARS */
+} state_face;
+
+/*! flux through a face */
+extern struct fluxes
+{
+  double mass;
+  double momentum[3];
+  double energy;
+
+#ifdef MHD
+  double B[3];
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  double scalars[MAXSCALARS];
+#endif /* #ifdef MAXSCALARS */
+} fluxes, diffusionfluxes;
+
+extern struct geometry
+{
+  double nn;
+  double nx, ny, nz;
+  double mx, my, mz;
+  double px, py, pz;
+  double cx, cy, cz;
+} geom;
+
+struct pv_update_data
+{
+  double atime;
+  double hubble_a;
+  double a3inv;
+};
+
+struct fvs_stat /* kept inside the include guard: a struct defined after the #endif is redefined on every repeated inclusion */
+{
+  int count_disable_extrapolation;
+};
+#endif /* MESH_H */
diff --git a/src/amuse/community/arepo/src/mesh/refinement.c b/src/amuse/community/arepo/src/mesh/refinement.c
new file mode 100644
index 0000000000..20b2c4d5a2
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/refinement.c
@@ -0,0 +1,217 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/refinement.c
+ * \date        05/2018
+ * \brief       Driver routines that handle refinement and de-refinement.
+ * \details     contains functions:
+ *                void do_derefinements_and_refinements()
+ *                static void refinement_prepare()
+ *                static void refinement_cleanup()
+ *                void move_collisionless_particle(int new_i, int old_i)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+
+#ifdef REFINEMENT
+#include "../main/proto.h"
+
+#if defined(REFINEMENT_MERGE_CELLS) && defined(REFINEMENT_SPLIT_CELLS)
+char *FlagDoNotRefine; /* one char per gas cell; allocated in refinement_prepare(), freed in refinement_cleanup() */
+#endif /* #if defined (REFINEMENT_MERGE_CELLS) && defined (REFINEMENT_SPLIT_CELLS) */
+
+static void refinement_prepare();
+static void refinement_cleanup();
+
+/*! \brief Main routine to trigger refinement and de-refinements.
+ *
+ *  Called in main run loop (run.c). Runs refinement_prepare(), then do_derefinements() and do_refinements() (each only if compiled in), then refinement_cleanup().
+ *
+ *  \return void
+ */
+void do_derefinements_and_refinements()
+{
+  refinement_prepare();
+
+#ifdef REFINEMENT_MERGE_CELLS
+  do_derefinements();
+#endif /* #ifdef REFINEMENT_MERGE_CELLS */
+
+#ifdef REFINEMENT_SPLIT_CELLS
+  do_refinements();
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+
+  refinement_cleanup();
+}
+
+/*! \brief Prepares for refinement.
+ *
+ *  Determines quantities needed by refinement routine;
+ *  Allocates additional arrays.
+ *  With REFINEMENT_VOLUME_LIMIT, SphP[i].MinNgbVolume of every active cell is set to the smallest volume found among its connections.
+ *
+ *  \return void
+ */
+void refinement_prepare()
+{
+  TIMER_START(CPU_REFINE);
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+  int idx, i;
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+
+#if defined(REFINEMENT_MERGE_CELLS) && defined(REFINEMENT_SPLIT_CELLS)
+  FlagDoNotRefine = mymalloc_movable(&FlagDoNotRefine, "FlagDoNotRefine", NumGas * sizeof(char));
+#endif /* #if defined (REFINEMENT_MERGE_CELLS) && defined (REFINEMENT_SPLIT_CELLS) */
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      SphP[i].MinNgbVolume = MAX_REAL_NUMBER; /* start value for the minimum search below */
+
+      int q = SphP[i].first_connection;
+      while(q >= 0) /* walk the connection list of cell i from first_connection to last_connection */
+        {
+          int dp       = DC[q].dp_index;
+          int particle = Mesh.DP[dp].index;
+
+          if(particle < 0) /* negative index: skip this connection, but still advance or stop */
+            {
+              if(q == SphP[i].last_connection)
+                break;
+
+              q = DC[q].next;
+              continue;
+            }
+
+          if(particle >= NumGas && Mesh.DP[dp].task == ThisTask)
+            particle -= NumGas;
+
+          double Volume;
+          if(DC[q].task == ThisTask)
+            Volume = SphP[particle].Volume;
+          else /* connection belongs to another task: read the volume from the exchange array instead of SphP */
+            {
+#ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT
+              Volume = PrimExch[particle].Volume;
+#else  /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */
+              Volume = RefExch[particle].Volume;
+#endif /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT #else */
+            }
+
+          if(Volume < SphP[i].MinNgbVolume)
+            SphP[i].MinNgbVolume = Volume;
+
+          if(q == SphP[i].last_connection)
+            break;
+
+          q = DC[q].next;
+        }
+    }
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+
+  TIMER_STOP(CPU_REFINE);
+}
+
+/*! \brief Cleans up after refinement.
+ *
+ *  Frees memory allocated by refinement_prepare().
+ *
+ *  \return void
+ */
+void refinement_cleanup()
+{
+#if defined(REFINEMENT_MERGE_CELLS) && defined(REFINEMENT_SPLIT_CELLS)
+  myfree(FlagDoNotRefine);
+#endif /* #if defined (REFINEMENT_MERGE_CELLS) && defined (REFINEMENT_SPLIT_CELLS) */
+}
+
+/*! \brief Moves collisionless particle from index old_i to new_i.
+ *
+ *  Needed if new cell is introduced, as cells have to be at the beginning of
+ *  the P array and all other particles have to be located after the last
+ *  gas cell. This routine moves the data in P and updates
+ *  the gravity time-bin lists (TimeBinsGravity) consistently.
+ *
+ *  \param[in] new_i New index of particle in P.
+ *  \param[in] old_i Previous index of particle in P.
+ *
+ *  \return void
+ */
+void move_collisionless_particle(int new_i, int old_i)
+{
+  int prev, next, bin;
+  struct TimeBinData *tbData;
+
+  P[new_i] = P[old_i];
+
+  if(P[old_i].Mass == 0 && P[old_i].ID == 0) /* only the P entry is copied; the time-bin lists are left untouched */
+    return;
+
+  if(P[old_i].Mass == 0 && P[old_i].Type == 4) /* same for massless entries of type 4 */
+    return;
+
+  tbData = &TimeBinsGravity;
+  bin    = P[old_i].TimeBinGrav;
+
+  if(TimeBinSynchronized[bin])
+    {
+      /* particle is active, need to add it to the list of active particles again
+         we assume here, that the new particle at the old index in this list is also active! */
+      tbData->ActiveParticleList[tbData->NActiveParticles] = new_i;
+      tbData->NActiveParticles++;
+    }
+
+  /* now move it in the link list of its timebin
+     we only need to change the gravity timebin here */
+
+  tbData->NextInTimeBin[new_i] = tbData->NextInTimeBin[old_i];
+  tbData->PrevInTimeBin[new_i] = tbData->PrevInTimeBin[old_i];
+
+  prev = tbData->PrevInTimeBin[old_i];
+  next = tbData->NextInTimeBin[old_i];
+
+  if(prev >= 0)
+    tbData->NextInTimeBin[prev] = new_i;
+  else /* no predecessor: old_i must have been the head of its time bin */
+    {
+      if(tbData->FirstInTimeBin[bin] != old_i)
+        terminate("strange");
+      tbData->FirstInTimeBin[bin] = new_i;
+    }
+
+  if(next >= 0)
+    tbData->PrevInTimeBin[next] = new_i;
+  else /* no successor: old_i must have been the tail of its time bin */
+    {
+      if(tbData->LastInTimeBin[bin] != old_i)
+        terminate("strange");
+      tbData->LastInTimeBin[bin] = new_i;
+    }
+}
+
+#endif /* REFINEMENT */
diff --git a/src/amuse/community/arepo/src/mesh/set_vertex_velocities.c b/src/amuse/community/arepo/src/mesh/set_vertex_velocities.c
new file mode 100644
index 0000000000..9280b5fde6
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/set_vertex_velocities.c
@@ -0,0 +1,321 @@
+/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/set_vertex_velocities.c + * \date 05/2018 + * \brief Algorithms that decide how individual cells are moving. + * \details contains functions: + * void set_vertex_velocities(void) + * static void validate_vertex_velocities_1d() + * void validate_vertex_velocities(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 08.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#ifdef ONEDIMS_SPHERICAL +static void validate_vertex_velocities_1d(); +#endif /* #ifdef ONEDIMS_SPHERICAL */ + +/*! \brief Sets velocities of individual mesh-generating points. 
+ *  Active cells start from the fluid velocity (zero with MESHRELAX, VORONOI_STATIC_MESH or NOHYDRO), get half a time step of pressure-gradient acceleration and, optionally, a regularization drift.
+ *
+ *  \return void
+ */
+void set_vertex_velocities(void)
+{
+  TIMER_START(CPU_SET_VERTEXVELS);
+
+  int idx, i, j;
+  double dt;
+
+#if defined(VORONOI_STATIC_MESH) || defined(NOHYDRO)
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      for(j = 0; j < 3; j++)
+        SphP[i].VelVertex[j] = 0;
+    }
+  TIMER_STOP(CPU_SET_VERTEXVELS);
+  return; /* nothing below this point is executed in this configuration */
+#endif /* #if defined (VORONOI_STATIC_MESH) || defined (NOHYDRO) */
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+#ifdef MESHRELAX
+      for(j = 0; j < 3; j++)
+        SphP[i].VelVertex[j] = 0;
+#else  /* #ifdef MESHRELAX */
+      for(j = 0; j < 3; j++)
+        SphP[i].VelVertex[j] = P[i].Vel[j]; /* make cell velocity equal to fluid's velocity */
+#endif /* #ifdef MESHRELAX #else */
+
+      double acc[3];
+
+      /*  the actual time-step of particle */
+      integertime ti_step = P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0;
+      dt                  = ti_step * All.Timebase_interval;
+      dt /= All.cf_hubble_a; /* this gives the actual timestep: dt = dloga/ (adot/a) */
+
+      /* now let's add the gradient of the pressure force
+       * note that the gravity half-step was already included in P[i].Vel[j]
+       * prior to calling this function, thus it does not need to be accounted
+       * here explicitly.
+       */
+      if(SphP[i].Density > 0)
+        {
+          acc[0] = -SphP[i].Grad.dpress[0] / SphP[i].Density;
+          acc[1] = -SphP[i].Grad.dpress[1] / SphP[i].Density;
+          acc[2] = -SphP[i].Grad.dpress[2] / SphP[i].Density;
+
+#ifdef MHD
+          /* we also add the acceleration due to the Lorentz force */
+          acc[0] += (SphP[i].CurlB[1] * SphP[i].B[2] - SphP[i].CurlB[2] * SphP[i].B[1]) / SphP[i].Density;
+          acc[1] += (SphP[i].CurlB[2] * SphP[i].B[0] - SphP[i].CurlB[0] * SphP[i].B[2]) / SphP[i].Density;
+          acc[2] += (SphP[i].CurlB[0] * SphP[i].B[1] - SphP[i].CurlB[1] * SphP[i].B[0]) / SphP[i].Density;
+
+#endif /* #ifdef MHD */
+
+          SphP[i].VelVertex[0] += 0.5 * dt * acc[0];
+          SphP[i].VelVertex[1] += 0.5 * dt * acc[1];
+          SphP[i].VelVertex[2] += 0.5 * dt * acc[2];
+        }
+    }
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+#ifdef REGULARIZE_MESH_CM_DRIFT
+
+      double dx, dy, dz, d, fraction;
+
+      dx = nearest_x(P[i].Pos[0] - SphP[i].Center[0]);
+      dy = nearest_y(P[i].Pos[1] - SphP[i].Center[1]);
+      dz = nearest_z(P[i].Pos[2] - SphP[i].Center[2]);
+
+      /*  the actual time-step of particle */
+      dt = (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval;
+      dt /= All.cf_hubble_a; /* this is dt, the actual timestep  */
+
+      double cellrad = get_cell_radius(i);
+
+#if !defined(REGULARIZE_MESH_FACE_ANGLE)
+      /* if there is a density gradient, use a center that is displaced slightly in the direction of the gradient.
+       * This makes sure that the Lloyd scheme does not simply iterate towards cells of equal volume, instead
+       * we keep cells of roughly equal mass.
+       */
+      double dgrad = sqrt(SphP[i].Grad.drho[0] * SphP[i].Grad.drho[0] + SphP[i].Grad.drho[1] * SphP[i].Grad.drho[1] +
+                          SphP[i].Grad.drho[2] * SphP[i].Grad.drho[2]);
+
+      if(dgrad > 0)
+        {
+          double scale = SphP[i].Density / dgrad;
+          double tmp   = 3 * cellrad + scale;
+          double x     = (tmp - sqrt(tmp * tmp - 8 * cellrad * cellrad)) / 4;
+
+          if(x < 0.25 * cellrad)
+            {
+              dx = nearest_x(P[i].Pos[0] - (SphP[i].Center[0] + x * SphP[i].Grad.drho[0] / dgrad));
+              dy = nearest_y(P[i].Pos[1] - (SphP[i].Center[1] + x * SphP[i].Grad.drho[1] / dgrad));
+              dz = nearest_z(P[i].Pos[2] - (SphP[i].Center[2] + x * SphP[i].Grad.drho[2] / dgrad));
+            }
+        }
+#endif /* #if !defined(REGULARIZE_MESH_FACE_ANGLE) */
+
+      d = sqrt(dx * dx + dy * dy + dz * dz);
+
+      fraction = 0; /* stays 0 (no drift correction) unless one of the thresholds below is exceeded */
+
+#if !defined(REGULARIZE_MESH_FACE_ANGLE)
+      if(d > 0.75 * All.CellShapingFactor * cellrad && dt > 0)
+        {
+          if(d > All.CellShapingFactor * cellrad)
+            fraction = All.CellShapingSpeed;
+          else
+            fraction = All.CellShapingSpeed * (d - 0.75 * All.CellShapingFactor * cellrad) / (0.25 * All.CellShapingFactor * cellrad);
+        }
+#else  /* #if !defined(REGULARIZE_MESH_FACE_ANGLE) */
+      if(SphP[i].MaxFaceAngle > 0.75 * All.CellMaxAngleFactor && dt > 0)
+        {
+          if(SphP[i].MaxFaceAngle > All.CellMaxAngleFactor)
+            fraction = All.CellShapingSpeed;
+          else
+            fraction = All.CellShapingSpeed * (SphP[i].MaxFaceAngle - 0.75 * All.CellMaxAngleFactor) / (0.25 * All.CellMaxAngleFactor);
+        }
+#endif /* #if !defined(REGULARIZE_MESH_FACE_ANGLE) #else */
+
+      if(d > 0 && fraction > 0)
+        {
+          double v;
+#ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED
+
+          v = All.cf_atime * get_sound_speed(i);
+
+#if defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)
+          /* calculate gravitational velocity scale */
+          double ax, ay, az, ac, vgrav;
+#ifdef HIERARCHICAL_GRAVITY
+          ax = SphP[i].FullGravAccel[0];
+          ay = SphP[i].FullGravAccel[1];
+          az = SphP[i].FullGravAccel[2];
+#else  /* #ifdef HIERARCHICAL_GRAVITY */
+          ax = P[i].GravAccel[0];
+          ay = P[i].GravAccel[1];
+          az = P[i].GravAccel[2];
+#endif /* #ifdef HIERARCHICAL_GRAVITY #else */
+#ifdef PMGRID
+          ax += P[i].GravPM[0];
+          ay += P[i].GravPM[1];
+          az += P[i].GravPM[2];
+#endif /* #ifdef PMGRID */
+          ac    = sqrt(ax * ax + ay * ay + az * az);
+          vgrav = 4 * sqrt(All.cf_atime * cellrad * ac);
+          if(v < vgrav)
+            v = vgrav;
+#endif /* #if defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE) */
+
+          double vcurl = cellrad * SphP[i].CurlVel;
+          if(v < vcurl)
+            v = vcurl;
+
+#else  /* #ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED */
+          v = All.cf_atime * All.cf_atime * d / dt; /* use fiducial velocity */
+
+          double vel  = sqrt(P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]);
+          double vmax = dmax(All.cf_atime * get_sound_speed(i), vel);
+          if(v > vmax)
+            v = vmax;
+#endif /* #ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED #else */
+
+#ifdef REFINEMENT_SPLIT_CELLS
+          double proj = SphP[i].SepVector[0] * dx + SphP[i].SepVector[1] * dy + SphP[i].SepVector[2] * dz;
+
+          if(proj != 0) /* keep only the displacement component along SepVector */
+            {
+              dx = proj * SphP[i].SepVector[0];
+              dy = proj * SphP[i].SepVector[1];
+              dz = proj * SphP[i].SepVector[2];
+            }
+
+          SphP[i].SepVector[0] = 0;
+          SphP[i].SepVector[1] = 0;
+          SphP[i].SepVector[2] = 0;
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+
+          SphP[i].VelVertex[0] += fraction * v * (-dx / d);
+          SphP[i].VelVertex[1] += fraction * v * (-dy / d);
+          SphP[i].VelVertex[2] += fraction * v * (-dz / d);
+        }
+#endif /* #ifdef REGULARIZE_MESH_CM_DRIFT */
+
+      for(j = NUMDIMS; j < 3; j++)
+        SphP[i].VelVertex[j] = 0; /* vertex velocities for unused dimensions set to zero */
+    }
+
+#ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE
+  voronoi_exchange_primitive_variables();
+  calculate_vertex_velocity_divergence();
+#endif /* #ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE */
+
+#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
+  validate_vertex_velocities();
+#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
+
+#ifdef ONEDIMS_SPHERICAL
+  validate_vertex_velocities_1d();
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+  TIMER_STOP(CPU_SET_VERTEXVELS);
+}
+
+#ifdef ONEDIMS_SPHERICAL
+/*! \brief Handles inner boundary cells in 1d spherical case.
+ *
+ *  Sets the vertex velocity of cell 0 to zero if it would move inside All.CoreRadius within one time step.
+ *  \return void
+ */
+static void validate_vertex_velocities_1d()
+{
+  double dt = (P[0].TimeBinHydro ? (((integertime)1) << P[0].TimeBinHydro) : 0) * All.Timebase_interval;
+  if(P[0].Pos[0] + dt * SphP[0].VelVertex[0] < All.CoreRadius)
+    SphP[0].VelVertex[0] = 0.;
+}
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
+/*! \brief Checks validity of vertex velocities with boundary conditions.
+ *
+ *  In case we have reflecting boundaries, make sure that cell does not drift
+ *  beyond boundary: the velocity component along a reflective axis is set to zero if the drifted position would leave [0, boxSize).
+ *
+ *  \return void
+ */
+void validate_vertex_velocities(void)
+{
+  int idx, i;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      integertime ti_step = P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0;
+      double dt_drift;
+
+      if(All.ComovingIntegrationOn)
+        dt_drift = get_drift_factor(All.Ti_Current, All.Ti_Current + ti_step);
+      else
+        dt_drift = ti_step * All.Timebase_interval;
+
+#if defined(REFLECTIVE_X)
+      if((P[i].Pos[0] + dt_drift * SphP[i].VelVertex[0]) < 0 || (P[i].Pos[0] + dt_drift * SphP[i].VelVertex[0]) >= boxSize_X)
+        SphP[i].VelVertex[0] = 0;
+#endif /* #if defined(REFLECTIVE_X) */
+#if defined(REFLECTIVE_Y)
+      if((P[i].Pos[1] + dt_drift * SphP[i].VelVertex[1]) < 0 || (P[i].Pos[1] + dt_drift * SphP[i].VelVertex[1]) >= boxSize_Y)
+        SphP[i].VelVertex[1] = 0;
+#endif /* #if defined(REFLECTIVE_Y) */
+#if defined(REFLECTIVE_Z)
+      if((P[i].Pos[2] + dt_drift * SphP[i].VelVertex[2]) < 0 || (P[i].Pos[2] + dt_drift * SphP[i].VelVertex[2]) >= boxSize_Z)
+        SphP[i].VelVertex[2] = 0;
+#endif /* #if defined(REFLECTIVE_Z) */
+    }
+}
+#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.c
new file mode 100644
index 0000000000..cc6964c01b
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.c
@@ -0,0 +1,1163 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi.c + * \date 05/2018 + * \brief Main file for Voronoi-mesh construction. + * \details contains functions: + * void create_mesh(void) + * int voronoi_get_local_particles(void) + * void free_mesh_structures_not_needed_for_derefinement_ + * refinement(void) + * void free_all_remaining_mesh_structures(void) + * void free_mesh(void) + * int compute_max_delaunay_radius(void) + * void compute_voronoi_faces_and_volumes(void) + * int area_list_data_compare(const void *a, const void *b) + * void apply_area_list(void) + * void derefine_refine_compute_volumes(double *vol) + * double nearest_x(double d) + * double nearest_y(double d) + * double nearest_z(double d) + * double get_cell_radius(int i) + * void dump_points(tessellation * T) + * int face_get_normals(tessellation * T, int i, struct + * geometry *geom) + * double distance_to_border(int cell) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +tessellation Mesh, DeRefMesh; + +unsigned char *Edge_visited; +struct area_list_data *AreaList; +int Narea, MaxNarea; + +int DPinfinity; /* marker for special infinity point */ +double CentralOffsetX, CentralOffsetY, CentralOffsetZ, ConversionFac; + +struct list_export_data *ListExports; +struct list_P_data *List_P; +int NumGasInMesh; +int *List_InMesh; + +int CountInSphereTests, CountInSphereTestsExact; +int CountConvexEdgeTest, CountConvexEdgeTestExact; +int Ninlist, MaxNinlist; + +int CountFlips, Count_1_to_3_Flips2d, Count_2_to_4_Flips2d; +int Count_1_to_4_Flips, Count_2_to_3_Flips, Count_3_to_2_Flips, Count_4_to_4_Flips; 
+int Count_EdgeSplits, Count_FaceSplits; +int Count_InTetra, Count_InTetraExact; +int Largest_N_DP_Buffer; + +long long TotCountInSphereTests, TotCountInSphereTestsExact; +long long TotCountConvexEdgeTest, TotCountConvexEdgeTestExact; + +long long TotCountFlips, TotCount_1_to_3_Flips2d, TotCount_2_to_4_Flips2d; +long long TotCount_1_to_4_Flips, TotCount_2_to_3_Flips, TotCount_3_to_2_Flips, TotCount_4_to_4_Flips; +long long TotCount_EdgeSplits, TotCount_FaceSplits; +long long TotCount_InTetra, TotCount_InTetraExact; + +/*! \brief Creates the Voronoi mesh. + * + * Routine which is called in run. + * If first creates a first, giant tetrahedron and than successively insert + * particles (first local, then ghost particles) compute their circumcircles + * and count the undecided tetrahedra. This procedure is repeated until all + * tetrahedra are decided. Then, the maximum Delauny radius is computed as + * well as the faces and volumes of the Voronoi-cells. + * + * \return void + */ +void create_mesh(void) +{ +#ifdef CREATE_FULL_MESH + int k; + + short int *buTimeBin = mymalloc_movable(&buTimeBin, "buTimeBin", NumPart * sizeof(short int)); + static int buTimeBinActive[TIMEBINS]; + + for(k = 0; k < NumPart; k++) + { + buTimeBin[k] = P[k].TimeBinHydro; + P[k].TimeBinHydro = 0; + } + + for(k = 0; k < TIMEBINS; k++) + { + buTimeBinActive[k] = TimeBinSynchronized[k]; + + TimeBinSynchronized[k] = 1; + } + + reconstruct_timebins(); +#endif /* #ifdef CREATE_FULL_MESH */ + + int tlast; + int idx, i, iter = 0, n, skip; + double tstart, tend; + long long ntot; + + if(All.TotNumGas == 0) + return; + + TIMER_START(CPU_MESH); + + mpi_printf("VORONOI: create delaunay mesh\n"); + + Ngb_MarkerValue++; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].Ti_Current != All.Ti_Current) + { + terminate("surprise! 
we don't expect this here anymore"); + drift_particle(i, All.Ti_Current); + } + + SphP[i].Hsml = 1.01 * SphP[i].MaxDelaunayRadius; + } + + initialize_and_create_first_tetra(&Mesh); + + CountInSphereTests = CountInSphereTestsExact = 0; + CountConvexEdgeTest = CountConvexEdgeTestExact = 0; + CountFlips = Count_1_to_3_Flips2d = Count_2_to_4_Flips2d = 0; + Count_1_to_4_Flips = 0; + Count_2_to_3_Flips = 0; + Count_3_to_2_Flips = 0; + Count_4_to_4_Flips = 0; + Count_EdgeSplits = 0; + Count_FaceSplits = 0; + Count_InTetra = Count_InTetraExact = 0; + Largest_N_DP_Buffer = 0; + + MaxNinlist = Mesh.Indi.AllocFacNinlist; + ListExports = mymalloc_movable(&ListExports, "ListExports", MaxNinlist * sizeof(struct list_export_data)); + + NumGasInMesh = 0; + List_InMesh = mymalloc_movable(&List_InMesh, "List_InMesh", NumGas * sizeof(int)); + + List_P = mymalloc_movable(&List_P, "List_P", NumGas * sizeof(struct list_P_data)); + + Mesh.DTC = mymalloc_movable(&Mesh.DTC, "DTC", Mesh.MaxNdt * sizeof(tetra_center)); + Mesh.DTF = mymalloc_movable(&Mesh.DTF, "DTF", Mesh.MaxNdt * sizeof(char)); + for(i = 0; i < Mesh.Ndt; i++) + Mesh.DTF[i] = 0; + + Ninlist = 0; + + tlast = 0; + + do + { + skip = Mesh.Ndp; + + TIMER_STOPSTART(CPU_MESH, CPU_MESH_FIND_DP); + + tstart = second(); + + if(iter == 0) + { + MPI_Allreduce(&Nvc, &Largest_Nvc, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + if(Largest_Nvc > 0) + n = voronoi_get_connected_particles(&Mesh); + else + n = voronoi_get_local_particles(); + } + else + { + n = voronoi_ghost_search(&Mesh); + } + + sumup_large_ints(1, &n, &ntot); + + tend = second(); + + if(iter == 0) + mpi_printf("VORONOI: iter=%d: %llu local points, points/sec/task = %g, took %g secs\n", iter, ntot, + ntot / (timediff(tstart, tend) + 1.0e-30) / NTask, timediff(tstart, tend)); + else + { + if(ntot) + mpi_printf("VORONOI: iter=%d: %llu additional points, points/sec/task = %g, took %g secs\n", iter, ntot, + ntot / (timediff(tstart, tend) + 1.0e-30) / NTask, timediff(tstart, tend)); + 
else + mpi_printf("VORONOI: iter=%d: %llu additional points, took %g secs\n", iter, ntot, timediff(tstart, tend)); + } + + TIMER_STOPSTART(CPU_MESH_FIND_DP, CPU_MESH_INSERT); + + for(i = 0; i < n; i++) + { +#ifndef OPTIMIZE_MEMORY_USAGE + set_integers_for_point(&Mesh, skip + i); +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ + tlast = insert_point(&Mesh, skip + i, tlast); + } + + TIMER_STOPSTART(CPU_MESH_INSERT, CPU_MESH_CELLCHECK); + + compute_circumcircles(&Mesh); + + if(iter > 0) + { + n = count_undecided_tetras(&Mesh); + + sumup_large_ints(1, &n, &ntot); + + if(ntot) + { + mpi_printf("VORONOI: still undecided %llu tetrahedras\n", ntot); + +#ifndef DOUBLE_STENCIL + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + SphP[i].Hsml *= HSML_INCREASE_FACTOR; + } +#else /* #ifndef DOUBLE_STENCIL */ + for(i = 0; i < Mesh.Ndp; i++) + Mesh.DP[i].Hsml *= HSML_INCREASE_FACTOR; +#endif /* #ifndef DOUBLE_STENCIL #else */ + } + } + else + { + ntot = 1; + } + + TIMER_STOPSTART(CPU_MESH_CELLCHECK, CPU_MESH); + + if(iter > MAX_VORONOI_ITERATIONS) + terminate("too many iterations\n"); + + iter++; + } + while(ntot > 0); + +#if(REFLECTIVE_X == 2) || (REFLECTIVE_Y == 2) || (REFLECTIVE_Z == 2) + for(i = 0; i < Mesh.Ndp; i++) + { +#if(REFLECTIVE_X == 2) + Mesh.DP[i].image_flags |= OUTFLOW_X; +#endif /* #if (REFLECTIVE_X == 2) */ +#if(REFLECTIVE_Y == 2) + Mesh.DP[i].image_flags |= OUTFLOW_Y; +#endif /* #if (REFLECTIVE_Y == 2) */ +#if(REFLECTIVE_Z == 2) + Mesh.DP[i].image_flags |= OUTFLOW_Z; +#endif /* #if (REFLECTIVE_Z == 2) */ + } +#endif /* #if (REFLECTIVE_X == 2) || (REFLECTIVE_Y == 2) || (REFLECTIVE_Z == 2) */ + + compute_max_delaunay_radius(); + + TIMER_STOPSTART(CPU_MESH, CPU_LOGS); + +#ifdef VERBOSE + long long TotNdp, TotNdt; + + int in[15]; + long long out[15]; + + in[0] = Mesh.Ndp; + in[1] = Mesh.Ndt; + in[2] = CountInSphereTests; + in[3] = CountInSphereTestsExact; + in[4] = CountFlips; + in[5] = 
Count_InTetra; + in[6] = Count_InTetraExact; +#ifndef TWODIMS + in[7] = Count_1_to_4_Flips; + in[8] = Count_2_to_3_Flips; + in[9] = Count_3_to_2_Flips; + in[10] = Count_4_to_4_Flips; + in[11] = Count_FaceSplits; + in[12] = Count_EdgeSplits; + in[13] = CountConvexEdgeTest; + in[14] = CountConvexEdgeTestExact; + n = 15; +#else /* #ifndef TWODIMS */ + in[7] = Count_1_to_3_Flips2d; + in[8] = Count_2_to_4_Flips2d; + n = 9; +#endif /* #ifndef TWODIMS #else */ + + sumup_large_ints(n, in, out); + + TotNdp = out[0]; + TotNdt = out[1]; + TotCountInSphereTests = out[2]; + TotCountInSphereTestsExact = out[3]; + TotCountFlips = out[4]; + TotCount_InTetra = out[5]; + TotCount_InTetraExact = out[6]; +#ifndef TWODIMS + TotCount_1_to_4_Flips = out[7]; + TotCount_2_to_3_Flips = out[8]; + TotCount_3_to_2_Flips = out[9]; + TotCount_4_to_4_Flips = out[10]; + TotCount_FaceSplits = out[11]; + TotCount_EdgeSplits = out[12]; + TotCountConvexEdgeTest = out[13]; + TotCountConvexEdgeTestExact = out[14]; +#else /* #ifndef TWODIMS */ + TotCount_1_to_3_Flips2d = out[7]; + TotCount_2_to_4_Flips2d = out[8]; +#endif /* #ifndef TWODIMS #else */ + + if(ThisTask == 0) + { +#ifndef TWODIMS + printf( + "VORONOI: Average D-Points=%llu (NumGas=%llu) D-Tetrahedra=%llu InSphereTests=%llu InSphereTestsExact=%llu " + "Flips=%llu\n", + TotNdp / NTask, All.TotNumGas / NTask, TotNdt / NTask, TotCountInSphereTests / NTask, TotCountInSphereTestsExact / NTask, + TotCountFlips / NTask); + printf("VORONOI: 1_to_4_Flips=%llu 2_to_3_Flips=%llu 3_to_2_Flips=%llu 4_to_4_Flips=%llu FaceSplits=%llu EdgeSplits=%llu\n", + TotCount_1_to_4_Flips / NTask, TotCount_2_to_3_Flips / NTask, TotCount_3_to_2_Flips / NTask, + TotCount_4_to_4_Flips / NTask, TotCount_FaceSplits / NTask, TotCount_EdgeSplits / NTask); + printf("VORONOI: InTetra=%llu InTetraExact=%llu ConvexEdgeTest=%llu ConvexEdgeTestExact=%llu\n", TotCount_InTetra, + TotCount_InTetraExact / NTask, TotCountConvexEdgeTest / NTask, TotCountConvexEdgeTestExact / NTask); 
+#else /* #ifndef TWODIMS */ + printf( + "VORONOI: Average D-Points=%llu (NumGas=%llu) D-Triangles=%llu InCircleTests=%llu InCircleTestsExact=%llu Flips=%llu\n", + TotNdp / NTask, All.TotNumGas / NTask, TotNdt / NTask, TotCountInSphereTests / NTask, TotCountInSphereTestsExact / NTask, + TotCountFlips / NTask); + printf("VORONOI: 1_to_3_Flips=%llu 2_to_4_Flips=%llu InTriangle=%llu InTriangleExact=%llu\n", TotCount_1_to_3_Flips2d / NTask, + TotCount_2_to_4_Flips2d / NTask, TotCount_InTetra / NTask, TotCount_InTetraExact / NTask); +#endif /* #ifndef TWODIMS #else */ + printf("VORONOI: Total D-Points: %llu Ratio=%g\n", TotNdp, ((double)TotNdp) / All.TotNumGas); + } +#endif /* #ifdef VERBOSE */ + + TIMER_STOPSTART(CPU_LOGS, CPU_MESH_GEOMETRY); + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + SphP[i].Volume = 0; + SphP[i].SurfaceArea = 0; +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + SphP[i].MaxFaceAngle = 0; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ +#ifdef OUTPUT_SURFACE_AREA + SphP[i].CountFaces = 0; +#endif /* #ifdef OUTPUT_SURFACE_AREA */ + } + + compute_voronoi_faces_and_volumes(); + + double vol, voltot; + + vol = 0; + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + vol += SphP[i].Volume; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + P[i].SofteningType = get_softeningtype_for_hydro_cell(i); +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + } + + MPI_Reduce(&vol, &voltot, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + + mpi_printf("VORONOI: Total volume of active cells = %g\n", voltot); + + TIMER_STOP(CPU_MESH_GEOMETRY); + + voronoi_update_connectivity(&Mesh); + + myfree(Mesh.DTF); + + if(All.HighestActiveTimeBin == All.HighestOccupiedTimeBin) /* only do this for full steps */ + { + /* check whether we can reduce allocation 
factors */ + while(Mesh.Ndp < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNdp && Mesh.Indi.AllocFacNdp > MIN_ALLOC_NUMBER) + Mesh.Indi.AllocFacNdp /= ALLOC_INCREASE_FACTOR; + + while(Mesh.Ndt < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNdt && Mesh.Indi.AllocFacNdt > MIN_ALLOC_NUMBER) + Mesh.Indi.AllocFacNdt /= ALLOC_INCREASE_FACTOR; + + while(Mesh.Nvf < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNvf && Mesh.Indi.AllocFacNvf > MIN_ALLOC_NUMBER) + Mesh.Indi.AllocFacNvf /= ALLOC_INCREASE_FACTOR; + + while(Ninlist < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNinlist && Mesh.Indi.AllocFacNinlist > MIN_ALLOC_NUMBER) + Mesh.Indi.AllocFacNinlist /= ALLOC_INCREASE_FACTOR; + + while(Largest_N_DP_Buffer < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacN_DP_Buffer && + Mesh.Indi.AllocFacN_DP_Buffer > MIN_ALLOC_NUMBER) + Mesh.Indi.AllocFacN_DP_Buffer /= ALLOC_INCREASE_FACTOR; + } + +#ifdef CREATE_FULL_MESH + for(k = 0; k < TIMEBINS; k++) + TimeBinSynchronized[k] = buTimeBinActive[k]; + + for(k = 0; k < NumPart; k++) + P[k].TimeBinHydro = buTimeBin[k]; + + reconstruct_timebins(); + + myfree_movable(buTimeBin); +#endif /* #if defined(CREATE_FULL_MESH) */ +} + +/*! \brief Routine that fetches local gas cells. + * + * Runs through all active particles and inserts active gas cells into mesh + * structure. Increases length of Mesh.DP and ListExports arrays if needed. + * + * \return Number of points. 
+ */ +int voronoi_get_local_particles(void) +{ + int p, idx, count = 0; + + /* first, let's add all the primary active points */ + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + p = TimeBinsHydro.ActiveParticleList[idx]; + + if(p < 0) + continue; + + if(P[p].Type == 0) + { + Ngb_Marker[p] = Ngb_MarkerValue; + + if((P[p].Mass == 0) && (P[p].ID == 0)) /* skip cells that have been swallowed or eliminated */ + { + List_P[p].firstexport = -1; + List_P[p].currentexport = -1; + continue; + } + + if(Ninlist >= MaxNinlist) + { + Mesh.Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = Mesh.Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("VORONOI: Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist, + Mesh.Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + List_InMesh[NumGasInMesh++] = p; + + List_P[p].currentexport = List_P[p].firstexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 1; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = ThisTask; + ListExports[List_P[p].currentexport].index = p; + + if(Mesh.Ndp >= Mesh.MaxNdp) + { + Mesh.Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR; + Mesh.MaxNdp = Mesh.Indi.AllocFacNdp; +#ifdef VERBOSE + printf("VORONOI: Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, Mesh.MaxNdp, + Mesh.Indi.AllocFacNdp); +#endif /* #ifdef VERBOSE */ + Mesh.DP -= 5; + Mesh.DP = myrealloc_movable(Mesh.DP, (Mesh.MaxNdp + 5) * sizeof(point)); + Mesh.DP += 5; + + if(Mesh.Ndp >= Mesh.MaxNdp) + terminate("Ndp >= MaxNdp"); + } + + SphP[p].ActiveArea = 0; + + point *dp = &Mesh.DP[Mesh.Ndp]; + + dp->x = P[p].Pos[0]; + dp->y = P[p].Pos[1]; + dp->z = P[p].Pos[2]; + dp->ID = P[p].ID; + dp->task = ThisTask; + dp->index = p; + 
dp->originalindex = -1; + dp->timebin = P[p].TimeBinHydro; + dp->image_flags = 1; +#ifdef DOUBLE_STENCIL + dp->Hsml = SphP[p].Hsml; + dp->first_connection = -1; + dp->last_connection = -1; +#endif /* #ifdef DOUBLE_STENCIL */ + + Mesh.Ndp++; + count++; + } + } + + return count; +} + +#ifdef REFINEMENT +struct refdata *RefExch; + +/*! \brief Structures that are freed before refinement and derefinement step. + * + * To Optimize the memory usage, this, in comubnation with + * free_all_remaining_mesh_structures() can be used instead of a free_mesh() + * after the refinement. This saves some memory. + * + * \return void + */ +void free_mesh_structures_not_needed_for_derefinement_refinement(void) +{ + if(All.TotNumGas == 0) + return; + + int i; + + myfree(GradExch); + + RefExch = (struct refdata *)mymalloc_movable(&RefExch, "RefExch", Mesh_nimport * sizeof(struct refdata)); + + for(i = 0; i < Mesh_nimport; i++) + { +#ifdef REFINEMENT_VOLUME_LIMIT + RefExch[i].Volume = PrimExch[i].Volume; +#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */ + RefExch[i].TimeBinHydro = PrimExch[i].TimeBinHydro; + } + + myfree_movable(PrimExch); +} + +/* \brief Structures that are freed after refinement and derefinement step. + * + * To Optimize the memory usage, this, in comubnation with + * free_mesh_structures_not_needed_for_derefinement_refinement(void) can be + * used instead of a free_mesh() after the refinement. This saves some memory. + * + * \return void + */ +void free_all_remaining_mesh_structures(void) +{ + if(All.TotNumGas == 0) + return; + + myfree(RefExch); + + myfree(Mesh.DTC); /* here we can free the centers of the Delaunay triangles again */ + Mesh.DTC = NULL; + myfree(List_P); + myfree(List_InMesh); + myfree(ListExports); + myfree(Mesh.DT); + myfree(Mesh.DP - 5); + myfree(Mesh.VF); +} +#endif /* #ifdef REFINEMENT */ + +/*! \brief Frees arrays associated with Voronoi-mesh. 
+ * + * \return void + */ +void free_mesh(void) +{ + if(All.TotNumGas == 0) + return; + +#if defined(DOUBLE_STENCIL) + mpi_printf("freeing double stencil connections...\n"); + int i; + for(i = 0; i < Mesh.Ndp; i++) + if(Mesh.DP[i].first_connection >= 0) + { + if(Mesh.DP[i].flag_primary_triangle == 0) + terminate("Mesh.DP[i].flag_primary_triangle"); + + int q = Mesh.DP[i].first_connection; + + if(q >= 0) /* we have connections, let's add them to the free list */ + { + while(q >= 0) + { + Nvc--; + DC[q].task = -1; /* mark that this is unused */ + + if(q == Mesh.DP[i].last_connection) + break; + + q = DC[q].next; + } + + /* we add the new free spots at the beginning of the free list */ + DC[Mesh.DP[i].last_connection].next = FirstUnusedConnection; + FirstUnusedConnection = Mesh.DP[i].first_connection; + + Mesh.DP[i].first_connection = -1; + Mesh.DP[i].last_connection = -1; + } + } + mpi_printf("done with freeing double stencil connections.\n"); +#endif /* #if defined(DOUBLE_STENCIL) */ + + myfree_movable(GradExch); + myfree_movable(PrimExch); + + myfree_movable(Mesh.DTC); /* here we can free the centers of the Delaunay triangles again */ + Mesh.DTC = NULL; + myfree_movable(List_P); + myfree_movable(List_InMesh); + myfree_movable(ListExports); + myfree_movable(Mesh.DT); + myfree_movable(Mesh.DP - 5); + myfree_movable(Mesh.VF); +} + +/*! \brief Get the maximum Delaunay radius for all active cells. + * + * Defined as the maximum distance between tetrahedron center and its + * neighboring points. Stores this radius in the respective field in the + * SphP structure. + * + * \return 0 (unused). 
+ */ +int compute_max_delaunay_radius(void) +{ + int idx, i, j, count = 0; + point *p; + double dx, dy, dz, r; + +#ifdef ONEDIMS + return 0; +#endif /* #ifdef ONEDIMS */ + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + SphP[i].MaxDelaunayRadius = 0; + } + + point *DP = Mesh.DP; + tetra *DT = Mesh.DT; + tetra_center *DTC = Mesh.DTC; + + for(i = 0; i < Mesh.Ndt; i++) + { + if(DT[i].t[0] < 0) /* deleted ? */ + continue; + + dx = DP[DT[i].p[0]].x - DTC[i].cx; + dy = DP[DT[i].p[0]].y - DTC[i].cy; + dz = DP[DT[i].p[0]].z - DTC[i].cz; + + r = 2 * sqrt(dx * dx + dy * dy + dz * dz); + + for(j = 0; j < (DIMS + 1); j++) + { + p = &DP[DT[i].p[j]]; + + if(p->task == ThisTask && p->index < NumGas && p->index >= 0) + if(TimeBinSynchronized[P[p->index].TimeBinHydro]) + if(r > SphP[p->index].MaxDelaunayRadius) + SphP[p->index].MaxDelaunayRadius = r; + } + } + + return count; +} + +#ifndef ONEDIMS +/*! \brief Computes interface areas volume of cells. + * + * Loops over Delaunay tetrahedra to calculate interface area and volume + * contributions to the individual cells. Calculates as well the center of + * mass. 
+ * + * \return void + */ +void compute_voronoi_faces_and_volumes(void) +{ + int idx, i, bit, nr; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + SphP[i].Volume = 0; + SphP[i].Center[0] = 0; + SphP[i].Center[1] = 0; + SphP[i].Center[2] = 0; +#if defined(REFINEMENT_SPLIT_CELLS) + SphP[i].MinimumEdgeDistance = MAX_FLOAT_NUMBER; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */ + } + + Edge_visited = mymalloc_movable(&Edge_visited, "Edge_visited", Mesh.Ndt * sizeof(unsigned char)); + + for(i = 0; i < Mesh.Ndt; i++) + Edge_visited[i] = 0; + + MaxNarea = Mesh.Indi.AllocFacNflux; + Narea = 0; + AreaList = mymalloc_movable(&AreaList, "AreaList", MaxNarea * sizeof(struct area_list_data)); + + for(i = 0; i < Mesh.Ndt; i++) + { + if(Mesh.DT[i].t[0] < 0) /* deleted ? */ + continue; + + bit = 1; + nr = 0; + + while(Edge_visited[i] != EDGE_ALL) + { + if((Edge_visited[i] & bit) == 0) + process_edge_faces_and_volumes(&Mesh, i, nr); + + bit <<= 1; + nr++; + } + } + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(SphP[i].Volume) + { + SphP[i].Center[0] /= SphP[i].Volume; + SphP[i].Center[1] /= SphP[i].Volume; + SphP[i].Center[2] /= SphP[i].Volume; + } + } + + apply_area_list(); + myfree(AreaList); + + myfree(Edge_visited); +} + +/*! \brief Compare task of two area_list_data structures. + * + * \param[in] a Pointer to first area_list_data structure. + * \param[in] b Pointer to second area_list_data structure. + * + * \return (-1,0,1), -1 if a.tasktask < (((struct area_list_data *)b)->task)) + return -1; + + if(((struct area_list_data *)a)->task > (((struct area_list_data *)b)->task)) + return +1; + + return 0; +} + +/*! \brief Sorts all interface areas and adds them to respective mesh + * generating points (ActiveArea). 
+ * + * \return void + */ +void apply_area_list(void) +{ + int i, j, p, nimport, ngrp, recvTask; + + /* now exchange the area-list and apply where needed */ + + mysort(AreaList, Narea, sizeof(struct area_list_data), area_list_data_compare); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(i = 0; i < Narea; i++) + Send_count[AreaList[i].task]++; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + struct area_list_data *AreaListGet = (struct area_list_data *)mymalloc("AreaListGet", nimport * sizeof(struct area_list_data)); + + /* exchange particle data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&AreaList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct area_list_data), MPI_BYTE, recvTask, + TAG_DENS_A, &AreaListGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct area_list_data), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + /* apply the area */ + for(i = 0; i < nimport; i++) + { + p = AreaListGet[i].index; + SphP[p].ActiveArea += AreaListGet[i].darea; + } + + myfree(AreaListGet); +} + +/*! \brief Calculates volumes of all cells that are created in refinement. + * + * \param[out] vol Volumes of cells. 
+ * + * \return void + */ +void derefine_refine_compute_volumes(double *vol) +{ + int i, bit, nr; + + for(i = 0; i < DeRefMesh.Ndp; i++) + vol[i] = 0; + + Edge_visited = mymalloc_movable(&Edge_visited, "Edge_visited", DeRefMesh.Ndt * sizeof(unsigned char)); + + for(i = 0; i < DeRefMesh.Ndt; i++) + Edge_visited[i] = 0; + + for(i = 0; i < DeRefMesh.Ndt; i++) + { + if(DeRefMesh.DT[i].t[0] < 0) /* deleted ? */ + continue; + + bit = 1; + nr = 0; + + while(Edge_visited[i] != EDGE_ALL) + { + if((Edge_visited[i] & bit) == 0) + derefine_refine_process_edge(&DeRefMesh, vol, i, nr); + + bit <<= 1; + nr++; + } + } + + myfree(Edge_visited); +} + +#endif /* #ifndef ONEDIMS */ + +/*! \brief Nearest distance in x direction, accounting for periodicity. + * + * \param[in] d Distance to be checked. + * + * \return Nearest distance. + */ +double nearest_x(double d) +{ +#if !defined(REFLECTIVE_X) + if(d < -boxHalf_X) + d += boxSize_X; + if(d > boxHalf_X) + d -= boxSize_X; +#endif /* #if !defined(REFLECTIVE_X) */ + return d; +} + +/*! \brief Nearest distance in y direction, accounting for periodicity. + * + * \param[in] d Distance to be checked. + * + * \return Nearest distance. + */ +double nearest_y(double d) +{ +#if !defined(REFLECTIVE_Y) + if(d < -boxHalf_Y) + d += boxSize_Y; + if(d > boxHalf_Y) + d -= boxSize_Y; +#endif /* #if !defined(REFLECTIVE_Y) */ + return d; +} + +/* \brief Nearest distance in z direction, accounting for periodicity. + * + * \param[in] d Distance to be checked. + * + * \return Nearest distance. + */ +double nearest_z(double d) +{ +#if !defined(REFLECTIVE_Z) + if(d < -boxHalf_Z) + d += boxSize_Z; + if(d > boxHalf_Z) + d -= boxSize_Z; +#endif /* #if !defined(REFLECTIVE_Z) */ + return d; +} + +/*! \brief Gets "radius" of a cell. + * + * Defined as the radius of a sphere with the same volume as the Voronoi cell. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return radius of cell i. 
+ */ +double get_cell_radius(int i) +{ + double cellrad; + +#ifdef TWODIMS + cellrad = sqrt(SphP[i].Volume / M_PI); +#else /* #ifdef TWODIMS */ +#ifdef ONEDIMS +#ifdef ONEDIMS_SPHERICAL + cellrad = 0.5 * (Mesh.VF[i + 1].cx - Mesh.VF[i].cx); +#else /* #ifdef ONEDIMS_SPHERICAL */ + cellrad = 0.5 * SphP[i].Volume; +#endif /* #ifdef ONEDIMS_SPHERICAL #else */ +#else /* #ifdef ONEDIMS */ + cellrad = pow(SphP[i].Volume * 3.0 / (4.0 * M_PI), 1.0 / 3); +#endif /* #ifdef ONEDIMS #else */ +#endif /* #ifdef TWODIMS */ + return cellrad; +} + +/*! \brief Writes a file points_X.dat with Delaunay points. + * + * Writes position as in DP structure. + * + * \param[in] T tessellation for which Delaunay point positions should be + * written. + * + * \return void + */ +void dump_points(tessellation *T) +{ + FILE *fd; + int i; + double xyz[3]; + char buf[1000]; + + sprintf(buf, "points_%d.dat", ThisTask); + fd = fopen(buf, "w"); + my_fwrite(&T->Ndp, sizeof(int), 1, fd); + for(i = 0; i < T->Ndp; i++) + { + xyz[0] = T->DP[i].x; + xyz[1] = T->DP[i].y; + xyz[2] = T->DP[i].z; + my_fwrite(xyz, sizeof(double), 3, fd); + } + fclose(fd); +} + +/*! \brief Calculates the normals to given interfaces. + * + * \param[in] T Pointer to tesslation data. + * \param[in] i Index of Voronoi-face in tesslation T. + * \param[out] geom Pointer to structure to which normal data is written. + * + * \return 0 if success, -1 if interface can be ignored. 
+ */ +int face_get_normals(tessellation *T, int i, struct geometry *geom) +{ + int li, ri; + double surface, surface_l, surface_r; + int present_left, present_right; + double mm; + + face *VF = T->VF; + point *DP = T->DP; + + li = DP[VF[i].p1].index; + ri = DP[VF[i].p2].index; + + if(li < 0 || ri < 0) + return -1; + + if(li >= NumGas && DP[VF[i].p1].task == ThisTask) + li -= NumGas; + + if(ri >= NumGas && DP[VF[i].p2].task == ThisTask) + ri -= NumGas; + + if(DP[VF[i].p1].task == ThisTask) + surface_l = SphP[li].SurfaceArea; + else + surface_l = PrimExch[li].SurfaceArea; + + if(DP[VF[i].p2].task == ThisTask) + surface_r = SphP[ri].SurfaceArea; + else + surface_r = PrimExch[ri].SurfaceArea; + + if(surface_r > surface_l) + surface = 1.0e-5 * surface_r; + else + surface = 1.0e-5 * surface_l; + + present_left = present_right = 0; + + /* if the area of this face is negligible compared to the surface + of the larger cell, skip it */ + if(DP[VF[i].p1].task == ThisTask && DP[VF[i].p1].index < NumGas) + if(TimeBinSynchronized[P[DP[VF[i].p1].index].TimeBinHydro]) + if(VF[i].area > surface) + present_left = 1; + + if(DP[VF[i].p2].task == ThisTask && DP[VF[i].p2].index < NumGas) + if(TimeBinSynchronized[P[DP[VF[i].p2].index].TimeBinHydro]) + if(VF[i].area > surface) + present_right = 1; + + if(present_left == 0 && present_right == 0) + { +#ifndef VORONOI_STATIC_MESH + VF[i].area = 0; +#endif /* #ifndef VORONOI_STATIC_MESH */ + return -1; + } + + /* center of face */ + geom->cx = VF[i].cx; + geom->cy = VF[i].cy; + geom->cz = VF[i].cz; + + /* normal vector pointing to "right" state */ + geom->nx = DP[VF[i].p2].x - DP[VF[i].p1].x; + geom->ny = DP[VF[i].p2].y - DP[VF[i].p1].y; + geom->nz = DP[VF[i].p2].z - DP[VF[i].p1].z; + + geom->nn = sqrt(geom->nx * geom->nx + geom->ny * geom->ny + geom->nz * geom->nz); + geom->nx /= geom->nn; + geom->ny /= geom->nn; + geom->nz /= geom->nn; + + /* need an ortonormal basis */ + if(geom->nx != 0 || geom->ny != 0) + { + geom->mx = -geom->ny; + 
geom->my = geom->nx; + geom->mz = 0; + } + else + { + geom->mx = 1; + geom->my = 0; + geom->mz = 0; + } + + mm = sqrt(geom->mx * geom->mx + geom->my * geom->my + geom->mz * geom->mz); + geom->mx /= mm; + geom->my /= mm; + geom->mz /= mm; + + geom->px = geom->ny * geom->mz - geom->nz * geom->my; + geom->py = geom->nz * geom->mx - geom->nx * geom->mz; + geom->pz = geom->nx * geom->my - geom->ny * geom->mx; + + return 0; +} + +/*! \brief Calculates distance of a cell to boundary of computational box. + * + * \param[in] cell Index of cell in P and SphP structure. + * + * \return Distance to border. + */ +double distance_to_border(int cell) +{ + double d1 = boxSize_X - P[cell].Pos[0]; + assert(d1 > 0); + + double d2 = P[cell].Pos[0]; + + double min = fmin(d1, d2); + + d1 = boxSize_Y - P[cell].Pos[1]; + assert(d1 > 0); + + d2 = P[cell].Pos[1]; + + double min2 = fmin(d1, d2); + min = fmin(min, min2); + + d1 = boxSize_Z - P[cell].Pos[2]; + assert(d1 > 0); + + d2 = P[cell].Pos[2]; + min2 = fmin(d1, d2); + + min = fmin(min, min2); + + return min; +} diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi.h b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.h new file mode 100644 index 0000000000..31aaae1ecb --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.h @@ -0,0 +1,379 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi.h + * \date 05/2018 + * \brief Header for Voronoi mesh-construcion + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 29.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef HAVE_H_VORONOI +#define HAVE_H_VORONOI + +#include + +#define STACKSIZE_TETRA 10000 +#define MIN_ALLOC_NUMBER 1000 +#define ALLOC_INCREASE_FACTOR 1.1 +#define ALLOC_DECREASE_FACTOR 0.7 +#define MAX_VORONOI_ITERATIONS 500 + +#define GENTLE_DEREFINE_FACTOR 1.2 + +#define USEDBITS 52 + +#if USEDBITS > 31 +typedef signed long long int IntegerMapType; +void MY_mpz_set_si(mpz_t dest, signed long long int val); +void MY_mpz_mul_si(mpz_t prod, mpz_t mult, signed long long int val); +void MY_mpz_sub_ui(mpz_t prod, mpz_t mult, unsigned long long int val); +#else /* #if USEDBITS > 31 */ +typedef signed long int IntegerMapType; +#define MY_mpz_set_si mpz_set_si +#define MY_mpz_mul_si mpz_mul_si +#define MY_mpz_sub_ui mpz_sub_ui +#endif /* #if USEDBITS > 31 #else */ + +#define DOUBLE_to_VORONOIINT(y) ((IntegerMapType)(((*((long long *)&y)) & 0xFFFFFFFFFFFFFllu) >> (52 - USEDBITS))) + +/* Prerequisites for this function: + * sizeof(double)==sizeof(unsigned long long) + * doubles must be stored according to IEEE 754 + */ +static inline IntegerMapType double_to_voronoiint(double d) +{ + union + { + double d; + unsigned long long ull; + } u; + u.d = d; + return (u.ull & 0xFFFFFFFFFFFFFllu) >> (52 - USEDBITS); +} + +static inline double mask_voronoi_int(double x) +{ + union + { + double d; + unsigned long long ull; + } u; + u.d = x; + 
u.ull = u.ull & (~((1llu << (52 - USEDBITS)) - 1)); + return u.d; +} + +#ifndef TWODIMS + +#define EDGE_0 1 /* points 0-1 */ +#define EDGE_1 2 /* points 0-2 */ +#define EDGE_2 4 /* points 0-3 */ +#define EDGE_3 8 /* points 1-2 */ +#define EDGE_4 16 /* points 1-3 */ +#define EDGE_5 32 /* points 2-3 */ +#define EDGE_ALL 63 + +#else /* #ifndef TWODIMS */ + +#define EDGE_0 1 /* points 1-2 */ +#define EDGE_1 2 /* points 0-2 */ +#define EDGE_2 4 /* points 0-1 */ +#define EDGE_ALL 7 + +#endif /* #ifndef TWODIMS #else */ + +#define HSML_INCREASE_FACTOR 1.3 + +#ifdef TWODIMS /* will only be compiled in 2D case */ +#define DIMS 2 +#else /* #ifdef TWODIMS */ +#define DIMS 3 +#endif /*#ifdef TWODIMS #else */ + +typedef struct +{ + double x, y, z; // The 3-space position of the point + MyIDType ID; + int task; // The MPI task owning this cell + int index; // The hydro quantity index of the cell + int originalindex, timebin; + unsigned int image_flags; + +#ifndef OPTIMIZE_MEMORY_USAGE + double xx, yy, zz; + IntegerMapType ix, iy, iz; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ + +#ifdef DOUBLE_STENCIL + MyFloat Hsml; + int first_connection; + int last_connection; + char flag_primary_triangle; +#endif /* #ifdef DOUBLE_STENCIL */ +} point; + +typedef struct tetra_data +{ + int p[DIMS + 1]; /*!< oriented tetrahedron points */ + int t[DIMS + 1]; /*!< adjacent tetrahedrons, always opposite to corresponding point */ + unsigned char s[DIMS + 1]; /*!< gives the index of the point in the adjacent tetrahedron that + lies opposite to the common face */ + + /* Note: if t[0] == -1, the tetrahedron has been deleted */ +} tetra; + +typedef struct tetra_center_data +{ +#ifndef OPTIMIZE_MEMORY_USAGE + double cx, cy, cz; /*!< describes circumcircle center */ +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + MyFloat cx, cy, cz; +#endif /*#ifndef OPTIMIZE_MEMORY_USAGE */ +} tetra_center; + +typedef struct tri_data +{ + double p[DIMS + 1][DIMS]; + int owner; +} triangle; + +extern unsigned char 
*Edge_visited; + +extern struct list_export_data +{ + unsigned int image_bits; + int origin, index; + int nextexport; +} * ListExports; + +extern int Ninlist, MaxNinlist; + +extern struct area_list_data +{ + int task, index; + double darea; +} * AreaList; + +extern int Narea, MaxNarea; + +extern int NumGasInMesh; +extern int *List_InMesh; + +extern struct list_P_data +{ + int firstexport, currentexport; + +} * List_P; + +typedef struct connection_data +{ + int task; + int index; + int image_flags; + int next; + + int dp_index; /*!< this seems to be needed always the way voronoi_makeimage is implemented at the moment */ + int vf_index; /*!< index to the corresponding face */ +#if defined(TETRA_INDEX_IN_FACE) + int dt_index; +#endif /* #if defined(TETRA_INDEX_IN_FACE)*/ + MyIDType ID; +} connection; + +/*! This structure contains the points where a line segment intersects + * the tetrahedron faces and the internal voronoi faces. Is returned + * by calc_voronoi_intersections(). + */ +typedef struct intersection_list_data +{ + double s; /*!< the distance from the entry point (fraction of whole segment) */ + point p; /*!< the intersection point */ + int indA, indB; /*!< the indices of the tetra points (0-4) defining the face */ +} intersection_list; + +extern int CountInSphereTests, CountInSphereTestsExact; +extern int CountConvexEdgeTest, CountConvexEdgeTestExact; +extern int CountFlips, Count_1_to_3_Flips2d, Count_2_to_4_Flips2d; +extern int Count_1_to_4_Flips, Count_2_to_3_Flips, Count_3_to_2_Flips, Count_4_to_4_Flips; +extern int Count_EdgeSplits, Count_FaceSplits; +extern int Count_InTetra, Count_InTetraExact; +extern int Largest_N_DP_Buffer; + +extern int Ninlist, MaxNinlist; + +typedef struct individual_alloc_data +{ + double AllocFacNdp; + double AllocFacNdt; + double AllocFacNvf; + double AllocFacNinlist; + double AllocFacN_DP_Buffer; + double AllocFacNflux; + double AllocFacNradinflux; + double AllocFacNvc; +} mesh_alloc_facs; + +typedef struct 
tessellation_data +{ + int Ndp; /*!< number of delaunay points */ + int MaxNdp; /*!< maximum number of delaunay points */ + point *DP; /*!< delaunay points */ + + int Ndt; + int MaxNdt; /*!< number of delaunary tetrahedra */ + tetra *DT; /*!< Delaunay tetrahedra */ + tetra_center *DTC; /*!< circumcenters of delaunay tetrahedra */ + char *DTF; + + int Nvf; /*!< number of Voronoi faces */ + int MaxNvf; /*!< maximum number of Voronoi faces */ + face *VF; /*!< Voronoi faces */ + + mesh_alloc_facs Indi; +} tessellation; + +extern tessellation Mesh, DeRefMesh; + +extern int DPinfinity; + +extern int Nvc; /* number of connections */ +extern int MaxNvc; /* maximum number of connections */ +extern int Largest_Nvc; +extern connection *DC; /* Connections */ +extern int FirstUnusedConnection; + +extern double CentralOffsetX, CentralOffsetY, CentralOffsetZ, ConversionFac; + +int derefine_add_point_and_split_tri(int q, triangle *trilist, int n, int max_n, double vol); +void derefine_refine_process_edge(tessellation *T, double *vol, int tt, int nr); +void derefine_refine_compute_volumes(double *vol); +int derefine_refine_get_triangles(tessellation *T, int tt, int nr, point *dtip, triangle *trilist, int ntri, int max_n_tri); +void create_mesh(void); +void mesh_setup_exchange(void); +void free_mesh(void); +void free_mesh_structures_not_needed_for_derefinement_refinement(void); +void free_all_remaining_mesh_structures(void); +void apply_area_list(void); +int area_list_data_compare(const void *a, const void *b); +void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask); +void initialize_and_create_first_tetra(tessellation *T); +void compute_voronoi_faces_and_volumes(void); +void get_line_segments(int sphp_index, int dp_index, double *segments, unsigned int *nof_elements, unsigned int max_elements); +double cross_section_plane_cell(int sphp_index, int dp_index, double *center, double *n); +void intersections_plane_cell(int sphp_index, int dp_index, double 
*center, double *n, double *polygon, unsigned int *nof_elements); +void intersection_plane_grid(double *center, double *n, const char *filename); +void process_edge_faces_and_volumes(tessellation *T, int tt, int nr); +int insert_point(tessellation *T, int pp, int ttstart); +void make_an_edge_split(tessellation *T, int tt0, int edge_nr, int count, int pp, int *ttlist); +void make_a_face_split(tessellation *T, int tt0, int face_nr, int pp, int tt1, int tt2, int qq1, int qq2); +double calculate_tetra_volume(point *p0, point *p1, point *p2, point *p3); +void make_a_4_to_4_flip(tessellation *T, int tt, int tip_index, int edge_nr); +double get_tri_volume(int i, triangle *trilist); +void make_a_1_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3); +void make_a_3_to_2_flip(tessellation *T, int tt0, int tt1, int tt2, int tip, int edge, int bottom); +void make_a_2_to_3_flip(tessellation *T, int tt0, int tip, int tt1, int bottom, int qq, int tt2); +int get_tetra(tessellation *T, point *p, int *moves, int ttstart, int *flag, int *edgeface_nr); +int InTetra(tessellation *T, int tt, point *pp, int *edgeface_nr, int *nexttetra); +double InSphere(point *p0, point *p1, point *p2, point *p3, point *p); +void update_circumcircle(tessellation *T, int tt); +int test_tetra_orientation(point *p0, point *p1, point *p2, point *p3); +int voronoi_ghost_search_alternative(tessellation *T); +void compute_circumcircles(tessellation *T); +int compute_max_delaunay_radius(void); +void check_for_min_distance(tessellation *T); +void check_links(tessellation *T); +void check_orientations(tessellation *T); +void check_tetras(tessellation *T, int npoints); +int voronoi_get_local_particles(void); +int convex_edge_test(tessellation *T, int tt, int tip, int *edgenr); +void calculate_gradients(void); +void limit_gradient(double *d, double phi, double min_phi, double max_phi, MySingle *dphi); +void exchange_primitive_variables(void); +void 
exchange_primitive_variables_and_gradients(void); +int compare_primexch(const void *a, const void *b); + +/* 2D voronoi routines */ +void check_edge_and_flip_if_needed(tessellation *T, int ip, int it); +int get_triangle(tessellation *T, int pp, int *moves, int *degenerate_flag, int ttstart); +double InCircle(point *p0, point *p1, point *p2, point *p); +void make_a_1_to_3_flip(tessellation *T, int pp, int tt0, int tt1, int tt2); +double test_triangle_orientation(tessellation *T, int pp0, int pp1, int pp2); +void make_a_2_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3, int i0, int j0); +void dump_points(tessellation *T); +void set_integers_for_pointer(point *p); + +#if !defined(ONEDIMS) +#ifndef OPTIMIZE_MEMORY_USAGE +static inline void set_integers_for_point(tessellation *T, int pp) +{ + point *p = &T->DP[pp]; + set_integers_for_pointer(p); +} +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ +static inline void get_integers_for_point(point *p, IntegerMapType ixyz[], double xyz[]) +{ + xyz[0] = (p->x - CentralOffsetX) * ConversionFac + 1.0; + xyz[1] = (p->y - CentralOffsetY) * ConversionFac + 1.0; + xyz[2] = (p->z - CentralOffsetZ) * ConversionFac + 1.0; + + ixyz[0] = double_to_voronoiint(xyz[0]); + ixyz[1] = double_to_voronoiint(xyz[1]); + ixyz[2] = double_to_voronoiint(xyz[2]); + + xyz[0] = mask_voronoi_int(xyz[0]); + xyz[1] = mask_voronoi_int(xyz[1]); + xyz[2] = mask_voronoi_int(xyz[2]); +} +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + +#else /* #if !defined(ONEDIMS) */ +void set_integers_for_point(tessellation *T, int pp); +#endif /* #if !defined(ONEDIMS) #else */ + +/* quick function to compare a point to the infinity point */ +static inline int isInfinity(point *p) { return p->x == MAX_DOUBLE_NUMBER; } + +int solve_linear_equations(double *m, double *res); +void check_triangles(tessellation *T, int npoints); +int InCircle_Quick(tessellation *T, int pp0, int pp1, int pp2, int pp); +int InCircle_Errorbound(tessellation *T, int pp0, int pp1, 
int pp2, int pp); +int InCircle_Exact(tessellation *T, int pp0, int pp1, int pp2, int pp); +int Orient2d_Exact(tessellation *T, int pp0, int pp1, int pp2); +int Orient2d_Quick(tessellation *T, int pp0, int pp1, int pp2); +int FindTriangle(tessellation *T, int tt, int pp, int *degnerate_flag, int *nexttetra); +int InSphere_Exact(point *p0, point *p1, point *p2, point *p3, point *p); +int InSphere_Quick(point *p0, point *p1, point *p2, point *p3, point *p); +int InSphere_Errorbound(point *p0, point *p1, point *p2, point *p3, point *p); +int Orient3d_Quick(point *p0, point *p1, point *p2, point *p3); +int Orient3d(point *p0, point *p1, point *p2, point *p3); +int Orient3d_Exact(point *p0, point *p1, point *p2, point *p3); +int count_undecided_tetras(tessellation *T); +int ngb_treefind_ghost_search(tessellation *T, MyDouble searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat maxdist, int target, + int origin, int mode, int thread_id, int numnodes, int *firstnode); +int voronoi_ghost_search_evaluate(tessellation *T, int target, int mode, int q, int thread_id); +int voronoi_ghost_search(tessellation *T); +double distance_to_border(int cell); + +#endif /* HAVE_H_VORONOI */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c new file mode 100644 index 0000000000..54c325cd3b --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c @@ -0,0 +1,363 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_1d.c + * \date 05/2018 + * \brief Routines to build a 1d Voronoi mesh + * \details Note that some of these routines have the same name as the ones + * in voronoi_2d.c and voronoi_3d.c and just replace them in case + * the Config-option ONEDIMS is active. This is also the reason + * why some of these functions are empty but nonetheless have to + * exist in this file. + * contains functions: + * void write_voronoi_mesh(tessellation * T, char *fname, + * int writeTask, int lastTask) + * void initialize_and_create_first_tetra(tessellation * T) + * void compute_circumcircles(tessellation * T) + * void set_integers_for_point(tessellation * T, int pp) + * int insert_point(tessellation * T, int pp, int ttstart) + * int voronoi_ghost_search(tessellation * T) + * int count_undecided_tetras(tessellation * T) + * int voronoi_ghost_search_alternative(tessellation * T) + * void compute_voronoi_faces_and_volumes(void) + * void voronoi_1D_order(void) + * int voronoi_1D_compare_key(const void *a, const void *b) + * void voronoi_1D_reorder_gas(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +#if defined(ONEDIMS) && !defined(ONEDIMS_SPHERICAL) /* will only be compiled in 1D case */ + +/*! \brief Output of Voroioi mesh to file. + * + * Not supported for 1d. 
+ * + * \return void + */ +void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask) +{ + terminate("write_voronoi_mesh not supported in 1d case!"); +} + +/*! \brief Initialises 1d tessellation and create all-enclosing segment. + * + * \param[out] T Pointer to tessllation structure which is set and its arrays + * are allocated in this routine. + * + * \return void + */ +void initialize_and_create_first_tetra(tessellation *T) +{ + char msg[200]; + + if(NTask > 1) + { + mpi_printf("1D code works only for 1 CPU\n"); + endrun(); + } + + T->MaxNdp = NumGas + 4; + T->MaxNdt = 4 + T->MaxNdp * 2; + T->MaxNvf = T->MaxNdt; + + if(NumGas == 0) + { + sprintf(msg, "NumGas=%d on Task=%d, but need at least one particle!\n", NumGas, ThisTask); + terminate(msg); + } + + T->Ndp = 0; + T->Nvf = 0; + T->Ndt = 0; + + T->VF = mymalloc_movable(&T->VF, "VF", T->MaxNvf * sizeof(face)); + + T->DP = mymalloc_movable(&T->DP, "DP", (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + T->DT = mymalloc_movable(&T->DT, "DT", T->MaxNdt * sizeof(tetra)); +} + +/*! \brief Computes circumcircles in 1d. + * + * Not necessary in 1d. However, this function has to exist for the 1d code + * to work. + * + * \param[in] T Pointer to tessllation structure. + * + * \return void + */ +void compute_circumcircles(tessellation *T) {} + +/*! \brief Empty funciton in 1d case. + * + * Not necessary in 1d. However, this function has to exist for the 1d code + * to work. + * + * \return void + */ +void set_integers_for_point(tessellation *T, int pp) {} + +/*! \brief Empty funciton in 1d case. + * + * Not necessary in 1d. However, this function has to exist for the 1d code + * to work. + * + * \return 0 + */ +int insert_point(tessellation *T, int pp, int ttstart) { return 0; } + +/*! \brief Wrapper routine to search for ghost cells for boundary cells. + * + * \param[out] T Pointer to tessellation. 
+ * + * \return 0 + */ +int voronoi_ghost_search(tessellation *T) { return voronoi_ghost_search_alternative(T); } + +/*! \brief Empty funciton in 1d case. + * + * Not necessary in 1d. However, this function has to exist for the 1d code + * to work. + * + * \return 0 + */ +int count_undecided_tetras(tessellation *T) { return 0; } + +/*! \brief Searches for ghost cells in 1d Voronoi mesh. + * + * This routine assumes an x ordered cell array. + * + * \param[out] T pointer to tessellation. + * + * \return 0 + */ +int voronoi_ghost_search_alternative(tessellation *T) +{ + double xl, xr; + int index_l, index_r; + +#if defined(REFLECTIVE_X) + xl = -P[0].Pos[0]; + index_l = 0; + + xr = boxSize_X + (boxSize_X - P[NumGas - 1].Pos[0]); + index_r = NumGas - 1; +#else /* #if defined(REFLECTIVE_X) */ + xl = P[NumGas - 1].Pos[0] - boxSize_X; + index_l = NumGas - 1; + + xr = P[0].Pos[0] + boxSize_X; + index_r = 0; +#endif /* #if defined(REFLECTIVE_X) #else */ + + point *DP = T->DP; + + DP[-1].x = xl; + DP[-1].y = 0; + DP[-1].z = 0; + DP[-1].task = ThisTask; + DP[-1].ID = P[index_l].ID; + DP[-1].index = index_l + NumGas; /* this is a mirrored local point */ +#if defined(REFLECTIVE_X) + DP[-1].image_flags = REFL_X_FLAGS; +#if(REFLECTIVE_X == 2) + DP[-1].image_flags |= OUTFLOW_X; +#endif /* #if (REFLECTIVE_X == 2) */ +#endif /* #if defined(REFLECTIVE_X) */ + DP[NumGas].x = xr; + DP[NumGas].y = 0; + DP[NumGas].z = 0; + DP[NumGas].task = ThisTask; + DP[NumGas].ID = P[index_r].ID; + DP[NumGas].index = index_r + NumGas; /* this is a mirrored local point */ +#if defined(REFLECTIVE_X) + DP[NumGas].image_flags = REFL_X_FLAGS; +#if(REFLECTIVE_X == 2) + DP[NumGas].image_flags |= OUTFLOW_X; +#endif /* #if (REFLECTIVE_X == 2) */ +#endif /* #if defined(REFLECTIVE_X) */ + return 0; +} + +/*! \brief Computes faces and volume of cells in 1d Voronoi mesh. + * + * Also computes the center of mass. 
+ * + * \return void + */ +void compute_voronoi_faces_and_volumes(void) +{ + int i; + + tessellation *T = &Mesh; + + T->Nvf = 0; + point *DP = T->DP; + face *VF = T->VF; + + for(i = -1; i < NumGas; i++) + { + VF[T->Nvf].p1 = i; + VF[T->Nvf].p2 = i + 1; + + VF[T->Nvf].cx = 0.5 * (DP[i].x + DP[i + 1].x); + + VF[T->Nvf].cy = 0; + VF[T->Nvf].cz = 0; + VF[T->Nvf].area = 1; + + T->Nvf++; + } + + for(i = 0; i < NumGas; i++) + { + SphP[i].Volume = VF[i + 1].cx - VF[i].cx; + SphP[i].Center[0] = 0.5 * (VF[i + 1].cx + VF[i].cx); + SphP[i].Center[1] = 0; + SphP[i].Center[2] = 0; + + SphP[i].SurfaceArea = 2.; + } +} + +/*! \brief Data for 1d Voronoi mesh. + */ +static struct voronoi_1D_data +{ + double x; + int index; +} * mp; + +static int *Id; + +/*! \brief Sort cells by their position and reorder in P and SphP array. + * + * \return void + */ +void voronoi_1D_order(void) +{ + int i; + + mpi_printf("begin 1D order...\n"); + + if(NumGas) + { + mp = (struct voronoi_1D_data *)mymalloc("mp", sizeof(struct voronoi_1D_data) * NumGas); + Id = (int *)mymalloc("Id", sizeof(int) * NumGas); + + for(i = 0; i < NumGas; i++) + { + mp[i].index = i; + mp[i].x = P[i].Pos[0]; + } + + mysort(mp, NumGas, sizeof(struct voronoi_1D_data), voronoi_1D_compare_key); + + for(i = 0; i < NumGas; i++) + Id[mp[i].index] = i; + + voronoi_1D_reorder_gas(); + + myfree(Id); + myfree(mp); + } + + mpi_printf("1D order done.\n"); +} + +/*! \brief Compare x value of voronoi_1D_data objects. + * + * \param[in] a Pointer to first voronoi_1D_data object. + * \param[in] b Pointer to second voronoi_1D_data object. + * + * \return (-1,0,1) -1 if a->x < b->x. + */ +int voronoi_1D_compare_key(const void *a, const void *b) +{ + if(((struct voronoi_1D_data *)a)->x < (((struct voronoi_1D_data *)b)->x)) + return -1; + + if(((struct voronoi_1D_data *)a)->x > (((struct voronoi_1D_data *)b)->x)) + return +1; + + return 0; +} + +/*! \brief Order the gas cells according to the index given in the ID array. 
+ * + * \return void + */ +void voronoi_1D_reorder_gas(void) +{ + int i; + struct particle_data Psave, Psource; + struct sph_particle_data SphPsave, SphPsource; + int idsource, idsave, dest; + + for(i = 0; i < NumGas; i++) + { + if(Id[i] != i) + { + Psource = P[i]; + SphPsource = SphP[i]; + + idsource = Id[i]; + dest = Id[i]; + + do + { + Psave = P[dest]; + SphPsave = SphP[dest]; + idsave = Id[dest]; + + P[dest] = Psource; + SphP[dest] = SphPsource; + Id[dest] = idsource; + + if(dest == i) + break; + + Psource = Psave; + SphPsource = SphPsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +#endif /* #if defined (ONEDIMS) && !defined (ONEDIMS_SPHERICAL) */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c new file mode 100644 index 0000000000..c0212da41d --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c @@ -0,0 +1,339 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/mesh/voronoi/voronoi_1d_spherical.c + * \date 05/2018 + * \brief Routines to build a 1d Voronoi mesh in spherical coordinates. + * \details Note that some of these routines have the same name as the ones + * in voronoi_2d.c and voronoi_3d.c and just replace them in case + * the Config-option ONEDIMS is active. This is also the reason + * why some of these functions are empty but nonetheless have to + * exist in this file. + * contains functions: + * void write_voronoi_mesh(tessellation * T, char *fname, + * int writeTask, int lastTask) + * void initialize_and_create_first_tetra(tessellation * T) + * void compute_circumcircles(tessellation * T) + * void set_integers_for_point(tessellation * T, int pp) + * int insert_point(tessellation * T, int pp, int ttstart) + * int voronoi_ghost_search(tessellation * T) + * int count_undecided_tetras(tessellation * T) + * int voronoi_ghost_search_alternative(tessellation * T) + * void compute_voronoi_faces_and_volumes(void) + * void voronoi_1D_order(void) + * int voronoi_1D_compare_key(const void *a, const void *b) + * void voronoi_1D_reorder_gas(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +#if defined(ONEDIMS) && defined(ONEDIMS_SPHERICAL) /* will only be compiled in 1D spherical case */ + +/*! \brief Output of Voroioi mesh to file. + * + * Not supported for 1d spherical. + * + * \retur void + */ +void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask) +{ + terminate("write_voronoi_mesh not supported in 1d spherical case!"); +} + +/*! \brief Initialises spherical 1d tesslation and create all-enclosing + * segment. 
+ * + * \param[out] T Pointer to tessllation structure which is set and its arrays + * are allocated in this routine. + * + * \return void + */ +void initialize_and_create_first_tetra(tessellation *T) +{ + char msg[200]; + + if(NTask > 1) + { + mpi_terminate("1D code works only for 1 CPU\n"); + } + + T->MaxNdp = NumGas + 4; + T->MaxNdt = 4 + T->MaxNdp * 2; + T->MaxNvf = T->MaxNdt; + + if(NumGas == 0) + { + sprintf(msg, "NumGas=%d on Task=%d, but need at least one particle!\n", NumGas, ThisTask); + terminate(msg); + } + + T->Ndp = 0; + T->Nvf = 0; + T->Ndt = 0; + + T->VF = mymalloc("VF", T->MaxNvf * sizeof(face)); + + T->DP = mymalloc("DP", (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + T->DT = mymalloc("DT", T->MaxNdt * sizeof(tetra)); +} + +/*! \brief Computes circumcircles in 1d spherical coordinates. + * + * Not necessary in 1d spherical. However, this function has to exist for + * the 1d spherical code to work. + * + * \param[in] T Pointer to tessllation structure. + * + * \return void + */ +void compute_circumcircles(tessellation *T) {} + +/*! \brief Empty funciton in 1d spherical case. + * + * Not necessary in 1d spherical. However, this function has to exist for the + * 1d spherical code to work. + * + * \return void + */ +void set_integers_for_point(tessellation *T, int pp) {} + +/*! \brief Empty funciton in 1d spherical case. + * + * Not necessary in 1d spherical. However, this function has to exist for + * the 1d spherical code to work. + * + * \return 0 + */ +int insert_point(tessellation *T, int pp, int ttstart) { return 0; } + +/*! \brief Wrapper routine to search for ghost cells for boundary cells. + * + * \param[out] T Pointer to tessellation. + * + * \return 0 + */ +int voronoi_ghost_search(tessellation *T) { return voronoi_ghost_search_alternative(T); } + +/*! \brief Empty funciton in 1d spherical case. + * + * Not necessary in 1d spherical. However, this function has to exist for + * the 1d spherical code to work. 
+ * + * \return 0 + */ +int count_undecided_tetras(tessellation *T) { return 0; } + +/*! \brief Searches for ghost cells in 1d spherical Voronoi mesh. + * + * This routine assumes an radius ordered cell array. + * + * \param[out] T pointer to tesslation. + * + * \return 0 + */ +int voronoi_ghost_search_alternative(tessellation *T) +{ + point *DP = T->DP; + + /* reflective inner boundaries */ + DP[-1].x = 2. * All.CoreRadius - P[0].Pos[0]; + DP[-1].y = 0; + DP[-1].z = 0; + DP[-1].task = ThisTask; + DP[-1].ID = P[0].ID; + DP[-1].index = NumGas; /* this is a mirrored local point */ + + /* outflow outer boundaries */ + DP[NumGas].x = boxSize_X + (boxSize_X - P[NumGas - 1].Pos[0]); + DP[NumGas].y = 0; + DP[NumGas].z = 0; + DP[NumGas].task = ThisTask; + DP[NumGas].ID = P[NumGas - 1].ID; + DP[NumGas].index = NumGas - 1 + NumGas; /* this is a mirrored local point */ + + return 0; +} + +/*! \brief Compute faces and volume of cells in 1d spherical Voronoi mesh. + * + * Also computes the center of mass. + * + * \return void + */ +void compute_voronoi_faces_and_volumes(void) +{ + int i; + + tessellation *T = &Mesh; + + T->Nvf = 0; + point *DP = T->DP; + face *VF = T->VF; + + for(i = -1; i < NumGas; i++) + { + VF[T->Nvf].p1 = i; + VF[T->Nvf].p2 = i + 1; + + VF[T->Nvf].cx = 0.5 * (DP[i].x + DP[i + 1].x); + VF[T->Nvf].cy = 0; + VF[T->Nvf].cz = 0; + VF[T->Nvf].area = 4. * M_PI * VF[T->Nvf].cx * VF[T->Nvf].cx; + + T->Nvf++; + } + + for(i = 0; i < NumGas; i++) + { + SphP[i].Volume = 4.0 / 3.0 * M_PI * (VF[i + 1].cx * VF[i + 1].cx * VF[i + 1].cx - VF[i].cx * VF[i].cx * VF[i].cx); + SphP[i].Center[0] = 0.5 * (VF[i + 1].cx + VF[i].cx); + SphP[i].Center[1] = 0; + SphP[i].Center[2] = 0; + + SphP[i].SurfaceArea = VF[i].area + VF[i + 1].area; + SphP[i].ActiveArea = SphP[i].SurfaceArea; + } +} + +/*! \brief Structure for 1d spherical Voronoi mesh. + */ +static struct voronoi_1D_data +{ + double x; + int index; +} * mp; + +static int *Id; + +/*! \brief Sort cells by their position (i.e. 
radius) and reorder in P and + * SphP array. + * + * \return void + */ +void voronoi_1D_order(void) +{ + int i; + + mpi_printf("begin 1D order...\n"); + + if(NumGas) + { + mp = (struct voronoi_1D_data *)mymalloc("mp", sizeof(struct voronoi_1D_data) * NumGas); + Id = (int *)mymalloc("Id", sizeof(int) * NumGas); + + for(i = 0; i < NumGas; i++) + { + mp[i].index = i; + mp[i].x = P[i].Pos[0]; + } + + mysort(mp, NumGas, sizeof(struct voronoi_1D_data), voronoi_1D_compare_key); + + for(i = 0; i < NumGas; i++) + Id[mp[i].index] = i; + + voronoi_1D_reorder_gas(); + + myfree(Id); + myfree(mp); + } + + mpi_printf("1D order done.\n"); +} + +/*! \brief Compare x value of voronoi_1D_data objects. + * + * \param[in] a Pointer to first voronoi_1D_data object. + * \param[in] b Pointer to second voronoi_1D_data object. + * + * \return (-1,0,1) -1 if a->x < b->x. + */ +int voronoi_1D_compare_key(const void *a, const void *b) +{ + if(((struct voronoi_1D_data *)a)->x < (((struct voronoi_1D_data *)b)->x)) + return -1; + + if(((struct voronoi_1D_data *)a)->x > (((struct voronoi_1D_data *)b)->x)) + return +1; + + return 0; +} + +/*! \brief Order the gas cells according to the index given in the ID array. 
+ * + * \return void + */ +void voronoi_1D_reorder_gas(void) +{ + int i; + struct particle_data Psave, Psource; + struct sph_particle_data SphPsave, SphPsource; + int idsource, idsave, dest; + + for(i = 0; i < NumGas; i++) + { + if(Id[i] != i) + { + Psource = P[i]; + SphPsource = SphP[i]; + + idsource = Id[i]; + dest = Id[i]; + + do + { + Psave = P[dest]; + SphPsave = SphP[dest]; + idsave = Id[dest]; + + P[dest] = Psource; + SphP[dest] = SphPsource; + Id[dest] = idsource; + + if(dest == i) + break; + + Psource = Psave; + SphPsource = SphPsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +#endif /* #if defined (ONEDIMS) && defined (ONEDIMS_SPHERICAL) */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c new file mode 100644 index 0000000000..7e9e519c13 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c @@ -0,0 +1,2110 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_2d.c + * \date 05/2018 + * \brief Routines to build a 2d Voronoi mesh. 
+ * \details Note that some of these routines have the same name as the ones + * in voronoi_1d.c and voronoi_3d.c and just replace them in case + * the Config-option TWODIMS is active. This is also the reason + * why some of these functions are empty but nonetheless have to + * exist in this file. + * contains functions: + * void initialize_and_create_first_tetra(tessellation * T) + * int insert_point(tessellation * T, int pp, int ttstart) + * void make_a_2_to_4_flip(tessellation * T, int pp, int tt0, + * int tt1, int tt2, int tt3, int i0, int j0) + * void make_a_1_to_3_flip(tessellation * T, int pp, int tt0, + * int tt1, int tt2) + * void check_edge_and_flip_if_needed(tessellation * T, int ip, + * int it) + * int get_triangle(tessellation * T, int pp, int *moves, int + * *degenerate_flag, int ttstart) + * static inline void add_row_2d(double *m, int r1, int r2, + * double fac) + * int solve_linear_equations_2d(double *m, double *res) + * int FindTriangle(tessellation * T, int tt, int pp, + * int *degnerate_flag, int *nexttetra) + * int InCircle_Quick(tessellation * T, int pp0, int pp1, + * int pp2, int pp) + * int InCircle_Errorbound(tessellation * T, int pp0, int pp1, + * int pp2, int pp) + * int InCircle_Exact(tessellation * T, int pp0, int pp1, + * int pp2, int pp) + * double test_triangle_orientation(tessellation * T, int pp0, + * int pp1, int pp2) + * int Orient2d_Quick(tessellation * T, int pp0, int pp1, + * int pp2) + * int Orient2d_Exact(tessellation * T, int pp0, int pp1, + * int pp2) + * void process_edge_faces_and_volumes(tessellation * T, int tt, + * int nr) + * int derefine_refine_get_triangles(tessellation * T, int tt, + * int nr, point * dtip, triangle * trilist, int ntri, + * int max_n_tri) + * int derefine_add_point_and_split_tri(int q, triangle + * * trilist, int ntri, int max_ntri, double vol) + * double get_tri_volume(int i, triangle * trilist) + * void derefine_refine_process_edge(tessellation * T, double + * *vol, int tt, int nr) + * void 
compute_circumcircles(tessellation * T) + * void update_circumcircle(tessellation * T, int tt) + * void set_integers_for_pointer(point * p) + * void write_voronoi_mesh(tessellation * T, char *fname, int + * writeTask, int lastTask) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +#if defined(TWODIMS) && !defined(ONEDIMS) /* will only be compiled in 2D case */ + +#define INSIDE_EPS 1.0e-8 +#define GAUSS_EPS 1.0e-8 + +/*! \brief Initializes 2d tessellation and create all-enclosing triangle. + * + * \param[out] T Pointer to tessellation structure which is set and its arrays + * are allocated in this routine. + * + * \return void + */ +void initialize_and_create_first_tetra(tessellation *T) +{ + point *p; + int i, n; + + T->MaxNdp = T->Indi.AllocFacNdp; + T->MaxNdt = T->Indi.AllocFacNdt; + T->MaxNvf = T->Indi.AllocFacNvf; + + T->Ndp = 0; + T->Nvf = 0; + T->Ndt = 0; + + T->VF = mymalloc_movable(&T->VF, "VF", T->MaxNvf * sizeof(face)); + + T->DP = mymalloc_movable(&T->DP, "DP", (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + T->DT = mymalloc_movable(&T->DT, "DT", T->MaxNdt * sizeof(tetra)); + + /* construct all encompassing huge triangle */ + double box, tetra_incircle, tetra_sidelength, tetra_height; + + box = boxSize_X; + if(box < boxSize_Y) + box = boxSize_Y; + + box *= 1.05; + + tetra_incircle = 2.001 * (1 + sqrt(3)) / 3.0 * box; /* to give room for ghost particles needed for periodic/reflective + boundary conditions, the incircle is twice as large, i.e. 
+ [-0.5*box, 1.5*box,-0.5*box, 1.5*box] should be inside triangle */ + tetra_sidelength = tetra_incircle * sqrt(12); + tetra_height = sqrt(3.0) / 2 * tetra_sidelength; + + if(ThisTask == 0) + printf("side-length of enclosing triangle=%g tetra_height=%g box=%g\n", tetra_sidelength, tetra_height, box); + + point *DP = T->DP; + tetra *DT = T->DT; + + /* first, let's make the points */ + DP[-3].x = 0.5 * tetra_sidelength; + DP[-3].y = -1.0 / 3 * tetra_height; + DP[-3].z = 0; + + DP[-2].x = 0; + DP[-2].y = 2.0 / 3 * tetra_height; + DP[-2].z = 0; + + DP[-1].x = -0.5 * tetra_sidelength; + DP[-1].y = -1.0 / 3 * tetra_height; + DP[-1].z = 0; + + for(i = -3; i <= -1; i++) + { + DP[i].x += 0.5 * box; + DP[i].y += 1.0 / 3 * tetra_height - 0.5 * box; + } + + for(i = -3, p = &DP[-3]; i < 0; i++, p++) + { + p->index = -1; + p->task = ThisTask; + p->timebin = 0; + } + + /* we also define a neutral element at infinity */ + DPinfinity = -4; + + DP[DPinfinity].x = MAX_DOUBLE_NUMBER; + DP[DPinfinity].y = MAX_DOUBLE_NUMBER; + DP[DPinfinity].z = MAX_DOUBLE_NUMBER; + DP[DPinfinity].index = -1; + DP[DPinfinity].task = ThisTask; + DP[DPinfinity].timebin = 0; + + /* now let's make the big triangle */ + DT[0].p[0] = -3; + DT[0].p[1] = -2; + DT[0].p[2] = -1; + + /* On the outer faces, we attach tetrahedra with the neutral element as tip. + * This way we will be able to navigate nicely within the tesselation, + * and all tetrahedra have defined neighbouring tetrahedra. 
+ */ + + for(i = 0; i < 3; i++) + { + n = i + 1; /* tetra index */ + + DT[0].t[i] = n; + DT[0].s[i] = 2; + + DT[n].t[2] = 0; + DT[n].s[2] = i; + DT[n].p[2] = DPinfinity; + } + + DT[1].p[0] = DT[0].p[2]; + DT[1].p[1] = DT[0].p[1]; + + DT[2].p[0] = DT[0].p[0]; + DT[2].p[1] = DT[0].p[2]; + + DT[3].p[0] = DT[0].p[1]; + DT[3].p[1] = DT[0].p[0]; + + DT[1].t[0] = 3; + DT[3].t[1] = 1; + DT[1].s[0] = 1; + DT[3].s[1] = 0; + + DT[1].t[1] = 2; + DT[2].t[0] = 1; + DT[1].s[1] = 0; + DT[2].s[0] = 1; + + DT[2].t[1] = 3; + DT[3].t[0] = 2; + DT[2].s[1] = 0; + DT[3].s[0] = 1; + + T->Ndt = 4; /* we'll start out with 4 triangles */ + + CentralOffsetX = 0.5 * box - 0.5000001 * tetra_sidelength; + CentralOffsetY = -0.5000001 * box; + + ConversionFac = 1.0 / (1.001 * tetra_sidelength); + + for(i = -3; i < 0; i++) + set_integers_for_point(T, i); +} + +/*! \brief Insert a point into mesh. + * + * Finds the triangle that contains this point, splits the triangle (usually + * into three). After this, flip the edges if needed restore + * Delaunayhood (which is applied recursively) until a valid Delaunay mesh + * is restored. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp Index of Delaunay point in DP array. + * \param[in] ttstart Initial guess in which triangle it might be, + * index in DT array. + * + * \return Index of triangle containing point pp. 
+ */ +int insert_point(tessellation *T, int pp, int ttstart) +{ + int tt0, tt1, tt2, tt3, ttetra_with_p; + int moves, degenerate_flag; + + /* first, need to do a point location */ + tt0 = get_triangle(T, pp, &moves, &degenerate_flag, ttstart); + + ttetra_with_p = tt0; + + if(degenerate_flag == 1) /* that's the normal split of a triangle into 3 */ + { + /* we now need to split this triangle into three */ + tt1 = T->Ndt++; + tt2 = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + + T->DT[tt1] = T->DT[tt0]; /* new triangles start as copies of the one being split */ + T->DT[tt2] = T->DT[tt0]; + + make_a_1_to_3_flip(T, pp, tt0, tt1, tt2); + + T->DTF[tt0] = 0; + T->DTF[tt1] = 0; + T->DTF[tt2] = 0; + + check_edge_and_flip_if_needed(T, pp, tt0); + check_edge_and_flip_if_needed(T, pp, tt1); + check_edge_and_flip_if_needed(T, pp, tt2); + } + else + { + degenerate_flag -= 10; /* on-edge codes are 10, 11, 12: convert to the edge index 0, 1, 2 */ + + tt1 = T->DT[tt0].t[degenerate_flag]; /* neighbour sharing the edge that pp lies on */ + + /* we now need to split this into two triangles */ + tt2 = T->Ndt++; + tt3 = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + + 
T->DT[tt2] = T->DT[tt0]; + T->DT[tt3] = T->DT[tt1]; + + make_a_2_to_4_flip(T, pp, tt0, tt1, tt2, tt3, degenerate_flag, T->DT[tt0].s[degenerate_flag]); + + T->DTF[tt0] = 0; + T->DTF[tt1] = 0; + T->DTF[tt2] = 0; + T->DTF[tt3] = 0; + + check_edge_and_flip_if_needed(T, pp, tt0); + check_edge_and_flip_if_needed(T, pp, tt1); + check_edge_and_flip_if_needed(T, pp, tt2); + check_edge_and_flip_if_needed(T, pp, tt3); + } + + return ttetra_with_p; +} + +/*! \brief Make a 2 to 4 flip needed if point is on edge of a Delaunay + * triangle. + * + * If a new point is at the edge of a Delaunay triangle, both adjacent + * triangles need to be split into two. See Springel (2010) for a + * detailed discussion. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp Index of Delaunay point in DP array. + * \param[in] tt0 Index in DT array of the triangle that has pp on its edge. + * \param[in] tt1 Index in DT array of the neighbour of tt0 across that edge. + * \param[in] tt2 Index in DT array of the new triangle (a copy of tt0). + * \param[in] tt3 Index in DT array of the new triangle (a copy of tt1). + * \param[in] i0 Index (0..2) of the corner of tt0 opposite to the common edge + * that needs to be involved in flip. + * \param[in] j0 Index (0..2) of the corner of tt1 opposite to the common edge + * that needs to be involved in flip. 
+ * + * \return void + */ +void make_a_2_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3, int i0, int j0) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + tetra *t3 = &DT[tt3]; + + int i1, i2, j1, j2; + + CountFlips++; + Count_2_to_4_Flips2d++; + + i1 = i0 + 1; /* i1, i2 (j1, j2): the two other corner indices, cyclically after i0 (j0) */ + i2 = i0 + 2; + j1 = j0 + 1; + j2 = j0 + 2; + + if(i1 > 2) + i1 -= 3; + if(i2 > 2) + i2 -= 3; + + if(j1 > 2) + j1 -= 3; + if(j2 > 2) + j2 -= 3; + + t0->p[i1] = pp; /* every one of the four triangles gets pp as a corner */ + t1->p[j2] = pp; + t2->p[i2] = pp; + t3->p[j1] = pp; + + t0->t[i0] = tt1; /* link the four triangles to each other (t = neighbour, s = opposite corner in it) */ + t1->t[j0] = tt0; + t0->s[i0] = j0; + t1->s[j0] = i0; + + t1->t[j1] = tt3; + t3->t[j2] = tt1; + t1->s[j1] = j2; + t3->s[j2] = j1; + + t2->t[i1] = tt0; + t0->t[i2] = tt2; + t2->s[i1] = i2; + t0->s[i2] = i1; + + t2->t[i0] = tt3; + t3->t[j0] = tt2; + t2->s[i0] = j0; + t3->s[j0] = i0; + + DT[t0->t[i1]].t[t0->s[i1]] = tt0; /* make the outer neighbours point back at the triangle now bordering them */ + DT[t1->t[j2]].t[t1->s[j2]] = tt1; + DT[t2->t[i2]].t[t2->s[i2]] = tt2; + DT[t3->t[j1]].t[t3->s[j1]] = tt3; +} + +/*! \brief Makes a 1 to 3 flip needed if point is in a Delaunay triangle. + * + * If a new point is in a Delaunay triangle, this + * triangle needs to be split into three. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp Index of Delaunay point in DP array. + * \param[in] tt0 Index in DT array of the triangle that contains pp. + * \param[in] tt1 Index in DT array of the first new triangle (a copy of tt0). + * \param[in] tt2 Index in DT array of the second new triangle (a copy of tt0). 
+ * + * \return void + */ +void make_a_1_to_3_flip(tessellation *T, int pp, int tt0, int tt1, int tt2) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + + CountFlips++; + Count_1_to_3_Flips2d++; + + t0->p[0] = pp; /* each of the three triangles gets pp in a different corner slot */ + t1->p[1] = pp; + t2->p[2] = pp; + + t0->t[1] = tt1; /* link the three triangles to each other (t = neighbour, s = opposite corner in it) */ + t1->t[0] = tt0; + t0->s[1] = 0; + t1->s[0] = 1; + + t1->t[2] = tt2; + t2->t[1] = tt1; + t1->s[2] = 1; + t2->s[1] = 2; + + t2->t[0] = tt0; + t0->t[2] = tt2; + t2->s[0] = 2; + t0->s[2] = 0; + + DT[t0->t[0]].t[t0->s[0]] = tt0; /* make the outer neighbours point back at the triangle now bordering them */ + DT[t1->t[1]].t[t1->s[1]] = tt1; + DT[t2->t[2]].t[t2->s[2]] = tt2; +} + +/*! \brief Flips the edge opposite to point ip if needed. + * + * See Springel (2010) for detailed discussion how mesh is constructed. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] ip Index to Delaunay point, DP array. + * \param[in] it Index in DT array of a triangle that has ip as a corner. + * + * \return void + */ +void check_edge_and_flip_if_needed(tessellation *T, int ip, int it) +{ + tetra *DT = T->DT; + char *DTF = T->DTF; + + tetra *t = &DT[it]; + + int tt, pp, t0, t2; + int pi, pi1, pi2; + int ni, ni1, ni2; + int st2, st0; + + if(t->p[0] == ip) + pi = 0; + else if(t->p[1] == ip) + pi = 1; + else + pi = 2; + + /* get the point that lies across the edge to obtain the quadrilateral */ + + tt = t->t[pi]; + ni = t->s[pi]; + pp = DT[tt].p[ni]; + + int ret, ret_exact; + + ret = InCircle_Errorbound(T, t->p[0], t->p[1], t->p[2], pp); + CountInSphereTests++; + + if(ret != 0) + ret_exact = ret; + else + { + ret_exact = InCircle_Exact(T, t->p[0], t->p[1], t->p[2], pp); + CountInSphereTestsExact++; + } + + if(ret_exact > 0) + { + /* pp lies in the circumcircle of the triangle, the edge is not Delaunay. 
Need to do a flip */ + + CountFlips++; + + ni1 = ni + 1; + if(ni1 > 2) + ni1 -= 3; + ni2 = ni + 2; + if(ni2 > 2) + ni2 -= 3; + + pi1 = pi + 1; + if(pi1 > 2) + pi1 -= 3; + pi2 = pi + 2; + if(pi2 > 2) + pi2 -= 3; + + t0 = DT[tt].t[ni1]; + t2 = t->t[pi1]; + + st0 = DT[tt].s[ni1]; + st2 = t->s[pi1]; + + /* change the points of the triangles */ + t->p[pi2] = pp; + DT[tt].p[ni2] = ip; + + /* change the pointers to the neighbouring triangles, and fix + the adjency relations */ + + t->t[pi1] = tt; + DT[tt].t[ni1] = it; + t->s[pi1] = ni1; + DT[tt].s[ni1] = pi1; + + t->t[pi] = t0; + DT[t0].t[st0] = it; + t->s[pi] = st0; + DT[t0].s[st0] = pi; + + DT[tt].t[ni] = t2; + DT[t2].t[st2] = tt; + DT[tt].s[ni] = st2; + DT[t2].s[st2] = ni; + + DTF[tt] = 0; + DTF[it] = 0; + + /* now we need to test also the two sides opposite of p */ + check_edge_and_flip_if_needed(T, ip, it); + check_edge_and_flip_if_needed(T, ip, tt); + } +} + +/*! \brief Finds triangle in which new Delaunay point is located. + * + * Starts with a suggested triangle ttstart and checks if the point is + * contained in this triangle. If not, the procedure is repeated for the + * neighboring triangle. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp Index of Delaunay point in DP array. + * \param[out] moves Number of iterations to find the correct triangle. + * \param[out] degenerate_flag Flag if point lies on edge of a triangle. + * \param[in] ttstart Starting index for the search for the correct triangle. + * + * \return Index of triangle in DT array. + */ +int get_triangle(tessellation *T, int pp, int *moves, int *degenerate_flag, int ttstart) +{ + int count_moves = 0; + int ret; + int tt, next_tetra; + + tt = ttstart; + +#define MAX_COUNT_MOVES 1000000 + + while((ret = FindTriangle(T, tt, pp, degenerate_flag, &next_tetra)) == 0) + { + /* we need to see in which of the three possible neighbouring triangles + we should walk. 
We'll choose the one which lies along the face that + is traversed by a line from the cm of the triangle to the point in + question. + */ + count_moves++; + + if(count_moves > MAX_COUNT_MOVES) + { + printf("ta=%d triangle=%d xy=(%g|%g) ID=%d\n", ThisTask, (int)(tt), T->DP[pp].x, T->DP[pp].y, T->DP[pp].ID); + if(count_moves > MAX_COUNT_MOVES + 10) + terminate("too many moves, problem to find triangle"); + } + + tt = next_tetra; + } + + *moves = count_moves; + + return tt; +} + +/*! \brief Add row in matrix equation. + * + * Auxiliary function for solve_linear_equations_2d. + * + * \param[in, out] m Matrix with 3 entries per row, stored row by row. + * \param[in] r1 Index of row to be modified. + * \param[in] r2 Index of row which is added to r1. + * \param[in] fac Factor by which row r2 is multiplied before adding to r1. + * + * \return void + */ +static inline void add_row_2d(double *m, int r1, int r2, double fac) +{ + int i; + + for(i = 0; i < 3; i++) + m[r1 * 3 + i] += fac * m[r2 * 3 + i]; +} + +/*! \brief Solve system of linear equations for 2d Voronoi construction. + * + * This is needed in the FindTriangle routine. + * + * \param[in, out] m Augmented 2x3 matrix (row by row), modified in place. + * \param[out] res Array for result (2 entries), only set on success. + * + * \return 0 if success, -1 if the pivot element is (nearly) zero. + */ +int solve_linear_equations_2d(double *m, double *res) +{ + int ix, iy; + + if(fabs(m[0]) > fabs(m[3])) /* pivot on the row with the larger first-column entry */ + { + ix = 0; + iy = 1; + } + else + { + ix = 1; + iy = 0; + } + + if(fabs(m[ix * 3]) < 1.0e-12) /* test the pivot before dividing by it; the elimination never changes row ix */ + return -1; + + add_row_2d(m, iy, ix, -m[iy * 3] / m[ix * 3]); + + res[1] = m[iy * 3 + 2] / m[iy * 3 + 1]; + res[0] = (m[ix * 3 + 2] - res[1] * m[ix * 3 + 1]) / m[ix * 3]; + + return 0; +} + +/*! \brief Does point lie in triangle? + * + * Tests whether point pp lies in the triangle, on an edge, or outside. In the + * latter case, a neighboring triangle is returned. First, a fast search is + * performed and if this yields that point might be on an edge, a (more + * expensive) exact determination is performed. + * + * \param[in] T Pointer to tessellation. 
+ * \param[in] tt Index of triangle in DT array. + * \param[in] pp Index of Delaunay point in DP array. + * \param[out] degenerate_flag Flag if point lies on edge of a triangle. + * \param[out] nexttetra Index of neighboring triangle in direction of point. + * + * \return 1: point inside triangle; 0 outside; 10,11,12: on edge. + */ +int FindTriangle(tessellation *T, int tt, int pp, int *degnerate_flag, int *nexttetra) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra *t = &DT[tt]; + point *p = &DP[pp]; + + int pp0, pp1, pp2; + point *p0, *p1, *p2; + + pp0 = t->p[0]; + pp1 = t->p[1]; + pp2 = t->p[2]; + + p0 = &DP[pp0]; + p1 = &DP[pp1]; + p2 = &DP[pp2]; + + if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity) + { + char buf[1000]; + sprintf(buf, "we are in a triangle with an infinity point. tetra=%d p=(%g|%g)\n", (int)(tt), p->x, p->y); + terminate(buf); + } + + Count_InTetra++; + + double ax = p1->xx - p0->xx; + double ay = p1->yy - p0->yy; + + double bx = p2->xx - p0->xx; + double by = p2->yy - p0->yy; + + double qx = p->xx - p0->xx; + double qy = p->yy - p0->yy; + + double mv_data[] = {ax, bx, qx, ay, by, qy}; + double x[2]; + + int ivol, flag2, flag1, flag0; + int count_zeros = 0; + + int status; + + status = solve_linear_equations_2d(mv_data, x); + + if(status < 0) + { + ivol = Orient2d_Exact(T, t->p[0], t->p[1], t->p[2]); + if(ivol <= 0) + { + char buf[1000]; + sprintf(buf, "flat or negatively triangle found (ivol=%d)\n", ivol); + terminate(buf); + } + } + + if(status >= 0) + { + if(x[0] > INSIDE_EPS && x[1] > INSIDE_EPS && (1 - (x[0] + x[1])) > INSIDE_EPS) + { + /* looks like we are safely inside the triangle */ + + *degnerate_flag = 1; + return 1; + } + + if(x[0] < -INSIDE_EPS || x[1] < -INSIDE_EPS || (1 - (x[0] + x[1])) < -INSIDE_EPS) + { + /* looks like we are clearly outside the triangle. 
+ Let's look for a good neighbouring triangle to continue the search */ + + /* note: in the (a,b) basis, the center-of-mass has coordinates (1/3, 1/3) */ + + double w, u; + + if(fabs(x[1] - (1.0 / 3)) > INSIDE_EPS) + { + w = (1.0 / 3) / ((1.0 / 3) - x[1]); + if(w > 0) + { + u = (1.0 / 3) + w * (x[0] - (1.0 / 3)); + if(u > -INSIDE_EPS && (1 - u) > -INSIDE_EPS) + { + *nexttetra = t->t[2]; + return 0; + } + } + } + + if(fabs(x[0] - (1.0 / 3)) > INSIDE_EPS) + { + w = (1.0 / 3) / ((1.0 / 3) - x[0]); + if(w > 0) + { + u = (1.0 / 3) + w * (x[1] - (1.0 / 3)); + if(u > -INSIDE_EPS && (1 - u) > -INSIDE_EPS) + { + *nexttetra = t->t[1]; + return 0; + } + } + } + + *nexttetra = t->t[0]; + return 0; + } + } + + /* here we need to decide whether we have a degenerate case, i.e. + whether we think the point lies on an edge of the triangle */ + + Count_InTetraExact++; + + ivol = Orient2d_Exact(T, t->p[0], t->p[1], t->p[2]); + + if(ivol <= 0) + { + char buf[1000]; + sprintf(buf, "flat or negatively oriented triangle found (ivol=%d)\n", ivol); + terminate(buf); + } + + flag0 = Orient2d_Exact(T, pp1, pp2, pp); + flag1 = Orient2d_Exact(T, pp2, pp0, pp); + flag2 = Orient2d_Exact(T, pp0, pp1, pp); + + if(flag0 == 0) + count_zeros++; + + if(flag1 == 0) + count_zeros++; + + if(flag2 == 0) + count_zeros++; + + if(count_zeros >= 2) + { + printf("flags=%d %d %d\n", flag0, flag1, flag2); + + printf("points: %d %d %d %d\n", (int)(pp0), (int)(pp1), (int)(pp2), (int)(pp)); + printf("Ngas=%d\n", NumGas); + printf("xyz, p=%d: (%g|%g) index=%d task=%d ID=%d flags\n", (int)(pp0), p0->x, p0->y, p0->index, p0->task, + P[p0->index % NumGas].ID); + printf("xyz, p=%d: (%g|%g) index=%d task=%d ID=%d flags\n", (int)(pp1), p1->x, p1->y, p1->index, p1->task, + P[p1->index % NumGas].ID); + printf("xyz, p=%d: (%g|%g) index=%d task=%d ID=%d flags\n", (int)(pp2), p2->x, p2->y, p2->index, p2->task, + P[p2->index % NumGas].ID); + printf("xyz, p=%d: (%g|%g) index=%d task=%d ID=%d flags\n", (int)(pp), p->x, p->y, 
p->index, p->task, P[p->index % NumGas].ID); + terminate("too many zeros - (perhaps identical points inserted?)"); + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0) + { + /* we have a point inside the triangle, but it may still be on one of the edges */ + + if(count_zeros == 0) + { + /* ok, we are inside */ + *degnerate_flag = 1; + return 1; + } + + if(count_zeros == 1) /* we lie on a face */ + { + if(flag2 == 0) + { + *degnerate_flag = 12; + return 12; /* point lies on side A (edge p0-p1, opposite corner 2) */ + } + if(flag1 == 0) + { + *degnerate_flag = 11; + return 11; /* point lies on side C (edge p2-p0, opposite corner 1) */ + } + + if(flag0 == 0) + { + *degnerate_flag = 10; + return 10; /* point lies on side B (edge p1-p2, opposite corner 0) */ + } + } + } + + /* we are clearly outside, let's select the suitable neighbour */ + + if(flag0 < 0 && flag1 >= 0 && flag2 >= 0) + { + *nexttetra = t->t[0]; + return 0; + } + + if(flag0 >= 0 && flag1 < 0 && flag2 >= 0) + { + *nexttetra = t->t[1]; + return 0; + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 < 0) + { + *nexttetra = t->t[2]; + return 0; + } + + /* there are apparently two negative values. Let's pick a random one */ + + int ind = -1; + + if(flag0 < 0) + { + if(ind < 0) + ind = 0; + else + { + if(get_random_number() < 0.5) + ind = 0; + } + } + + if(flag1 < 0) + { + if(ind < 0) + ind = 1; + else + { + if(get_random_number() < 0.5) + ind = 1; + } + } + + if(flag2 < 0) + { + if(ind < 0) + ind = 2; + else + { + if(get_random_number() < 0.5) + ind = 2; + } + } + + *nexttetra = t->t[ind]; + return 0; +} + +/*! \brief Tests whether point pp lies in the circumcircle around triangle + * p0,p1,p2. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. + * \param[in] pp2 Index in DP of third point in triangle. + * \param[in] pp Index in DP of point to be checked. + * + * \return (-1,0,1); +1: in circle (positively oriented triangle); 0: on circle; -1: outside circle or a point is DPinfinity. 
+ */ +int InCircle_Quick(tessellation *T, int pp0, int pp1, int pp2, int pp) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + point *p = &DP[pp]; + + double ax, ay, bx, by, cx, cy; + double ab, bc, ca, a2, b2, c2, x; + + if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity || pp == DPinfinity) + return -1; + + ax = p0->xx - p->xx; + ay = p0->yy - p->yy; + bx = p1->xx - p->xx; + by = p1->yy - p->yy; + cx = p2->xx - p->xx; + cy = p2->yy - p->yy; + + ab = ax * by - bx * ay; + bc = bx * cy - cx * by; + ca = cx * ay - ax * cy; + + a2 = ax * ax + ay * ay; + b2 = bx * bx + by * by; + c2 = cx * cx + cy * cy; + + x = a2 * bc + b2 * ca + c2 * ab; /* in-circle determinant, expanded along the column of squared lengths */ + + if(x < 0) + return -1; + if(x > 0) + return +1; + + return 0; +} + +/*! \brief Tests whether point pp lies in the circumcircle around triangle + * p0,p1,p2 with some error margin. + * + * This error margin should be large enough to exclude that close cases are + * misclassified due to numerical round-off errors. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. + * \param[in] pp2 Index in DP of third point in triangle. + * \param[in] pp Index in DP of point to be checked. + * + * \return (-1,0,1); +1: in circle; 0 on circle (within tolerance), 
+ * -1: outside circle (or a point is DPinfinity). + */ +int InCircle_Errorbound(tessellation *T, int pp0, int pp1, int pp2, int pp) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + point *p = &DP[pp]; + + if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity || pp == DPinfinity) + return -1; + + double ax, ay, bx, by, cx, cy; + double ab, bc, ca, a2, b2, c2, x; + double axby, bxay, bxcy, cxby, cxay, axcy; + + ax = p0->xx - p->xx; + ay = p0->yy - p->yy; + bx = p1->xx - p->xx; + by = p1->yy - p->yy; + cx = p2->xx - p->xx; + cy = p2->yy - p->yy; + + axby = ax * by; + bxay = bx * ay; + bxcy = bx * cy; + cxby = cx * by; + cxay = cx * ay; + axcy = ax * cy; + + ca = cxay - axcy; + ab = axby - bxay; + bc = bxcy - cxby; + + a2 = ax * ax + ay * ay; + b2 = bx * bx + by * by; + c2 = cx * cx + cy * cy; + + x = a2 * bc + b2 * ca + c2 * ab; + + /* calculate absolute maximum size */ + + double sizelimit = a2 * (fabs(bxcy) + fabs(cxby)) + b2 * (fabs(cxay) + fabs(axcy)) + c2 * (fabs(axby) + fabs(bxay)); + + double errbound = 1.0e-14 * sizelimit; + + if(x < -errbound) + return -1; + else if(x > errbound) + return +1; + + return 0; /* |x| is within the error bound: undecided, the caller falls back to InCircle_Exact */ +} + +/*! \brief Tests whether point pp lies in the circumcircle around triangle + * p0,p1,p2 using arbitrary precision operations. + * + * This is the exact solution, but computationally very expensive, thus only + * called for the unclear cases. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. + * \param[in] pp2 Index in DP of third point in triangle. + * \param[in] pp Index in DP of point to be checked. + * + * \return (-1,0,1); +1: in circle; 0 on circle, 
+ * -1: outside circle (or a point is DPinfinity). + */ +int InCircle_Exact(tessellation *T, int pp0, int pp1, int pp2, int pp) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + point *p = &DP[pp]; + + if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity || pp == DPinfinity) + return -1; + + IntegerMapType ax, ay, bx, by, cx, cy; + + ax = p0->ix - p->ix; + ay = p0->iy - p->iy; + bx = p1->ix - p->ix; + by = p1->iy - p->iy; + cx = p2->ix - p->ix; + cy = p2->iy - p->iy; + + mpz_t axby, bxay, bxcy, cxby, cxay, axcy, tmp; + + mpz_init(tmp); + + mpz_init(axby); + MY_mpz_set_si(tmp, ax); + MY_mpz_mul_si(axby, tmp, by); + mpz_init(bxay); + MY_mpz_set_si(tmp, bx); + MY_mpz_mul_si(bxay, tmp, ay); + mpz_init(bxcy); + MY_mpz_set_si(tmp, bx); + MY_mpz_mul_si(bxcy, tmp, cy); + mpz_init(cxby); + MY_mpz_set_si(tmp, cx); + MY_mpz_mul_si(cxby, tmp, by); + mpz_init(cxay); + MY_mpz_set_si(tmp, cx); + MY_mpz_mul_si(cxay, tmp, ay); + mpz_init(axcy); + MY_mpz_set_si(tmp, ax); + MY_mpz_mul_si(axcy, tmp, cy); + + mpz_t ca, ab, bc; + + mpz_init(ca); + mpz_init(ab); + mpz_init(bc); + + mpz_sub(ca, cxay, axcy); + mpz_sub(ab, axby, bxay); + mpz_sub(bc, bxcy, cxby); + + mpz_t AA, BB, a2, b2, c2; + + mpz_init(AA); + mpz_init(BB); + mpz_init(a2); + mpz_init(b2); + mpz_init(c2); + + MY_mpz_set_si(tmp, ax); + MY_mpz_mul_si(AA, tmp, ax); + MY_mpz_set_si(tmp, ay); + MY_mpz_mul_si(BB, tmp, ay); + mpz_add(a2, AA, BB); + + MY_mpz_set_si(tmp, bx); + MY_mpz_mul_si(AA, tmp, bx); + MY_mpz_set_si(tmp, by); + MY_mpz_mul_si(BB, tmp, by); + mpz_add(b2, AA, BB); + + MY_mpz_set_si(tmp, cx); + MY_mpz_mul_si(AA, tmp, cx); + MY_mpz_set_si(tmp, cy); + MY_mpz_mul_si(BB, tmp, cy); + mpz_add(c2, AA, BB); + + /* now calculate the final result */ + + mpz_mul(AA, a2, bc); + mpz_mul(BB, b2, ca); + mpz_add(tmp, AA, BB); + mpz_mul(BB, c2, ab); + mpz_add(AA, BB, tmp); + + int sign = mpz_sgn(AA); + + mpz_clear(c2); + mpz_clear(b2); + mpz_clear(a2); + mpz_clear(BB); + mpz_clear(AA); + mpz_clear(bc); + mpz_clear(ab); + 
mpz_clear(ca); + mpz_clear(axcy); + mpz_clear(cxay); + mpz_clear(cxby); + mpz_clear(bxcy); + mpz_clear(bxay); + mpz_clear(axby); + mpz_clear(tmp); + + return sign; +} + +/*! \brief Returns the orientation of the triangle. + * + * Defined as the determinant of the matrix of the position of the three edge + * points a, b and c: + * | ax, ay, 1 | + * | bx, by, 1 | + * | cx, cy, 1 | + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. + * \param[in] pp2 Index in DP of third point in triangle. + * + * \return Determinant of orientation matrix. + */ +double test_triangle_orientation(tessellation *T, int pp0, int pp1, int pp2) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + + return (p1->x - p0->x) * (p2->y - p0->y) - (p1->y - p0->y) * (p2->x - p0->x); +} + +/*! \brief Check if triangle is positively or negatively oriented. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. + * \param[in] pp2 Index in DP of third point in triangle. + * + * \return -1 if negatively, 0 if degenerate (in a line) and 1 if positively + * oriented. + */ +int Orient2d_Quick(tessellation *T, int pp0, int pp1, int pp2) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + + double x; + + x = (p1->xx - p0->xx) * (p2->yy - p0->yy) - (p1->yy - p0->yy) * (p2->xx - p0->xx); + + if(x < 0) + return -1; + if(x > 0) + return +1; + return 0; +} + +/*! \brief Check if triangle is positively or negatively oriented. + * + * Uses arbitrary precision operations, which is computationally expensive but + * garantees the correct result. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. 
+ * \param[in] pp2 Index in DP of third point in triangle. + * + * \return -1 if negatively, 0 if degenerate (in a line) and 1 if positively + * oriented. + */ +int Orient2d_Exact(tessellation *T, int pp0, int pp1, int pp2) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + +#if USEDBITS > 31 + IntegerMapType dx1, dy1, dx2, dy2; + + dx1 = (p1->ix - p0->ix); + dy1 = (p1->iy - p0->iy); + dx2 = (p2->ix - p0->ix); + dy2 = (p2->iy - p0->iy); + + mpz_t dx1dy2, dx2dy1, tmp; + + mpz_init(tmp); + mpz_init(dx1dy2); + mpz_init(dx2dy1); + + MY_mpz_set_si(tmp, dx1); + MY_mpz_mul_si(dx1dy2, tmp, dy2); + + MY_mpz_set_si(tmp, dx2); + MY_mpz_mul_si(dx2dy1, tmp, dy1); + + mpz_sub(tmp, dx1dy2, dx2dy1); + + int sign = mpz_sgn(tmp); + + mpz_clear(dx2dy1); + mpz_clear(dx1dy2); + mpz_clear(tmp); + + return (sign); + +#else /* #if USEDBITS > 31 */ + signed long long dx1, dy1, dx2, dy2, x; + + dx1 = (p1->ix - p0->ix); + dy1 = (p1->iy - p0->iy); + dx2 = (p2->ix - p0->ix); + dy2 = (p2->iy - p0->iy); + + x = dx1 * dy2 - dy1 * dx2; + + if(x < 0) + return -1; + if(x > 0) + return +1; + return 0; +#endif /* #if USEDBITS > 31 #else */ +} + +const int edge_start[3] = {1, 2, 0}; +const int edge_end[3] = {2, 0, 1}; + +/*! \brief Calculate cell volumes and face areas of mesh. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt Index in DT array. + * \param[in] nr Index in edges. 
+ * + * \return void + */ +void process_edge_faces_and_volumes(tessellation *T, int tt, int nr) +{ + int i, j, qq, p1, p2, k; + face *f; + double nx, ny; + double sx, sy; + double hx, hy; + double dvol, h; + + if(T->Nvf + 1 >= T->MaxNvf) + { + T->Indi.AllocFacNvf *= ALLOC_INCREASE_FACTOR; + T->MaxNvf = T->Indi.AllocFacNvf; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNvf=%d Indi.AllocFacNvf=%g\n", ThisTask, T->MaxNvf, T->Indi.AllocFacNvf); +#endif /* #ifdef VERBOSE */ + T->VF = myrealloc_movable(T->VF, T->MaxNvf * sizeof(face)); + + if(T->Nvf + 1 >= T->MaxNvf) + terminate("Nvf larger than MaxNvf"); + } + + tetra *DT = T->DT; + point *DP = T->DP; + face *VF = T->VF; + tetra_center *DTC = T->DTC; + + tetra *t = &DT[tt]; + + i = edge_start[nr]; + j = edge_end[nr]; + + point *dpi = &DP[t->p[i]]; + point *dpj = &DP[t->p[j]]; + + qq = t->t[nr]; + + Edge_visited[tt] |= (1 << nr); + Edge_visited[qq] |= (1 << (t->s[nr])); + + p1 = t->p[i]; + p2 = t->p[j]; + + f = &VF[T->Nvf++]; + + f->p1 = p1; + f->p2 = p2; + + f->cx = 0.5 * (DTC[tt].cx + DTC[qq].cx); + f->cy = 0.5 * (DTC[tt].cy + DTC[qq].cy); + f->cz = 0; + +#ifdef TETRA_INDEX_IN_FACE + f->dt_index = tt; +#endif /* #ifdef TETRA_INDEX_IN_FACE */ + +#ifdef REFINEMENT_MERGE_CELLS + f->t = tt; + f->nr = nr; /* delaunay tetra and edge number that generated this face */ +#endif /* #ifdef REFINEMENT_MERGE_CELLS */ + + nx = DTC[tt].cx - DTC[qq].cx; + ny = DTC[tt].cy - DTC[qq].cy; + + f->area = sqrt(nx * nx + ny * ny); + + hx = 0.5 * (dpi->x - dpj->x); + hy = 0.5 * (dpi->y - dpj->y); + + h = sqrt(hx * hx + hy * hy); + dvol = 0.5 * f->area * h; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + double angle = 0.5 * f->area / h; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + + if(dpi->task == ThisTask && dpi->index >= 0 && dpi->index < NumGas) + { + if(TimeBinSynchronized[P[dpi->index].TimeBinHydro]) + { + SphP[dpi->index].Volume += dvol; + 
SphP[dpi->index].SurfaceArea += f->area; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + if(SphP[dpi->index].MaxFaceAngle < angle) + SphP[dpi->index].MaxFaceAngle = angle; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + +#ifdef OUTPUT_SURFACE_AREA + if(f->area) + SphP[dpi->index].CountFaces++; +#endif /* #ifdef OUTPUT_SURFACE_AREA */ + +#if defined(REFINEMENT_SPLIT_CELLS) + if(SphP[dpi->index].MinimumEdgeDistance > h) + SphP[dpi->index].MinimumEdgeDistance = h; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */ + /* let's now compute the center-of-mass of the pyramid at the bottom top */ + sx = (2.0 / 3) * f->cx + (1.0 / 3) * dpi->x; + sy = (2.0 / 3) * f->cy + (1.0 / 3) * dpi->y; + + SphP[dpi->index].Center[0] += dvol * sx; + SphP[dpi->index].Center[1] += dvol * sy; + } + } + + if(dpj->task == ThisTask && dpj->index >= 0 && dpj->index < NumGas) + { + if(TimeBinSynchronized[P[dpj->index].TimeBinHydro]) + { + SphP[dpj->index].Volume += dvol; + SphP[dpj->index].SurfaceArea += f->area; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + if(SphP[dpj->index].MaxFaceAngle < angle) + SphP[dpj->index].MaxFaceAngle = angle; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + +#ifdef OUTPUT_SURFACE_AREA + if(f->area) + SphP[dpj->index].CountFaces++; +#endif /* #ifdef OUTPUT_SURFACE_AREA */ + +#if defined(REFINEMENT_SPLIT_CELLS) + if(SphP[dpj->index].MinimumEdgeDistance > h) + SphP[dpj->index].MinimumEdgeDistance = h; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */ + + /* let's now compute the center-of-mass of the pyramid on top */ + sx = (2.0 / 3) * f->cx + (1.0 / 3) * dpj->x; + sy = (2.0 / 3) * f->cy + (1.0 / 3) * dpj->y; + + SphP[dpj->index].Center[0] += dvol * sx; + SphP[dpj->index].Center[1] += dvol * sy; + } + } + int low_p, high_p; + + if(DP[p1].ID < DP[p2].ID) + { + low_p = p1; + high_p = p2; + } + else + { + low_p = p2; + 
high_p = p1; + } + + int this_task_responsible_flag = 0; + + if(TimeBinSynchronized[DP[low_p].timebin]) /* the one with the lower ID is active */ + { + /* we need to check whether the one with the lower ID is a local particle */ + if(DP[low_p].task == ThisTask && DP[low_p].index >= 0 && DP[low_p].index < NumGas) + this_task_responsible_flag = 1; + } + else if(TimeBinSynchronized[DP[high_p].timebin]) /* only the side with the higher ID is active */ + { + /* we need to check whether we hold the one with the higher ID, if yes, we'll do it */ + if(DP[high_p].task == ThisTask && DP[high_p].index >= 0 && DP[high_p].index < NumGas) + this_task_responsible_flag = 1; + } + + if(this_task_responsible_flag) + { + for(k = 0; k < 2; k++) + { + int p, q; + + if(k == 0) + { + q = p1; + p = DP[q].index; + } + else + { + q = p2; + p = DP[q].index; + } + + if(DP[q].task == ThisTask) + { + if(DP[q].index >= NumGas) /* this is a local ghost point */ + p -= NumGas; + + SphP[p].ActiveArea += f->area; + } + else + { + /* here we have a foreign ghost point */ + if(DP[q].originalindex < 0) + terminate("should not happen"); + + if(Narea >= MaxNarea) + { + T->Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR; + MaxNarea = T->Indi.AllocFacNflux; + AreaList = myrealloc_movable(AreaList, MaxNarea * sizeof(struct area_list_data)); + + if(Narea >= MaxNarea) + terminate("Narea >= MaxNarea"); + } + + AreaList[Narea].task = DP[q].task; + AreaList[Narea].index = DP[q].originalindex; + AreaList[Narea].darea = f->area; + Narea++; + } + } + } +} + +/*! \brief Copies triangle information from DTC array to trilist. + * + * Performs an orientation check and swaps orientation if needed. + * + * \param[in] T Pointer to tessellation. + * \param[in] tt Index of triangle in DT array. + * \param[in] nr Index in DT[tt].t array (adjacent tetrahedrons). + * \param[in] dtip Pointer to point to be inserted. + * \param[out] trilist Array of triangles. + * \param[in] ntri Index in trilist array. 
+ * \param[in] max_n_tri Maximum index in trilist array. + * + * \return Next index in trilist array. + */ +int derefine_refine_get_triangles(tessellation *T, int tt, int nr, point *dtip, triangle *trilist, int ntri, int max_n_tri) +{ + tetra *DT = T->DT; + tetra_center *DTC = T->DTC; + tetra *t = &DT[tt]; + int qq = t->t[nr]; + + if(ntri >= max_n_tri) + terminate("ntri >= max_n_tri"); + + trilist[ntri].p[0][0] = DTC[tt].cx; + trilist[ntri].p[0][1] = DTC[tt].cy; + + trilist[ntri].p[1][0] = DTC[qq].cx; + trilist[ntri].p[1][1] = DTC[qq].cy; + + trilist[ntri].p[2][0] = dtip->x; + trilist[ntri].p[2][1] = dtip->y; + + if(get_tri_volume(ntri, trilist) < 0) + { + /* swap two points to get proper orientation */ + trilist[ntri].p[1][0] = DTC[tt].cx; + trilist[ntri].p[1][1] = DTC[tt].cy; + + trilist[ntri].p[0][0] = DTC[qq].cx; + trilist[ntri].p[0][1] = DTC[qq].cy; + } + + ntri++; + + return ntri; +} + +/*! \brief Add point and adjust triangles accordingly. + * + * \param[in] q Index of point in DP array. + * \param[in, out] trilist Array of triangles. + * \param[in] ntri Number of elements in trilist before splitting. + * \param[in] max_ntri Maximum number of triangles allowed. + * \param[in] vol (Unused) + * + * \return Updated number of triangles. 
+ */ +int derefine_add_point_and_split_tri(int q, triangle *trilist, int ntri, int max_ntri, double vol) +{ + double m[2], n[2], sc[3], *a; + double cut[2][2], ed[2]; + int i, j, k, kk, l, nnew, flag[3], count, oldq; + + for(i = 0, nnew = ntri; i < ntri; i++) + { + if(trilist[i].owner < 0 || trilist[i].owner >= Mesh.Ndp) + { + char buf[1000]; + sprintf(buf, "i=%d trilist[i].owner=%d\n", i, trilist[i].owner); + terminate(buf); + } + + if(q < 0 || q >= Mesh.Ndp) + { + char buf[1000]; + sprintf(buf, "i=%d q=%d\n", i, q); + terminate(buf); + } + + /* midpoint */ + m[0] = 0.5 * (Mesh.DP[q].x + Mesh.DP[trilist[i].owner].x); + m[1] = 0.5 * (Mesh.DP[q].y + Mesh.DP[trilist[i].owner].y); + + n[0] = (Mesh.DP[q].x - Mesh.DP[trilist[i].owner].x); + n[1] = (Mesh.DP[q].y - Mesh.DP[trilist[i].owner].y); + + if(q == trilist[i].owner) + terminate("q == trilist[i].owner"); + + for(k = 0, count = 0; k < 3; k++) /* determine the side of each point */ + { + a = &trilist[i].p[k][0]; + + sc[k] = (a[0] - m[0]) * n[0] + (a[1] - m[1]) * n[1]; + + if(sc[k] > 0) + { + flag[k] = 1; + count++; + } + else + flag[k] = 0; + } + + switch(count) + { + case 0: /* the whole tetra is on the side of current owner - nothing to be done */ + break; + + case 3: /* the whole tetra is on the side of new point */ + trilist[i].owner = q; /* change owner */ + break; + + case 1: + case 2: + + if(nnew + 2 > max_ntri) + terminate("nnew + 2 > max_ntri"); + + trilist[nnew] = trilist[i]; + trilist[nnew + 1] = trilist[i]; + + /* find the point index that is on the other side */ + for(k = 0; k < 3; k++) + { + if(flag[k] == 1 && count == 1) + break; + if(flag[k] == 0 && count == 2) + break; + } + + for(j = 0; j < 2; j++) + { + kk = k + j + 1; + if(kk > 2) + kk -= 3; + + double *b = trilist[i].p[k]; + double *a = trilist[i].p[kk]; + + for(l = 0; l < 2; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + ed[1] * n[1]); + double t; + if(prod) + t = -sc[k] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) 
+ t = 1; + + for(l = 0; l < 2; l++) + cut[j][l] = b[l] + t * ed[l]; + } + + /* modify the tetra that's assigned to the new point */ + for(j = 0; j < 2; j++) + { + kk = k + j + 1; + if(kk > 2) + kk -= 3; + + for(l = 0; l < 2; l++) + trilist[i].p[kk][l] = cut[j][l]; + } + + oldq = trilist[i].owner; + + if(count == 1) + trilist[i].owner = q; + + /* modify the two new tetras */ + kk = k + 1; + if(kk > 2) + kk -= 3; + + for(l = 0; l < 2; l++) + { + trilist[nnew].p[k][l] = cut[0][l]; + + trilist[nnew + 1].p[k][l] = cut[1][l]; + trilist[nnew + 1].p[kk][l] = cut[0][l]; + } + + if(count == 1) + { + trilist[nnew].owner = oldq; + trilist[nnew + 1].owner = oldq; + } + else + { + trilist[nnew].owner = q; + trilist[nnew + 1].owner = q; + } + nnew += 2; + break; + } + } + + return nnew; +} + +/*! \brief Determines area of triangle (i.e. 2d Volume). + * + * \param i Index in trilist array. + * \param trilist Array with triangles. + * + * \return Area of triangle. + */ +double get_tri_volume(int i, triangle *trilist) +{ + double *p0 = &trilist[i].p[0][0]; + double *p1 = &trilist[i].p[1][0]; + double *p2 = &trilist[i].p[2][0]; + + double nz = (p1[0] - p0[0]) * (p2[1] - p0[1]) - (p1[1] - p0[1]) * (p2[0] - p0[0]); + + return 0.5 * nz; +} + +/*! \brief Process edge for volume calculation. + * + * Calculates the contribution of edge to volumes of neighboring + * Voronoi cells in vol array. + * + * \param[in] T Pointer to tessellation. + * \param[in, out] vol Volume of tetrahedra. + * \param[in] tt Index of triangle in DT array. + * \param[in] nr Index in edge array. 
+ * + * \return void + */ +void derefine_refine_process_edge(tessellation *T, double *vol, int tt, int nr) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra_center *DTC = T->DTC; + + int i, j, qq, p1, p2; + double nx, ny; + double hx, hy; + double dvol, h; + + tetra *t = &DT[tt]; + + i = edge_start[nr]; + j = edge_end[nr]; + + point *dpi = &DP[t->p[i]]; + point *dpj = &DP[t->p[j]]; + + qq = t->t[nr]; + + Edge_visited[tt] |= (1 << nr); + Edge_visited[qq] |= (1 << (t->s[nr])); + + p1 = t->p[i]; + p2 = t->p[j]; + + nx = DTC[tt].cx - DTC[qq].cx; + ny = DTC[tt].cy - DTC[qq].cy; + + double area = sqrt(nx * nx + ny * ny); + + hx = 0.5 * (dpi->x - dpj->x); + hy = 0.5 * (dpi->y - dpj->y); + + h = sqrt(hx * hx + hy * hy); + dvol = 0.5 * area * h; + + if(p1 >= 0 && p1 < DeRefMesh.Ndp) + vol[p1] += dvol; + + if(p2 >= 0 && p2 < DeRefMesh.Ndp) + vol[p2] += dvol; +} + +/*! \brief Computes the circum-circle of all triangles in mesh. + * + * \param[in, out] T Pointer to tessellation. + * + * \return void + */ +void compute_circumcircles(tessellation *T) +{ + tetra *DT = T->DT; + char *DTF = T->DTF; + + int i; + + for(i = 0; i < T->Ndt; i++) + { + if(DTF[i] & 1) + continue; + DTF[i] |= 1; + + if(DT[i].p[0] == DPinfinity) + continue; + if(DT[i].p[1] == DPinfinity) + continue; + if(DT[i].p[2] == DPinfinity) + continue; + + update_circumcircle(T, i); + } +} + +/*! \brief Computes the circum-circle of triangle tt. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt Index of triangle in DT array. 
+ * + * \return void + */ +void update_circumcircle(tessellation *T, int tt) +{ + tetra *DT = T->DT; + tetra_center *DTC = T->DTC; + point *DP = T->DP; + + tetra *t = &DT[tt]; + point *p0, *p1, *p2; + int pp0, pp1, pp2; + + pp0 = t->p[0]; + pp1 = t->p[1]; + pp2 = t->p[2]; + + p0 = &DP[pp0]; + p1 = &DP[pp1]; + p2 = &DP[pp2]; + + if(t->p[0] == DPinfinity) + return; + if(t->p[1] == DPinfinity) + return; + if(t->p[2] == DPinfinity) + return; + + double ax = p1->xx - p0->xx; + double ay = p1->yy - p0->yy; + + double bx = p2->xx - p0->xx; + double by = p2->yy - p0->yy; + + double aa = 0.5 * (ax * ax + ay * ay); + double bb = 0.5 * (bx * bx + by * by); + + double mv_data[] = {ax, ay, aa, bx, by, bb}; + double x[2]; + + int status = solve_linear_equations_2d(mv_data, x); + + if(status < 0) + { + terminate("trouble in circum-circle calculation\n"); + } + else + { + x[0] += p0->xx; + x[1] += p0->yy; + + DTC[tt].cx = (x[0] - 1.0) / ConversionFac + CentralOffsetX; + DTC[tt].cy = (x[1] - 1.0) / ConversionFac + CentralOffsetY; + DTC[tt].cz = 0; + } +} + +/*! \brief Computes the integer coordinates from coordinates for a point. + * + * \pararm[in, out] p Pointer to point. + * + * \return void + */ +void set_integers_for_pointer(point *p) +{ + p->xx = (p->x - CentralOffsetX) * ConversionFac + 1.0; + p->yy = (p->y - CentralOffsetY) * ConversionFac + 1.0; + + if(p->xx < 1.0 || p->xx >= 2.0 || p->yy < 1.0 || p->yy >= 2.0) + { + printf("(%g, %g) (%g, %g)\n", p->x, p->y, p->xx, p->yy); + terminate("invalid coordinate range"); + } + + p->ix = double_to_voronoiint(p->xx); + p->iy = double_to_voronoiint(p->yy); + + p->xx = mask_voronoi_int(p->xx); + p->yy = mask_voronoi_int(p->yy); +} + +/*! \brief Outputs Voronoi mesh to file. + * + * Outputs the Voronoi mesh data from task write Task to lastTask in file + * fname. + * + * \param[in] T Pointer to tesselation. + * \param[in] fname File name of file the data is written in. 
+ * \param[in] writeTask Task that gathers information and writes data. + * \param[in] lastTask Last task that is included in this dump. + * + * \return void + */ +void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask) +{ + CPU_Step[CPU_MISC] += measure_time(); + + FILE *fd; + char msg[1000]; + MPI_Status status; + int i, j, k, MaxNel, Nel; + int ngas_tot, nel_tot, ndt_tot, nel_before, ndt_before, task; + int *EdgeList, *Nedges, *NedgesOffset, *whichtetra; + int *ngas_list, *nel_list, *ndt_list, *tmp; + float *xyz_edges; + tetra *q, *qstart; + + tetra_center *DTC = T->DTC; + tetra *DT = T->DT; + point *DP = T->DP; + + MaxNel = 10 * NumGas; /* max edge list */ + Nel = 0; /* length of edge list */ + + EdgeList = mymalloc("EdgeList", MaxNel * sizeof(int)); + Nedges = mymalloc("Nedges", NumGas * sizeof(int)); + NedgesOffset = mymalloc("NedgesOffset", NumGas * sizeof(int)); + whichtetra = mymalloc("whichtetra", NumGas * sizeof(int)); + xyz_edges = mymalloc("xyz_edges", T->Ndt * DIMS * sizeof(float)); + ngas_list = mymalloc("ngas_list", sizeof(int) * NTask); + nel_list = mymalloc("nel_list", sizeof(int) * NTask); + ndt_list = mymalloc("ndt_list", sizeof(int) * NTask); + + for(i = 0; i < T->Ndt; i++) + { + xyz_edges[i * DIMS + 0] = DTC[i].cx; + xyz_edges[i * DIMS + 1] = DTC[i].cy; + } + + for(i = 0; i < NumGas; i++) + { + Nedges[i] = 0; + whichtetra[i] = -1; + } + + for(i = 0; i < T->Ndt; i++) + { + for(j = 0; j < DIMS + 1; j++) + if(DP[DT[i].p[j]].task == ThisTask && DP[DT[i].p[j]].index >= 0 && DP[DT[i].p[j]].index < NumGas) + whichtetra[DP[DT[i].p[j]].index] = i; + } + + for(i = 0; i < NumGas; i++) + { + if(whichtetra[i] < 0) + continue; + + qstart = q = &DT[whichtetra[i]]; + + do + { + Nedges[i]++; + + if(Nel >= MaxNel) + terminate("Nel >= MaxNel"); + + EdgeList[Nel++] = q - DT; + + for(j = 0; j < 3; j++) + if(DP[q->p[j]].task == ThisTask && DP[q->p[j]].index == i) + break; + + k = j + 1; + if(k >= 3) + k -= 3; + + q = &DT[q->t[k]]; + } + 
while(q != qstart); + } + + for(i = 1, NedgesOffset[0] = 0; i < NumGas; i++) + NedgesOffset[i] = NedgesOffset[i - 1] + Nedges[i - 1]; + + /* determine particle numbers and number of edges in file */ + + if(ThisTask == writeTask) + { + ngas_tot = NumGas; + nel_tot = Nel; + ndt_tot = T->Ndt; + + for(task = writeTask + 1; task <= lastTask; task++) + { + MPI_Recv(&ngas_list[task], 1, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status); + MPI_Recv(&nel_list[task], 1, MPI_INT, task, TAG_LOCALN + 1, MPI_COMM_WORLD, &status); + MPI_Recv(&ndt_list[task], 1, MPI_INT, task, TAG_LOCALN + 2, MPI_COMM_WORLD, &status); + + MPI_Send(&nel_tot, 1, MPI_INT, task, TAG_N, MPI_COMM_WORLD); + MPI_Send(&ndt_tot, 1, MPI_INT, task, TAG_N + 1, MPI_COMM_WORLD); + + ngas_tot += ngas_list[task]; + nel_tot += nel_list[task]; + ndt_tot += ndt_list[task]; + } + + if(!(fd = fopen(fname, "w"))) + { + sprintf(msg, "can't open file `%s' for writing snapshot.\n", fname); + terminate(msg); + } + + my_fwrite(&ngas_tot, sizeof(int), 1, fd); + my_fwrite(&nel_tot, sizeof(int), 1, fd); + my_fwrite(&ndt_tot, sizeof(int), 1, fd); + + my_fwrite(Nedges, sizeof(int), NumGas, fd); + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp = mymalloc("tmp", sizeof(int) * ngas_list[task]); + MPI_Recv(tmp, ngas_list[task], MPI_INT, task, TAG_N + 2, MPI_COMM_WORLD, &status); + my_fwrite(tmp, sizeof(int), ngas_list[task], fd); + myfree(tmp); + } + + my_fwrite(NedgesOffset, sizeof(int), NumGas, fd); + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp = mymalloc("tmp", sizeof(int) * ngas_list[task]); + MPI_Recv(tmp, ngas_list[task], MPI_INT, task, TAG_N + 3, MPI_COMM_WORLD, &status); + my_fwrite(tmp, sizeof(int), ngas_list[task], fd); + myfree(tmp); + } + + my_fwrite(EdgeList, sizeof(int), Nel, fd); + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp = mymalloc("tmp", sizeof(int) * nel_list[task]); + MPI_Recv(tmp, nel_list[task], MPI_INT, task, TAG_N + 4, MPI_COMM_WORLD, &status); + 
my_fwrite(tmp, sizeof(int), nel_list[task], fd); + myfree(tmp); + } + + my_fwrite(xyz_edges, sizeof(float), T->Ndt * DIMS, fd); + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp = mymalloc("tmp", sizeof(float) * DIMS * ndt_list[task]); + MPI_Recv(tmp, sizeof(float) * DIMS * ndt_list[task], MPI_BYTE, task, TAG_N + 5, MPI_COMM_WORLD, &status); + my_fwrite(tmp, sizeof(float), DIMS * ndt_list[task], fd); + myfree(tmp); + } + + fclose(fd); + } + else + { + MPI_Send(&NumGas, 1, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD); + MPI_Send(&Nel, 1, MPI_INT, writeTask, TAG_LOCALN + 1, MPI_COMM_WORLD); + MPI_Send(&T->Ndt, 1, MPI_INT, writeTask, TAG_LOCALN + 2, MPI_COMM_WORLD); + + MPI_Recv(&nel_before, 1, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status); + MPI_Recv(&ndt_before, 1, MPI_INT, writeTask, TAG_N + 1, MPI_COMM_WORLD, &status); + + for(i = 0; i < NumGas; i++) + NedgesOffset[i] += nel_before; + for(i = 0; i < Nel; i++) + EdgeList[i] += ndt_before; + + MPI_Send(Nedges, NumGas, MPI_INT, writeTask, TAG_N + 2, MPI_COMM_WORLD); + MPI_Send(NedgesOffset, NumGas, MPI_INT, writeTask, TAG_N + 3, MPI_COMM_WORLD); + MPI_Send(EdgeList, Nel, MPI_INT, writeTask, TAG_N + 4, MPI_COMM_WORLD); + MPI_Send(xyz_edges, sizeof(float) * DIMS * T->Ndt, MPI_BYTE, writeTask, TAG_N + 5, MPI_COMM_WORLD); + } + + myfree(ndt_list); + myfree(nel_list); + myfree(ngas_list); + myfree(xyz_edges); + myfree(whichtetra); + myfree(NedgesOffset); + myfree(Nedges); + myfree(EdgeList); + + mpi_printf("wrote Voronoi mesh to file\n"); + + CPU_Step[CPU_MAKEIMAGES] += measure_time(); +} + +#endif /* #if defined(TWODIMS) && !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c new file mode 100644 index 0000000000..f8cc3ad712 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c @@ -0,0 +1,5111 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_3d.c + * \date 05/2018 + * \brief Routines to build a 3d Voronoi mesh. + * \details Note that some of these routines have the same name as the ones + * in voronoi_1d.c and voronoi_2d.c and just replace them in case + * neither the Config-option TWODIMS nor ONEDIMS is active. 
+ * contains functions: + * void initialize_and_create_first_tetra(tessellation * T) + * void get_line_segments(int sphp_index, int dp_index, double + * *segments, unsigned int *nof_elements, unsigned int + * max_elements) + * void process_edge_faces_and_volumes(tessellation * T, + * int tt, int nr) + * int derefine_refine_get_triangles(tessellation * T, int tt, + * int nr, point * dtip, triangle * trilist, int ntri, + * int max_n_tri) + * double get_tri_volume(int i, triangle * trilist) + * int derefine_add_point_and_split_tri(int q, triangle + * * trilist, int ntri, int max_ntri, double vol) + * void derefine_refine_process_edge(tessellation * T, + * double *vol, int tt, int nr) + * int insert_point(tessellation * T, int pp, int ttstart) + * int convex_edge_test(tessellation * T, int tt, int tip, + * int *edgenr) + * void make_a_face_split(tessellation * T, int tt0, + * int face_nr, int pp, int tt1, int tt2, int qq1, int qq2) + * void make_an_edge_split(tessellation * T, int tt0, + * int edge_nr, int count, int pp, int *ttlist) + * void make_a_4_to_4_flip(tessellation * T, int tt, + * int tip_index, int edge_nr) + * void make_a_1_to_4_flip(tessellation * T, int pp, int tt0, + * int tt1, int tt2, int tt3) + * void make_a_3_to_2_flip(tessellation * T, int tt0, int tt1, + * int tt2, int tip, int edge, int bottom) + * void make_a_2_to_3_flip(tessellation * T, int tt0, int tip, + * int tt1, int bottom, int qq, int tt2) + * int get_tetra(tessellation * T, point * p, int *moves, + * int ttstart, int *flag, int *edgeface_nr) + * int InTetra(tessellation * T, int tt, point * p, + * int *edgeface_nr, int *nexttetra) + * void compute_circumcircles(tessellation * T) + * void calc_mpz_determinant(mpz_t det, mpz_t ax, mpz_t ay, + * mpz_t az, mpz_t bx, mpz_t by, mpz_t bz, mpz_t cx, + * mpz_t cy, mpz_t cz) + * void get_circumcircle_exact(tessellation * T, int tt, + * double *x, double *y, double *z) + * void update_circumcircle(tessellation * T, int tt) + * int 
test_tetra_orientation(point * p0, point * p1, + * point * p2, point * p3) + * double calculate_tetra_volume(point * p0, point * p1, + * point * p2, point * p3) + * void add_row(double *m, int r1, int r2, double fac) + * int solve_linear_equations(double *m, double *res) + * void set_integers_for_pointer(point * p) + * int InSphere_Exact(point * p0, point * p1, point * p2, + * point * p3, point * p) + * int InSphere_Quick(point * p0, point * p1, point * p2, + * point * p3, point * p) + * int InSphere_Errorbound(point * p0, point * p1, point * p2, + * point * p3, point * p) + * int Orient3d_Exact(point * p0, point * p1, point * p2, + * point * p3) + * int Orient3d_Quick(point * p0, point * p1, point * p2, + * point * p3) + * int Orient3d(point * p0, point * p1, point * p2, point * p3) + * int compare_face_sort(const void *a, const void *b) + * void get_voronoi_face_vertex_indices(tessellation * T) + * void get_voronoi_face_vertex_coordinates(tessellation * T) + * void sort_faces_by_ID(void) + * void write_voronoi_face_vertex_indices(tessellation * T, + * char *fname1, char *fname2, int writeTask, int lastTask) + * void write_voronoi_face_vertex_coordinates(tessellation * T, + * char *fname, int writeTask, int lastTask) + * void write_voronoi_mesh(tessellation * T, char *fname, + * int writeTask, int lastTask) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +#if !defined(TWODIMS) && !defined(ONEDIMS) /* will only be compiled in 3D case */ + +#define INSIDE_EPS 1.0e-6 +#define GAUSS_EPS 1.0e-8 + +const int access_triangles[4][3] = {{1, 3, 2}, {0, 2, 3}, {0, 3, 1}, {0, 1, 2}}; + +const int edge_start[6] = {0, 0, 0, 1, 1, 2}; +const int edge_end[6] = {1, 2, 3, 2, 3, 3}; +const int edge_opposite[6] = {3, 1, 
2, 3, 0, 1}; +const int edge_nexttetra[6] = {2, 3, 1, 0, 2, 0}; + +/*! \brief Initializes 3d tessellation and create all-enclosing tetrahedron. + * + * \param[out] T Pointer to tessellation structure which is set and its arrays + * are allocated in this routine. + * + * \return void + */ +void initialize_and_create_first_tetra(tessellation *T) +{ + point *p; + int i, n; + + T->MaxNdp = T->Indi.AllocFacNdp; + T->MaxNdt = T->Indi.AllocFacNdt; + T->MaxNvf = T->Indi.AllocFacNvf; + + T->Ndp = 0; + T->Ndt = 0; + T->Nvf = 0; + + T->VF = mymalloc_movable(&T->VF, "VF", T->MaxNvf * sizeof(face)); + + T->DP = mymalloc_movable(&T->DP, "DP", (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + T->DT = mymalloc_movable(&T->DT, "DT", T->MaxNdt * sizeof(tetra)); + + /* construct all encompassing huge tetrahedron */ + + double box, tetra_incircle, tetra_sidelength, tetra_height, tetra_face_height; + + box = boxSize_X; + if(box < boxSize_Y) + box = boxSize_Y; + if(box < boxSize_Z) + box = boxSize_Z; + + tetra_incircle = 1.5 * box; + tetra_sidelength = tetra_incircle * sqrt(24); + tetra_height = sqrt(2.0 / 3) * tetra_sidelength; + tetra_face_height = sqrt(3.0) / 2.0 * tetra_sidelength; + + point *DP = T->DP; + tetra *DT = T->DT; + + /* first, let's make the points */ + DP[-4].x = 0.5 * tetra_sidelength; + DP[-4].y = -1.0 / 3 * tetra_face_height; + DP[-4].z = -0.25 * tetra_height; + + DP[-3].x = 0; + DP[-3].y = 2.0 / 3 * tetra_face_height; + DP[-3].z = -0.25 * tetra_height; + + DP[-2].x = -0.5 * tetra_sidelength; + DP[-2].y = -1.0 / 3 * tetra_face_height; + DP[-2].z = -0.25 * tetra_height; + + DP[-1].x = 0; + DP[-1].y = 0; + DP[-1].z = 0.75 * tetra_height; + + for(i = -4; i <= -1; i++) + { + DP[i].x += 0.5 * box; + DP[i].y += 0.5 * box; + DP[i].z += 0.5 * box; + } + + for(i = -4, p = &DP[-4]; i < 0; i++, p++) + { + p->index = -1; + p->task = ThisTask; + p->timebin = 0; + } + + /* we also define a neutral element at infinity */ + DPinfinity = -5; + + DP[DPinfinity].x = 
MAX_DOUBLE_NUMBER; + DP[DPinfinity].y = MAX_DOUBLE_NUMBER; + DP[DPinfinity].z = MAX_DOUBLE_NUMBER; + DP[DPinfinity].index = -1; + DP[DPinfinity].task = ThisTask; + DP[DPinfinity].timebin = 0; + + /* now let's make the big tetrahedron */ + DT[0].p[0] = -4; + DT[0].p[1] = -3; + DT[0].p[2] = -2; + DT[0].p[3] = -1; + + /* On the outer faces, we attach tetrahedra with the neutral element as tip. + * This way we will be able to navigate nicely within the tesselation, + * and all tetrahedra have defined neighbouring tetrahedra. + */ + + for(i = 0; i < 4; i++) + { + n = i + 1; /* tetra index */ + + DT[0].t[i] = n; + DT[0].s[i] = 3; + + DT[n].t[3] = 0; + DT[n].s[3] = i; + DT[n].p[3] = DPinfinity; + } + + DT[1].p[0] = DT[0].p[1]; + DT[1].p[1] = DT[0].p[2]; + DT[1].p[2] = DT[0].p[3]; + + DT[2].p[0] = DT[0].p[0]; + DT[2].p[1] = DT[0].p[3]; + DT[2].p[2] = DT[0].p[2]; + + DT[3].p[0] = DT[0].p[0]; + DT[3].p[1] = DT[0].p[1]; + DT[3].p[2] = DT[0].p[3]; + + DT[4].p[0] = DT[0].p[0]; + DT[4].p[1] = DT[0].p[2]; + DT[4].p[2] = DT[0].p[1]; + + DT[1].t[0] = 2; + DT[2].t[0] = 1; + DT[1].s[0] = 0; + DT[2].s[0] = 0; + + DT[1].t[1] = 3; + DT[3].t[0] = 1; + DT[1].s[1] = 0; + DT[3].s[0] = 1; + + DT[1].t[2] = 4; + DT[4].t[0] = 1; + DT[1].s[2] = 0; + DT[4].s[0] = 2; + + DT[2].t[2] = 3; + DT[3].t[1] = 2; + DT[2].s[2] = 1; + DT[3].s[1] = 2; + + DT[2].t[1] = 4; + DT[4].t[2] = 2; + DT[2].s[1] = 2; + DT[4].s[2] = 1; + + DT[3].t[2] = 4; + DT[4].t[1] = 3; + DT[3].s[2] = 1; + DT[4].s[1] = 2; + + T->Ndt = 5; /* we'll start out with 5 tetras */ + + CentralOffsetX = 0.5 * box - 0.5000001 * tetra_sidelength; + CentralOffsetY = 0.5 * box - (1.0000001 / 3) * tetra_face_height; + CentralOffsetZ = 0.5 * box - 0.25000001 * tetra_height; + + ConversionFac = 1.0 / (1.001 * tetra_sidelength); + +#ifndef OPTIMIZE_MEMORY_USAGE + for(i = -4; i < 0; i++) + set_integers_for_point(T, i); +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ +} + +#ifdef TETRA_INDEX_IN_FACE +/*! \brief Gets the line segments of a Voronoi cell. 
+ * + * Warning: The correspondance sphp_index == dp_index holds only for a global + * timestep! + * + * \param[in] sphp_index The index of the Voronoi cell. + * \param[in] dp_index The index of the corresponding Delaunay point. + * \param[out] segments The array in which the line segments are stored. + * \param[out] nof_elements The number of elements written in segments during + * this function call. + * \param[in] max_elements The maximum size of the segments array. + * + * \return void + */ +void get_line_segments(int sphp_index, int dp_index, double *segments, unsigned int *nof_elements, unsigned int max_elements) +{ + // index for segments array + unsigned int a = 0; + + int edge = SphP[sphp_index].first_connection; + int last_edge = SphP[sphp_index].last_connection; + + // loop over all interfaces of the cell + while(1) + { + int dq_index = DC[edge].dp_index; + + // one of the tetrahedras around the Delaunay connection + int tt = DC[edge].dt_index; + tetra *t = &Mesh.DT[tt]; + + // find the local index of the edge + int nr = 6; + int e, dp_start_index, dp_end_index; + + for(e = 0; e < 6; e++) + { + dp_start_index = t->p[edge_start[e]]; + dp_end_index = t->p[edge_end[e]]; + + if((dp_start_index == dp_index && dp_end_index == dq_index) || (dp_start_index == dq_index && dp_end_index == dp_index)) + { + nr = e; + break; + } + } + + // ensure that the local edge index has been found + assert(nr != 6); + + // already set: t,tt,nr + int i, j, k, l, m, ii, jj, kk, ll, nn; + tetra *prev, *next; + tetra_center *prevc, *nextc; + + i = edge_start[nr]; + j = edge_end[nr]; + k = edge_opposite[nr]; + l = edge_nexttetra[nr]; + + prev = t; + prevc = &Mesh.DTC[tt]; + + do + { + nn = prev->t[l]; + next = &Mesh.DT[nn]; + nextc = &Mesh.DTC[nn]; + + if(a > max_elements - 7) + { + terminate("termination in voronoi_3d.c get_line_segments: not enough memory!"); + } + + segments[a++] = prevc->cx; + segments[a++] = prevc->cy; + segments[a++] = prevc->cz; + segments[a++] = nextc->cx; + 
segments[a++] = nextc->cy; + segments[a++] = nextc->cz; + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + prev = next; + prevc = nextc; + + i = ii; + l = ll; + j = jj; + k = kk; + } + while(next != t); + + if(edge == last_edge) + { + break; + } + + edge = DC[edge].next; + + } // end of while loop + + *nof_elements = a; + + return; +} +#endif /* #ifdef TETRA_INDEX_IN_FACE */ + +/*! \brief Calculate cell volumes and face areas of mesh. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt Index in DT array. + * \param[in] nr Index in edges. + * + * \return void + */ +void process_edge_faces_and_volumes(tessellation *T, int tt, int nr) +{ + int i, j, k, l, m, ii, jj, kk, ll, nn, count, nr_next, p1, p2; + face *f; + tetra *prev, *next; + tetra_center *prevc, *nextc; + double ax, ay, az; + double bx, by, bz; + double cx, cy, cz; + double nx, ny, nz; + double sx, sy, sz; + double hhx, hhy, hhz; + double darea, dvol, h; + + if(T->Nvf + 1 >= T->MaxNvf) + { + T->Indi.AllocFacNvf *= ALLOC_INCREASE_FACTOR; + T->MaxNvf = T->Indi.AllocFacNvf; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNvf=%d Indi.AllocFacNvf=%g\n", ThisTask, T->MaxNvf, T->Indi.AllocFacNvf); +#endif /* #ifdef VERBOSE */ + T->VF = myrealloc_movable(T->VF, T->MaxNvf * sizeof(face)); + + if(T->Nvf + 1 >= T->MaxNvf) + terminate("Nvf larger than MaxNvf"); + } + + tetra *DT = T->DT; + point *DP = T->DP; + face *VF = T->VF; + tetra_center *DTC = T->DTC; + + tetra *t = &DT[tt]; + + i = edge_start[nr]; + j = edge_end[nr]; + k = edge_opposite[nr]; + l = edge_nexttetra[nr]; + + Edge_visited[tt] |= (1 << nr); + + p1 = t->p[i]; + p2 = t->p[j]; + + f = &VF[T->Nvf++]; + + f->area = 0; + f->p1 = p1; + f->p2 = p2; + + f->cx = 0; + f->cy = 0; + f->cz = 0; + +#ifdef 
TETRA_INDEX_IN_FACE + f->dt_index = tt; +#endif /* #ifdef TETRA_INDEX_IN_FACE */ + + hhx = 0.5 * (DP[p1].x - DP[p2].x); + hhy = 0.5 * (DP[p1].y - DP[p2].y); + hhz = 0.5 * (DP[p1].z - DP[p2].z); + + h = sqrt(hhx * hhx + hhy * hhy + hhz * hhz); + + cx = DTC[tt].cx; + cy = DTC[tt].cy; + cz = DTC[tt].cz; + + count = 0; + + prev = t; + prevc = &DTC[tt]; + do + { + nn = prev->t[l]; + next = &DT[nn]; + nextc = &DTC[nn]; + + if(prev != t && next != t) + { + ax = prevc->cx - cx; + ay = prevc->cy - cy; + az = prevc->cz - cz; + + bx = nextc->cx - cx; + by = nextc->cy - cy; + bz = nextc->cz - cz; + + nx = ay * bz - az * by; + ny = az * bx - ax * bz; + nz = ax * by - ay * bx; + + sx = nextc->cx + prevc->cx + cx; + sy = nextc->cy + prevc->cy + cy; + sz = nextc->cz + prevc->cz + cz; + + darea = 0.5 * sqrt(nx * nx + ny * ny + nz * nz); + f->area += darea; + + darea *= (1.0 / 3); + + f->cx += darea * sx; + f->cy += darea * sy; + f->cz += darea * sz; + } + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + /* need to determine the edge number to be able to flag it */ + + for(nr_next = 0; nr_next < 6; nr_next++) + if((edge_start[nr_next] == ii && edge_end[nr_next] == jj) || (edge_start[nr_next] == jj && edge_end[nr_next] == ii)) + { + if((Edge_visited[nn] & (1 << nr_next)) && next != t) + terminate("inconsistency"); + + Edge_visited[nn] |= (1 << nr_next); + break; + } + + prev = next; + prevc = nextc; + i = ii; + l = ll; + j = jj; + k = kk; + + count++; + + if(count > 1000) + terminate("count is too large"); + } + while(next != t); + + i = edge_start[nr]; + j = edge_end[nr]; + + if(f->area) + { + f->cx /= f->area; + f->cy /= f->area; + f->cz /= f->area; + } + +#ifdef REFINEMENT_MERGE_CELLS + f->t = tt; + f->nr = nr; /* delaunay tetra and edge number that generated 
this face */ +#endif /* #ifdef REFINEMENT_MERGE_CELLS */ + + dvol = (1.0 / 3) * f->area * h; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + double angle = sqrt(f->area / M_PI) / h; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + + if(DP[p1].task == ThisTask && DP[p1].index >= 0 && DP[p1].index < NumGas) + { + if(TimeBinSynchronized[P[DP[p1].index].TimeBinHydro]) + { + SphP[DP[p1].index].Volume += dvol; + SphP[DP[p1].index].SurfaceArea += f->area; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + if(SphP[DP[p1].index].MaxFaceAngle < angle) + SphP[DP[p1].index].MaxFaceAngle = angle; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + +#ifdef OUTPUT_SURFACE_AREA + if(f->area) + SphP[DP[p1].index].CountFaces++; +#endif /* #ifdef OUTPUT_SURFACE_AREA */ + +#if defined(REFINEMENT_SPLIT_CELLS) + if(SphP[DP[p1].index].MinimumEdgeDistance > h) + SphP[DP[p1].index].MinimumEdgeDistance = h; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */ + /* let's now compute the center-of-mass of the pyramid at the bottom top */ + sx = 0.75 * f->cx + 0.25 * DP[p1].x; + sy = 0.75 * f->cy + 0.25 * DP[p1].y; + sz = 0.75 * f->cz + 0.25 * DP[p1].z; + + SphP[DP[p1].index].Center[0] += dvol * sx; + SphP[DP[p1].index].Center[1] += dvol * sy; + SphP[DP[p1].index].Center[2] += dvol * sz; + } + } + + if(DP[p2].task == ThisTask && DP[p2].index >= 0 && DP[p2].index < NumGas) + { + if(TimeBinSynchronized[P[DP[p2].index].TimeBinHydro]) + { + SphP[DP[p2].index].Volume += dvol; + SphP[DP[p2].index].SurfaceArea += f->area; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + if(SphP[DP[p2].index].MaxFaceAngle < angle) + SphP[DP[p2].index].MaxFaceAngle = angle; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + +#ifdef OUTPUT_SURFACE_AREA + if(f->area) + SphP[DP[p2].index].CountFaces++; +#endif /* 
#ifdef OUTPUT_SURFACE_AREA */ +#if defined(REFINEMENT_SPLIT_CELLS) + if(SphP[DP[p2].index].MinimumEdgeDistance > h) + SphP[DP[p2].index].MinimumEdgeDistance = h; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */ + /* let's now compute the center-of-mass of the pyramid on top */ + sx = 0.75 * f->cx + 0.25 * DP[p2].x; + sy = 0.75 * f->cy + 0.25 * DP[p2].y; + sz = 0.75 * f->cz + 0.25 * DP[p2].z; + + SphP[DP[p2].index].Center[0] += dvol * sx; + SphP[DP[p2].index].Center[1] += dvol * sy; + SphP[DP[p2].index].Center[2] += dvol * sz; + } + } + + int low_p, high_p; + + if(DP[p1].ID < DP[p2].ID) + { + low_p = p1; + high_p = p2; + } + else + { + low_p = p2; + high_p = p1; + } + + int this_task_responsible_flag = 0; + + if(TimeBinSynchronized[DP[low_p].timebin]) /* the one with the lower ID is active */ + { + /* we need to check whether the one with the lower ID is a local particle */ + if(DP[low_p].task == ThisTask && DP[low_p].index >= 0 && DP[low_p].index < NumGas) + this_task_responsible_flag = 1; + } + else if(TimeBinSynchronized[DP[high_p].timebin]) /* only the side with the higher ID is active */ + { + /* we need to check whether we hold the one with the higher ID, if yes, we'll do it */ + if(DP[high_p].task == ThisTask && DP[high_p].index >= 0 && DP[high_p].index < NumGas) + this_task_responsible_flag = 1; + } + + if(this_task_responsible_flag) + { + for(k = 0; k < 2; k++) + { + int p, q; + + if(k == 0) + { + q = p1; + p = DP[q].index; + } + else + { + q = p2; + p = DP[q].index; + } + + if(DP[q].task == ThisTask) + { + if(DP[q].index >= NumGas) /* this is a local ghost point */ + p -= NumGas; + + SphP[p].ActiveArea += f->area; + } + else + { + /* here we have a foreign ghost point */ + if(DP[q].originalindex < 0) + terminate("should not happen"); + + if(Narea >= MaxNarea) + { + T->Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR; + MaxNarea = T->Indi.AllocFacNflux; + AreaList = myrealloc_movable(AreaList, MaxNarea * sizeof(struct area_list_data)); + + if(Narea >= MaxNarea) 
+ terminate("Narea >= MaxNarea"); + } + + AreaList[Narea].task = DP[q].task; + AreaList[Narea].index = DP[q].originalindex; + AreaList[Narea].darea = f->area; + Narea++; + } + } + } +} + +/*! \brief Gathers tetrahedron data as elements in array called 'trilist'. + * + * \param[in] T Pointer to tessellation. + * \param[in] tt Index of tetrahedron in T->DT array. + * \param[in] nr Index in (global) edge arrays. + * \param[in] dtip Point representing tip of tetrahedron. + * \param[out] trilist List of triangles. + * \param[in] ntri Index in trilist which should be filled. + * \param[in] max_n_tri Maximum index in trilist. + * + * \return New length of trilist data. + */ +int derefine_refine_get_triangles(tessellation *T, int tt, int nr, point *dtip, triangle *trilist, int ntri, int max_n_tri) +{ + tetra *DT = T->DT; + tetra_center *DTC = T->DTC; + + int i, j, k, l, m, ii, jj, kk, ll, nn, count; + tetra *prev, *next; + tetra_center *prevc, *nextc; + double cx, cy, cz; + + tetra *t = &DT[tt]; + + i = edge_start[nr]; + j = edge_end[nr]; + k = edge_opposite[nr]; + l = edge_nexttetra[nr]; + + cx = DTC[tt].cx; + cy = DTC[tt].cy; + cz = DTC[tt].cz; + + count = 0; + + prev = t; + prevc = &DTC[tt]; + do + { + nn = prev->t[l]; + next = &DT[nn]; + nextc = &DTC[nn]; + + if(prev != t && next != t) + { + if(ntri >= max_n_tri) + terminate("ntri >= max_n_tri"); + + trilist[ntri].p[0][0] = cx; + trilist[ntri].p[0][1] = cy; + trilist[ntri].p[0][2] = cz; + + trilist[ntri].p[1][0] = prevc->cx; + trilist[ntri].p[1][1] = prevc->cy; + trilist[ntri].p[1][2] = prevc->cz; + + trilist[ntri].p[2][0] = nextc->cx; + trilist[ntri].p[2][1] = nextc->cy; + trilist[ntri].p[2][2] = nextc->cz; + + trilist[ntri].p[3][0] = dtip->x; + trilist[ntri].p[3][1] = dtip->y; + trilist[ntri].p[3][2] = dtip->z; + + if(get_tri_volume(ntri, trilist) < 0) + { + /* swap two points to get proper orientation */ + trilist[ntri].p[3][0] = nextc->cx; + trilist[ntri].p[3][1] = nextc->cy; + trilist[ntri].p[3][2] = nextc->cz; + 
+ trilist[ntri].p[2][0] = dtip->x; + trilist[ntri].p[2][1] = dtip->y; + trilist[ntri].p[2][2] = dtip->z; + } + + ntri++; + } + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + prev = next; + prevc = nextc; + i = ii; + l = ll; + j = jj; + k = kk; + + count++; + + if(count > 1000) + terminate("count is too large"); + } + while(next != t); + + return ntri; +} + +/*! \brief Returns volume of a tetrahedron. + * + * \param[in] i Index of tetrahedron in trilist. + * \param[in] trilist Array with tetrahedra. + * + * \return Volume of tetrahedron. + */ +double get_tri_volume(int i, triangle *trilist) +{ + double nx, ny, nz; + + double *p0 = &trilist[i].p[0][0]; + double *p1 = &trilist[i].p[1][0]; + double *p2 = &trilist[i].p[2][0]; + double *p3 = &trilist[i].p[3][0]; + + nx = (p1[1] - p0[1]) * (p2[2] - p0[2]) - (p1[2] - p0[2]) * (p2[1] - p0[1]); + ny = (p1[2] - p0[2]) * (p2[0] - p0[0]) - (p1[0] - p0[0]) * (p2[2] - p0[2]); + nz = (p1[0] - p0[0]) * (p2[1] - p0[1]) - (p1[1] - p0[1]) * (p2[0] - p0[0]); + + return (nx * (p3[0] - p0[0]) + ny * (p3[1] - p0[1]) + nz * (p3[2] - p0[2])) / 6.0; +} + +/*! \brief Add point and adjust tetrahedra accordingly. + * + * \param[in] q Index of point in DP array. + * \param[in, out] trilist Array of tetrahedra. + * \param[in] ntri Number of elements in trilist before splitting. + * \param[in] max_ntri Maximum number of tetrahedron allowed. + * \param[in] vol Volume of tetrahedron to be split. + * + * \return Updated number of triangles. 
+ */ +int derefine_add_point_and_split_tri(int q, triangle *trilist, int ntri, int max_ntri, double vol) +{ +#define MIN_VOL_FAC 1.0e-6 + double m[3], n[3], sc[4], *a; + double cut[3][3], p[8][3], ed[3]; + int i, j, k, l, nnew, flag[4], count, oldq; + double vvi, vlargest, vv[5]; + int ilargest, nadd; + + for(i = 0, nnew = ntri; i < ntri; i++) + { + if(q < 0 || q >= Mesh.Ndp) + { + char buf[1000]; + sprintf(buf, "q=%d\n", q); + terminate(buf); + } + + if(trilist[i].owner < 0 || trilist[i].owner >= Mesh.Ndp) + { + char buf[1000]; + sprintf(buf, "trilist[i].owner=%d\n", trilist[i].owner); + terminate(buf); + } + + /* midpoint */ + m[0] = 0.5 * (Mesh.DP[q].x + Mesh.DP[trilist[i].owner].x); + m[1] = 0.5 * (Mesh.DP[q].y + Mesh.DP[trilist[i].owner].y); + m[2] = 0.5 * (Mesh.DP[q].z + Mesh.DP[trilist[i].owner].z); + + n[0] = (Mesh.DP[q].x - Mesh.DP[trilist[i].owner].x); + n[1] = (Mesh.DP[q].y - Mesh.DP[trilist[i].owner].y); + n[2] = (Mesh.DP[q].z - Mesh.DP[trilist[i].owner].z); + + if(q == trilist[i].owner) + terminate("q == trilist[i].owner"); + + for(k = 0, count = 0; k < 4; k++) /* determine the side of each point */ + { + a = &trilist[i].p[k][0]; + + sc[k] = (a[0] - m[0]) * n[0] + (a[1] - m[1]) * n[1] + (a[2] - m[2]) * n[2]; + + if(sc[k] > 0) + { + flag[k] = 1; + count++; + } + else + flag[k] = 0; + } + + switch(count) + { + case 0: /* the whole tetra is on the side of current owner - nothing to be done */ + break; + + case 4: /* the whole tetra is on the side of new point */ + trilist[i].owner = q; /* change owner */ + break; + + case 1: + case 3: + + /* we have one point on either side */ + /* for count=1 the tip of the tetra is cut off and assigned to the new point. 
*/ + /* the rest is subdivided into three tetras */ + + if(nnew + 3 > max_ntri) + { + terminate("nnew + 3 > max_ntri"); + } + + trilist[nnew] = trilist[i]; + trilist[nnew + 1] = trilist[i]; + trilist[nnew + 2] = trilist[i]; + + /* find the point index that is on the other side */ + for(k = 0; k < 4; k++) + { + if(flag[k] == 1 && count == 1) + break; + if(flag[k] == 0 && count == 3) + break; + } + + /* determine the cut-points on the corresponding edges */ + + for(j = 0; j < 3; j++) + { + double *b = trilist[i].p[k]; + double *a = trilist[i].p[access_triangles[k][j]]; + + for(l = 0; l < 3; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]); + double t; + + if(prod) + t = -sc[k] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) + t = 1; + + for(l = 0; l < 3; l++) + cut[j][l] = b[l] + t * ed[l]; + } + + /* modify the tetra that's assigned to the new point */ + for(j = 0; j < 3; j++) + { + double *a = trilist[i].p[access_triangles[k][j]]; + for(l = 0; l < 3; l++) + a[l] = cut[j][l]; + } + + oldq = trilist[i].owner; + + if(count == 1) + trilist[i].owner = q; + + /* modify the three new tetras */ + + for(l = 0; l < 3; l++) + { + trilist[nnew].p[k][l] = cut[0][l]; + + trilist[nnew + 1].p[access_triangles[k][0]][l] = cut[0][l]; + trilist[nnew + 1].p[k][l] = cut[2][l]; + + trilist[nnew + 2].p[access_triangles[k][0]][l] = cut[0][l]; + trilist[nnew + 2].p[access_triangles[k][2]][l] = cut[2][l]; + trilist[nnew + 2].p[k][l] = cut[1][l]; + } + + if(count == 1) + { + trilist[nnew].owner = oldq; + trilist[nnew + 1].owner = oldq; + trilist[nnew + 2].owner = oldq; + } + else + { + trilist[nnew].owner = q; + trilist[nnew + 1].owner = q; + trilist[nnew + 2].owner = q; + } + + nadd = 3; + + vvi = fabs(get_tri_volume(i, trilist)); + for(l = 0; l < nadd; l++) + vv[l] = fabs(get_tri_volume(nnew + l, trilist)); + + /* determine largest */ + ilargest = i; + vlargest = vvi; + for(l = 0; l < nadd; l++) + if(vv[l] > vlargest) + { + vlargest = 
vv[l]; + ilargest = nnew + l; + } + if(i != ilargest) + { + /* swap the largest to location i */ + triangle trisave = trilist[i]; + trilist[i] = trilist[ilargest]; + trilist[ilargest] = trisave; + + vv[ilargest - nnew] = vvi; + } + + for(l = 0; l < nadd; l++) + { + if(vv[l] < MIN_VOL_FAC * vol) + { + vv[l] = vv[nadd - 1]; + trilist[nnew + l] = trilist[nnew + nadd - 1]; + l--; + nadd--; + } + } + + nnew += nadd; + break; + + case 2: + /* we have two points on either side */ + + if(nnew + 5 > max_ntri) + terminate("nnew + 5 > max_ntri"); + + int kfirst, ksecond, jfirst, jsecond; + + if(flag[2] == 1 && flag[3] == 1) + { + kfirst = 3; + ksecond = 2; + jfirst = 0; + jsecond = 1; + } + else if(flag[1] == 1 && flag[3] == 1) + { + kfirst = 3; + ksecond = 1; + jfirst = 2; + jsecond = 0; + } + else if(flag[0] == 1 && flag[3] == 1) + { + kfirst = 3; + ksecond = 0; + jfirst = 1; + jsecond = 2; + } + else if(flag[1] == 1 && flag[2] == 1) + { + kfirst = 1; + ksecond = 2; + jfirst = 3; + jsecond = 0; + } + else if(flag[0] == 1 && flag[2] == 1) + { + kfirst = 0; + ksecond = 2; + jfirst = 1; + jsecond = 3; + } + else if(flag[0] == 1 && flag[1] == 1) + { + kfirst = 0; + ksecond = 1; + jfirst = 3; + jsecond = 2; + } + else + terminate("can't be"); + + int next = 0; + + for(l = 0; l < 3; l++) + p[next][l] = trilist[i].p[kfirst][l]; + next++; + + /* determine cuts with the corresponding two edges */ + { + double *b = trilist[i].p[kfirst]; + double *a = trilist[i].p[jfirst]; + + for(l = 0; l < 3; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]); + double t; + + if(prod) + t = -sc[kfirst] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) + t = 1; + + for(l = 0; l < 3; l++) + p[next][l] = b[l] + t * ed[l]; + next++; + + for(l = 0; l < 3; l++) + p[next][l] = a[l]; + next++; + } + + { + double *b = trilist[i].p[kfirst]; + double *a = trilist[i].p[jsecond]; + + for(l = 0; l < 3; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + 
ed[1] * n[1] + ed[2] * n[2]); + double t; + + if(prod) + t = -sc[kfirst] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) + t = 1; + + for(l = 0; l < 3; l++) + p[next][l] = b[l] + t * ed[l]; + next++; + + for(l = 0; l < 3; l++) + p[next][l] = a[l]; + next++; + } + + for(l = 0; l < 3; l++) + p[next][l] = trilist[i].p[ksecond][l]; + next++; + + { + double *b = trilist[i].p[ksecond]; + double *a = trilist[i].p[jfirst]; + + for(l = 0; l < 3; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]); + double t; + + if(prod) + t = -sc[ksecond] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) + t = 1; + + for(l = 0; l < 3; l++) + p[next][l] = b[l] + t * ed[l]; + next++; + } + + { + double *b = trilist[i].p[ksecond]; + double *a = trilist[i].p[jsecond]; + + for(l = 0; l < 3; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]); + double t; + + if(prod) + t = -sc[ksecond] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) + t = 1; + + for(l = 0; l < 3; l++) + p[next][l] = b[l] + t * ed[l]; + next++; + } + + oldq = trilist[i].owner; + + /* now let's initialize the new triangles */ + for(l = 0; l < 3; l++) + { + /* first the ones that get to the new side */ + trilist[i].p[0][l] = p[0][l]; + trilist[i].p[1][l] = p[6][l]; + trilist[i].p[2][l] = p[5][l]; + trilist[i].p[3][l] = p[7][l]; + + trilist[nnew].p[0][l] = p[1][l]; + trilist[nnew].p[1][l] = p[3][l]; + trilist[nnew].p[2][l] = p[7][l]; + trilist[nnew].p[3][l] = p[0][l]; + + trilist[nnew + 1].p[0][l] = p[1][l]; + trilist[nnew + 1].p[1][l] = p[7][l]; + trilist[nnew + 1].p[2][l] = p[6][l]; + trilist[nnew + 1].p[3][l] = p[0][l]; + + /* now the ones that are on the old side */ + trilist[nnew + 2].p[0][l] = p[1][l]; + trilist[nnew + 2].p[1][l] = p[2][l]; + trilist[nnew + 2].p[2][l] = p[6][l]; + trilist[nnew + 2].p[3][l] = p[4][l]; + + trilist[nnew + 3].p[0][l] = p[3][l]; + trilist[nnew + 3].p[1][l] = p[1][l]; + trilist[nnew + 
3].p[2][l] = p[6][l]; + trilist[nnew + 3].p[3][l] = p[4][l]; + + trilist[nnew + 4].p[0][l] = p[3][l]; + trilist[nnew + 4].p[1][l] = p[6][l]; + trilist[nnew + 4].p[2][l] = p[7][l]; + trilist[nnew + 4].p[3][l] = p[4][l]; + } + + trilist[i].owner = q; + trilist[nnew].owner = q; + trilist[nnew + 1].owner = q; + + trilist[nnew + 2].owner = oldq; + trilist[nnew + 3].owner = oldq; + trilist[nnew + 4].owner = oldq; + + nadd = 5; + + vvi = fabs(get_tri_volume(i, trilist)); + for(l = 0; l < nadd; l++) + vv[l] = fabs(get_tri_volume(nnew + l, trilist)); + + /* determine largest */ + ilargest = i; + vlargest = vvi; + for(l = 0; l < nadd; l++) + if(vv[l] > vlargest) + { + vlargest = vv[l]; + ilargest = nnew + l; + } + if(i != ilargest) + { + /* swap the largest to location i */ + triangle trisave = trilist[i]; + trilist[i] = trilist[ilargest]; + trilist[ilargest] = trisave; + + vv[ilargest - nnew] = vvi; + } + + for(l = 0; l < nadd; l++) + { + if(vv[l] < MIN_VOL_FAC * vol) + { + vv[l] = vv[nadd - 1]; + trilist[nnew + l] = trilist[nnew + nadd - 1]; + l--; + nadd--; + } + } + + nnew += nadd; + break; + } + } + + return nnew; +} + +/*! \brief Processes edge for volume calculation. + * + * Calculates the contribution of edge to volumes of neighboring + * Voronoi cells in vol array. + * + * \param[in] T Pointer to tesselation. + * \param[in, out] volume of tetrahedra. + * \param[in] tt Index of triangle in DT array. + * \param[in] nr Index in edge array. 
+ * + * \return void + */ +void derefine_refine_process_edge(tessellation *T, double *vol, int tt, int nr) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra_center *DTC = T->DTC; + + int i, j, k, l, m, ii, jj, kk, ll, nn, count, nr_next, p1, p2; + tetra *prev, *next; + tetra_center *prevc, *nextc; + double ax, ay, az; + double bx, by, bz; + double cx, cy, cz; + double nx, ny, nz; + double hhx, hhy, hhz; + double darea, dvol, h; + + tetra *t = &DT[tt]; + + i = edge_start[nr]; + j = edge_end[nr]; + k = edge_opposite[nr]; + l = edge_nexttetra[nr]; + + Edge_visited[tt] |= (1 << nr); + + p1 = t->p[i]; + p2 = t->p[j]; + + double area = 0; + + cx = DTC[tt].cx; + cy = DTC[tt].cy; + cz = DTC[tt].cz; + + count = 0; + + prev = t; + prevc = &DTC[tt]; + do + { + nn = prev->t[l]; + next = &DT[nn]; + nextc = &DTC[nn]; + + if(prev != t && next != t) + { + ax = prevc->cx - cx; + ay = prevc->cy - cy; + az = prevc->cz - cz; + + bx = nextc->cx - cx; + by = nextc->cy - cy; + bz = nextc->cz - cz; + + nx = ay * bz - az * by; + ny = az * bx - ax * bz; + nz = ax * by - ay * bx; + + darea = 0.5 * sqrt(nx * nx + ny * ny + nz * nz); + area += darea; + } + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + /* need to determine the edge number to be able to flag it */ + + for(nr_next = 0; nr_next < 6; nr_next++) + if((edge_start[nr_next] == ii && edge_end[nr_next] == jj) || (edge_start[nr_next] == jj && edge_end[nr_next] == ii)) + { + if((Edge_visited[nn] & (1 << nr_next)) && next != t) + terminate("inconsistency"); + + Edge_visited[nn] |= (1 << nr_next); + break; + } + + prev = next; + prevc = nextc; + i = ii; + l = ll; + j = jj; + k = kk; + + count++; + + if(count > 1000) + terminate("count is too large"); + } + while(next != t); + + i = edge_start[nr]; + j = 
edge_end[nr]; + + hhx = 0.5 * (DP[p1].x - DP[p2].x); + hhy = 0.5 * (DP[p1].y - DP[p2].y); + hhz = 0.5 * (DP[p1].z - DP[p2].z); + + h = sqrt(hhx * hhx + hhy * hhy + hhz * hhz); + dvol = (1.0 / 3) * area * h; + + if(p1 >= 0 && p1 < DeRefMesh.Ndp) + vol[p1] += dvol; + + if(p2 >= 0 && p2 < DeRefMesh.Ndp) + vol[p2] += dvol; +} + +/*! \brief Insert a point into mesh. + * + * Finds the tetrahedron that contains this point, splits the tetrahedron. + * After this, flip the edges if needed restore Delaunayhood (which is applied + * recursively) until a valid Delaunay mesh is restored. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp index of Delaunay point in DP array. + * \param[in] ttstart initial guess in which triangle it might be, + * index in DT array. + * + * \return index to tetra that (currently) contains the point pp. + */ +int insert_point(tessellation *T, int pp, int ttstart) +{ + int tt0, tt1, tt2, tt3, tt4, tetra_with_p, tt; + int to_check[STACKSIZE_TETRA], freestack[STACKSIZE_TETRA]; + int n_faces_to_check = 0, nfree_on_stack = 0, moves; + int tip_index, flag, edgeface_nr; + int non_convex, convex_edge = 0, i, j; + + /* first, need to do a point location */ + tt0 = get_tetra(T, &T->DP[pp], &moves, ttstart, &flag, &edgeface_nr); + + tetra_with_p = tt0; + + if(flag == 1) /* that's the normal split of a tetrahedron into 4 */ + { + if(n_faces_to_check >= STACKSIZE_TETRA - 4) + terminate("stacksize exceeded"); + + /* we now need to split this tetrahedron into four */ + if(nfree_on_stack) + tt1 = freestack[--nfree_on_stack]; + else + tt1 = T->Ndt++; + + if(nfree_on_stack) + tt2 = freestack[--nfree_on_stack]; + else + tt2 = T->Ndt++; + + if(nfree_on_stack) + tt3 = freestack[--nfree_on_stack]; + else + tt3 = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, 
T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + + make_a_1_to_4_flip(T, pp, tt0, tt1, tt2, tt3); + + /* now we have a triangulation again - need to check whether there are + facets that are not Delaunay */ + /* let's initialize a stack with the facets that we need to check */ + + n_faces_to_check = 0; + + to_check[n_faces_to_check++] = tt0; + to_check[n_faces_to_check++] = tt1; + to_check[n_faces_to_check++] = tt2; + to_check[n_faces_to_check++] = tt3; + char *DTF = T->DTF; + DTF[tt0] = 0; + DTF[tt1] = 0; + DTF[tt2] = 0; + DTF[tt3] = 0; + } + + if(flag == 2) + { + /* create four new tetra */ + if(nfree_on_stack) + tt1 = freestack[--nfree_on_stack]; + else + tt1 = T->Ndt++; + + if(nfree_on_stack) + tt2 = freestack[--nfree_on_stack]; + else + tt2 = T->Ndt++; + + if(nfree_on_stack) + tt3 = freestack[--nfree_on_stack]; + else + tt3 = T->Ndt++; + + if(nfree_on_stack) + tt4 = freestack[--nfree_on_stack]; + else + tt4 = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + + n_faces_to_check = 0; + + to_check[n_faces_to_check++] = tt0; + to_check[n_faces_to_check++] = T->DT[tt0].t[edgeface_nr]; + to_check[n_faces_to_check++] = tt1; + to_check[n_faces_to_check++] = tt2; + to_check[n_faces_to_check++] = tt3; + 
to_check[n_faces_to_check++] = tt4; + + char *DTF = T->DTF; + DTF[tt0] = 0; + DTF[T->DT[tt0].t[edgeface_nr]] = 0; + DTF[tt1] = 0; + DTF[tt2] = 0; + DTF[tt3] = 0; + DTF[tt4] = 0; + + make_a_face_split(T, tt0, edgeface_nr, pp, tt1, tt2, tt3, tt4); + } + + if(flag == 3) /* here we need to split an edge */ + { + int i, j, k, l, ii, jj, kk, ll, m, count; + int prev, next; + + /* count how many triangles share the edge */ + i = edge_start[edgeface_nr]; + j = edge_end[edgeface_nr]; + k = edge_opposite[edgeface_nr]; + l = edge_nexttetra[edgeface_nr]; + + count = 0; + n_faces_to_check = 0; + + prev = tt0; + do + { + to_check[n_faces_to_check++] = prev; + T->DTF[prev] = 0; + + tetra *DT = T->DT; + next = DT[prev].t[l]; + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(DT[next].p[m] == DT[prev].p[k]) + ll = m; + if(DT[next].p[m] == DT[prev].p[i]) + ii = m; + if(DT[next].p[m] == DT[prev].p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + prev = next; + i = ii; + l = ll; + j = jj; + k = kk; + + count++; + + if(count > 1000) + terminate("count exceeded"); + } + while(next != tt0); + + int *ttlist = mymalloc_movable(&ttlist, "ttlist", count * sizeof(int)); + + for(i = 0; i < count; i++) + { + if(nfree_on_stack) + ttlist[i] = freestack[--nfree_on_stack]; + else + { + ttlist[i] = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, + T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + } + + to_check[n_faces_to_check++] = ttlist[i]; + T->DTF[ttlist[i]] = 0; + } + + 
make_an_edge_split(T, tt0, edgeface_nr, count, pp, ttlist); + + myfree(ttlist); + } + + int iter = 0; + + while(n_faces_to_check) + { + iter++; + if(iter > 200000) + terminate("too many iterations"); + + tt = to_check[--n_faces_to_check]; /* this is the current tetra to look at. + The facet in question lies opposite to q */ + if(T->DT[tt].t[0] < 0) /* deleted? */ + continue; + + for(tip_index = 0; tip_index < 4; tip_index++) + if(T->DT[tt].p[tip_index] == pp) + break; + + if(tip_index < 4) /* otherwise the facet has been removed in a 3-2 flip */ + { + tetra *DT = T->DT; + point *DP = T->DP; + int qq = DT[tt].t[tip_index]; /* tetrahedron that's opposite of ours and shares the facet */ + int ppp = DT[qq].p[DT[tt].s[tip_index]]; /* point that's opposite of the facet in the other tetrahedron */ + + int ret, ret_exact; + + ret = InSphere_Errorbound(&DP[DT[qq].p[0]], &DP[DT[qq].p[1]], &DP[DT[qq].p[2]], &DP[DT[qq].p[3]], &DP[pp]); + CountInSphereTests++; + + if(ret != 0) + ret_exact = ret; + else + { + // let's decide with exact integer arithmetic + ret_exact = InSphere_Exact(&DP[DT[qq].p[0]], &DP[DT[qq].p[1]], &DP[DT[qq].p[2]], &DP[DT[qq].p[3]], &DP[pp]); + CountInSphereTestsExact++; + } + + if(ret_exact > 0) /* facet is illegal, because point lies inside */ + { + /* let's see whether the point lies in the triangle, or on a side, or opposite of one convex edge */ + + non_convex = convex_edge_test(T, tt, tip_index, &convex_edge); + + if(non_convex == 0) /* we can make a 2-3 flip */ + { + int ww; + + if(nfree_on_stack) + ww = freestack[--nfree_on_stack]; + else + ww = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, + T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * 
sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + + if(n_faces_to_check >= STACKSIZE_TETRA - 3) + terminate("stacksize exceeded"); + + make_a_2_to_3_flip(T, tt, tip_index, qq, T->DT[tt].s[tip_index], ppp, ww); + + to_check[n_faces_to_check++] = tt; + to_check[n_faces_to_check++] = qq; + to_check[n_faces_to_check++] = ww; + T->DTF[tt] = 0; + T->DTF[qq] = 0; + T->DTF[ww] = 0; + } + else if(non_convex == 1) /* we might be able to make a 3-2 flip, or we deal with a convex edge on the outer hull */ + { + /* test whether the reflex edge is surrounded by exactly three tetrahedra */ + + i = convex_edge + 2; + if(i >= 3) + i -= 3; + i = access_triangles[tip_index][i]; + + for(j = 0; j < 4; j++) + if(DT[tt].p[i] == DT[qq].p[j]) + break; + + if(j >= 4) + { + terminate("not found"); + } + + if(DT[tt].t[i] == DT[qq].t[j]) /* this means there is exactly one tetrahedron between them, i.e. we have found the + third partner for the flip */ + { + int ww; + + ww = DT[tt].t[i]; + + make_a_3_to_2_flip(T, tt, qq, ww, tip_index, convex_edge, DT[tt].s[tip_index]); + + DT[ww].t[0] = -1; /* mark as deleted */ + + if(nfree_on_stack < STACKSIZE_TETRA) + freestack[nfree_on_stack++] = ww; + else + terminate("stack full"); + + tetra_with_p = tt; + if(n_faces_to_check >= STACKSIZE_TETRA - 2) + terminate("stack too full"); + + to_check[n_faces_to_check++] = tt; + to_check[n_faces_to_check++] = qq; + T->DTF[tt] = 0; + T->DTF[qq] = 0; + } + else + { + if(DT[DT[tt].t[i]].p[DT[tt].s[i]] == DPinfinity && DT[DT[qq].t[j]].p[DT[qq].s[j]] == DPinfinity) + { + printf("convex edge between points=%d %d on outer hull found\n", + (int)(DT[tt].p[access_triangles[tip_index][convex_edge]]), + (int)(DT[tt].p[access_triangles[tip_index][convex_edge < 2 ? 
convex_edge + 1 : 0]])); + + terminate("inconsistency"); /* this should not occur since we have embedded the points into a convex big + triangle */ + } + } + } + else if(non_convex == 2) /* we might be able to make a 4-4 flip */ + { + i = convex_edge + 2; + if(i >= 3) + i -= 3; + i = access_triangles[tip_index][i]; /* this is the point opposite of edge (but not tip) */ + + tetra *DT = T->DT; + char *DTF = T->DTF; + + for(j = 0; j < 4; j++) + if(DT[tt].p[i] == DT[qq].p[j]) + break; + + if(DT[DT[tt].t[i]].p[DT[tt].s[i]] == DT[DT[qq].t[j]].p[DT[qq].s[j]]) + { + /* ok, so we really have 4 tetra. The opposite points match up */ + + to_check[n_faces_to_check++] = tt; + to_check[n_faces_to_check++] = qq; + to_check[n_faces_to_check++] = DT[tt].t[i]; + to_check[n_faces_to_check++] = DT[qq].t[j]; + DTF[tt] = 0; + DTF[qq] = 0; + DTF[DT[tt].t[i]] = 0; + DTF[DT[qq].t[j]] = 0; + + make_a_4_to_4_flip(T, tt, tip_index, convex_edge); + } + } + } + else + tetra_with_p = tt; + } + } + + return tetra_with_p; +} + +/*! \brief Tests edges and detects if a flip is needed. + * + * \param[in] T Pointer to tessellation. + * \param[in] tt Index in DT array. + * \param[in] tip Index of forth point (tip of tetrahedron). + * \param[out] edgenr Index of edge. + * + * \return (-1,0,1,2), depending on which flip is necessary. 
+ */ +int convex_edge_test(tessellation *T, int tt, int tip, int *edgenr) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra *t = &DT[tt]; + int i0, i1, i2, i3; + int vol, flag0, flag1, flag2; + int count_zeros = 0; + + i0 = access_triangles[tip][0]; + i1 = access_triangles[tip][1]; + i2 = access_triangles[tip][2]; + i3 = tip; + + point *p0 = &DP[t->p[i0]]; + point *p1 = &DP[t->p[i1]]; + point *p2 = &DP[t->p[i2]]; + point *p3 = &DP[t->p[i3]]; + point *p4 = &DP[DT[t->t[i3]].p[t->s[i3]]]; + + CountConvexEdgeTest++; + +#ifndef OPTIMIZE_MEMORY_USAGE + double ax = p1->xx - p0->xx; + double ay = p1->yy - p0->yy; + double az = p1->zz - p0->zz; + + double bx = p2->xx - p0->xx; + double by = p2->yy - p0->yy; + double bz = p2->zz - p0->zz; + + double cx = p3->xx - p0->xx; + double cy = p3->yy - p0->yy; + double cz = p3->zz - p0->zz; + + double qx = p4->xx - p0->xx; + double qy = p4->yy - p0->yy; + double qz = p4->zz - p0->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double ax, ay, az, bx, by, bz, cx, cy, cz, qx, qy, qz; + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p0, pA_ixyz, pA_xyz); + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + ax = pB_xyz[0] - pA_xyz[0]; + ay = pB_xyz[1] - pA_xyz[1]; + az = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + bx = pB_xyz[0] - pA_xyz[0]; + by = pB_xyz[1] - pA_xyz[1]; + bz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + cx = pB_xyz[0] - pA_xyz[0]; + cy = pB_xyz[1] - pA_xyz[1]; + cz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p4, pB_ixyz, pB_xyz); + qx = pB_xyz[0] - pA_xyz[0]; + qy = pB_xyz[1] - pA_xyz[1]; + qz = pB_xyz[2] - pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ + + double mv_data[] = {ax, bx, cx, qx, ay, by, cy, qy, az, bz, cz, qz}; + double x[3]; + + int status; + + status = solve_linear_equations(mv_data, x); + + /* x now contains the coordinates of the point p4 expanded in the basis (a,b,c) */ + /* 
the coordinates of point 3 in this basis are (0,0,1) */ + + if(status >= 0) + { + if(fabs(1.0 - x[2]) < INSIDE_EPS) + terminate("inconsistency"); + + double u, v, w; + + w = 1.0 / (1.0 - x[2]); + + u = w * x[0]; + v = w * x[1]; + + if(u > INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS) + { + /* we have a point safely in the triangle: 2-3 flip should be fine */ + return 0; + } + + if(u > INSIDE_EPS && v < -INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS) + { + /* edge 0 is clearly reflect, 3-2 flip allowed around edge 0 */ + *edgenr = 0; + return 1; + } + + if(u > INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) < -INSIDE_EPS) + { + // printf("3-2 flip allowed since edge 1 is reflex\n"); + *edgenr = 1; + return 1; + } + + if(u < -INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS) + { + // printf("3-2 flip allowed since edge 2 is reflex\n"); + *edgenr = 2; + return 1; + } + + if(u < -INSIDE_EPS && v < -INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS) + return -1; /* two reflex edges */ + + if(u < -INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) < -INSIDE_EPS) + return -1; /* two reflex edges */ + + if(u > INSIDE_EPS && v < -INSIDE_EPS && (1 - (u + v)) < -INSIDE_EPS) + return -1; /* two reflex edges */ + } + + CountConvexEdgeTestExact++; + + /* Now we need to test in more detail if we are on one of the edges */ + + vol = Orient3d_Exact(p0, p1, p2, p3); + + if(vol <= 0) + { + printf("flat or negatively tetrahedron found (vol=%d)\n", vol); + { + printf("p0=%d %g %g %g\n", (int)(p0 - DP), p0->x, p0->y, p0->z); + printf("p1=%d %g %g %g\n", (int)(p1 - DP), p1->x, p1->y, p1->z); + printf("p2=%d %g %g %g\n", (int)(p2 - DP), p2->x, p2->y, p2->z); + printf("p3=%d %g %g %g\n", (int)(p3 - DP), p3->x, p3->y, p3->z); + dump_points(T); + terminate("inconsistent tetrahedron"); + } + } + + flag0 = Orient3d_Exact(p1, p3, p2, p4); + flag1 = Orient3d_Exact(p0, p2, p3, p4); + flag2 = Orient3d_Exact(p0, p3, p1, p4); + + if(flag0 == 0) + count_zeros++; + + if(flag1 == 0) + count_zeros++; 
+ + if(flag2 == 0) + count_zeros++; + + if(flag0 >= 0 && flag1 >= 0 && flag2 < 0) + { + // printf("3-2 flip allowed since edge 0 is reflex\n"); + *edgenr = 0; + return 1; + } + + if(flag0 < 0 && flag1 >= 0 && flag2 >= 0) + { + // printf("3-2 flip allowed since edge 1 is reflex\n"); + *edgenr = 1; + return 1; + } + + if(flag0 >= 0 && flag1 < 0 && flag2 >= 0) + { + // printf("3-2 flip allowed since edge 2 is reflex\n"); + *edgenr = 2; + return 1; + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 == 0) + { + // printf("4-4 flip around edge 0 may be possible\n"); + *edgenr = 0; + return 2; + } + + if(flag0 >= 0 && flag1 == 0 && flag2 >= 0) + { + // printf("4-4 flip around edge 2 may be possible\n"); + *edgenr = 2; + return 2; + } + + if(flag0 == 0 && flag1 >= 0 && flag2 >= 0) + { + // printf("4-4 flip around edge 1 may be possible\n"); + *edgenr = 1; + return 2; + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0) + { + /* we seem to have a point in the triangle: 2-3 flip should be fine */ + return 0; + } + + return -1; +} + +/*! \brief Performs face split. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt0 First index in DT array. + * \param[in] face_nr Index of face. + * \param[in] pp Index of point. + * \param[in] tt1 Second index in DT array. + * \param[in] tt2 Third index in DT array. + * \param[in] qq1 Index in DT array. + * \param[in] qq2 Index in DT array. 
+ * + * \return void + */ +void make_a_face_split(tessellation *T, int tt0, int face_nr, int pp, int tt1, int tt2, int qq1, int qq2) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + int qq0 = t0->t[face_nr]; + tetra *q0 = &DT[qq0]; + tetra *q1 = &DT[qq1]; + tetra *q2 = &DT[qq2]; + + int m, i0 = -1, i1 = -1, i2 = -1, i3 = -1, j0 = -1, j1 = -1, j2 = -1, j3 = -1; + + Count_FaceSplits++; + CountFlips++; + + *t1 = *t0; + *t2 = *t0; + + *q1 = *q0; + *q2 = *q0; + + i3 = face_nr; + j3 = t0->s[face_nr]; + + switch(i3) + { + case 3: + i0 = 0; + i1 = 1; + i2 = 2; + break; + case 2: + i0 = 0; + i1 = 3; + i2 = 1; + break; + case 1: + i0 = 0; + i1 = 2; + i2 = 3; + break; + case 0: + i0 = 1; + i1 = 3; + i2 = 2; + break; + } + + for(m = 0; m < 4; m++) + { + if(q0->p[m] == t0->p[i0]) + j0 = m; + if(q0->p[m] == t0->p[i1]) + j2 = m; + if(q0->p[m] == t0->p[i2]) + j1 = m; + } + + if(i0 < 0 || i1 < 0 || i2 < 0 || i3 < 0 || j0 < 0 || j1 < 0 || j2 < 0 || j3 < 0) + terminate("inconsistency"); + + t0->p[i2] = pp; + t1->p[i0] = pp; + t2->p[i1] = pp; + + q0->p[j1] = pp; + q1->p[j0] = pp; + q2->p[j2] = pp; + + t0->t[i0] = tt1; + t1->t[i2] = tt0; + t0->s[i0] = i2; + t1->s[i2] = i0; + + t1->t[i1] = tt2; + t2->t[i0] = tt1; + t1->s[i1] = i0; + t2->s[i0] = i1; + + t2->t[i2] = tt0; + t0->t[i1] = tt2; + t2->s[i2] = i1; + t0->s[i1] = i2; + + q0->t[j0] = qq1; + q1->t[j1] = qq0; + q0->s[j0] = j1; + q1->s[j1] = j0; + + q1->t[j2] = qq2; + q2->t[j0] = qq1; + q1->s[j2] = j0; + q2->s[j0] = j2; + + q2->t[j1] = qq0; + q0->t[j2] = qq2; + q2->s[j1] = j2; + q0->s[j2] = j1; + + t0->t[i3] = qq0; + q0->t[j3] = tt0; + t0->s[i3] = j3; + q0->s[j3] = i3; + + t1->t[i3] = qq1; + q1->t[j3] = tt1; + t1->s[i3] = j3; + q1->s[j3] = i3; + + t2->t[i3] = qq2; + q2->t[j3] = tt2; + t2->s[i3] = j3; + q2->s[j3] = i3; + + DT[t0->t[i2]].t[t0->s[i2]] = tt0; + DT[t1->t[i0]].t[t1->s[i0]] = tt1; + DT[t2->t[i1]].t[t2->s[i1]] = tt2; + + DT[q0->t[j1]].t[q0->s[j1]] = qq0; + 
DT[q1->t[j0]].t[q1->s[j0]] = qq1; + DT[q2->t[j2]].t[q2->s[j2]] = qq2; +} + +/*! \brief Performs edge split. + * + * \param[in, out] T Pointer to tessellation + * \param[in] tt0 Index in DT array + * \param[in] edge_nr Index of edge + * \param[in] count Number of elements in lists. + * \param[in] pp Index to point. + * \param[in] ttlist List of indices in DT. + */ +void make_an_edge_split(tessellation *T, int tt0, int edge_nr, int count, int pp, int *ttlist) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *prev, *next; + tetra **tlist, **t_orig_list; + int *i_list, *j_list, *k_list, *l_list; + int i, j, k, l, ii, jj, kk, ll, m, nr, nrm, nrp; + + Count_EdgeSplits++; + CountFlips++; + + tlist = mymalloc("tlist", count * sizeof(tetra *)); + t_orig_list = mymalloc("t_orig_list", count * sizeof(tetra *)); + i_list = mymalloc("i_list", sizeof(int) * count); + j_list = mymalloc("j_list", sizeof(int) * count); + k_list = mymalloc("k_list", sizeof(int) * count); + l_list = mymalloc("l_list", sizeof(int) * count); + + for(i = 0; i < count; i++) + tlist[i] = &DT[ttlist[i]]; + + i = edge_start[edge_nr]; + j = edge_end[edge_nr]; + k = edge_opposite[edge_nr]; + l = edge_nexttetra[edge_nr]; + + nr = 0; + prev = t0; + do + { + t_orig_list[nr] = prev; + i_list[nr] = i; + j_list[nr] = j; + k_list[nr] = k; + l_list[nr] = l; + + next = &DT[prev->t[l]]; + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + prev = next; + i = ii; + l = ll; + j = jj; + k = kk; + + nr++; + } + while(next != t0); + + for(nr = 0; nr < count; nr++) + { + *tlist[nr] = *t_orig_list[nr]; + + t_orig_list[nr]->p[j_list[nr]] = pp; + tlist[nr]->p[i_list[nr]] = pp; + + t_orig_list[nr]->t[i_list[nr]] = tlist[nr] - DT; + tlist[nr]->t[j_list[nr]] = t_orig_list[nr] - DT; + + 
t_orig_list[nr]->s[i_list[nr]] = j_list[nr]; + tlist[nr]->s[j_list[nr]] = i_list[nr]; + + DT[tlist[nr]->t[i_list[nr]]].t[tlist[nr]->s[i_list[nr]]] = tlist[nr] - DT; + + nrp = nr + 1; + if(nrp >= count) + nrp -= count; + + nrm = nr - 1; + if(nrm < 0) + nrm += count; + + tlist[nr]->t[l_list[nr]] = tlist[nrp] - DT; + tlist[nr]->s[l_list[nr]] = k_list[nrp]; + + tlist[nr]->t[k_list[nr]] = tlist[nrm] - DT; + tlist[nr]->s[k_list[nr]] = l_list[nrm]; + } + + myfree(l_list); + myfree(k_list); + myfree(j_list); + myfree(i_list); + + myfree(t_orig_list); + myfree(tlist); +} + +/*! \brief Make a 4 to 4 flip. + * + * See Springel (2010) for discussion on flips. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt Index in DT array. + * \param[in] tip_index Index of the point making up the tip of the + * tetrahedron. + * \param[in] edge_nr Index of edge. + * + * \return void + */ +void make_a_4_to_4_flip(tessellation *T, int tt, int tip_index, int edge_nr) +{ + tetra *DT = T->DT; + // printf("4-to-4 flip\n"); + tetra *t = &DT[tt]; + int i0, i1, i2, j; + int ww, qq, uu; + tetra *w, *q, *u; + tetra *t_top[4], *t_bottom[4]; + int s_top[4], s_bottom[4]; + int p[6]; + + Count_4_to_4_Flips++; + CountFlips++; + + uu = 0; + u = NULL; + + for(j = 0; j < 4; j++) + { + t_top[j] = NULL; + t_bottom[j] = NULL; + s_top[j] = -1; + s_bottom[j] = -1; + } + + i0 = access_triangles[tip_index][edge_nr]; + edge_nr += 1; + if(edge_nr >= 3) + edge_nr -= 3; + i1 = access_triangles[tip_index][edge_nr]; + edge_nr += 1; + if(edge_nr >= 3) + edge_nr -= 3; + i2 = access_triangles[tip_index][edge_nr]; + + t_top[0] = &DT[t->t[i0]]; + s_top[0] = t->s[i0]; + + t_top[1] = &DT[t->t[i1]]; + s_top[1] = t->s[i1]; + + ww = t->t[i2]; + w = &DT[ww]; + qq = t->t[tip_index]; + q = &DT[qq]; + + for(j = 0; j < 4; j++) + { + if(w->p[j] == t->p[i0]) + { + t_top[3] = &DT[w->t[j]]; + s_top[3] = w->s[j]; + } + + if(w->p[j] == t->p[i1]) + { + t_top[2] = &DT[w->t[j]]; + s_top[2] = w->s[j]; + } + + if(w->p[j] == 
t->p[tip_index]) + { + uu = w->t[j]; + u = &DT[uu]; + } + } + + for(j = 0; j < 4; j++) + { + if(u->p[j] == t->p[i0]) + { + t_bottom[3] = &DT[u->t[j]]; + s_bottom[3] = u->s[j]; + } + + if(u->p[j] == t->p[i1]) + { + t_bottom[2] = &DT[u->t[j]]; + s_bottom[2] = u->s[j]; + } + + if(q->p[j] == t->p[i0]) + { + t_bottom[0] = &DT[q->t[j]]; + s_bottom[0] = q->s[j]; + } + + if(q->p[j] == t->p[i1]) + { + t_bottom[1] = &DT[q->t[j]]; + s_bottom[1] = q->s[j]; + } + } + + p[0] = t->p[i1]; + p[1] = t->p[i2]; + p[2] = t->p[i0]; + p[3] = DT[t->t[i2]].p[t->s[i2]]; + p[4] = t->p[tip_index]; + p[5] = DT[t->t[tip_index]].p[t->s[tip_index]]; + + for(j = 0; j < 4; j++) + { + if(t_top[j] == NULL || t_bottom[j] == NULL) + { + printf("bad!\n"); + terminate("inconsistency"); + } + } + + for(j = 0; j < 4; j++) + { + if(t_top[j] == NULL || t_bottom[j] == NULL) + { + printf("bad!\n"); + terminate("inconsistency"); + } + } + + t->p[0] = p[0]; + t->p[1] = p[1]; + t->p[2] = p[5]; + t->p[3] = p[4]; + + q->p[0] = p[1]; + q->p[1] = p[2]; + q->p[2] = p[5]; + q->p[3] = p[4]; + + u->p[0] = p[2]; + u->p[1] = p[3]; + u->p[2] = p[5]; + u->p[3] = p[4]; + + w->p[0] = p[3]; + w->p[1] = p[0]; + w->p[2] = p[5]; + w->p[3] = p[4]; + + t->t[0] = qq; + q->t[1] = tt; + t->s[0] = 1; + q->s[1] = 0; + + q->t[0] = uu; + u->t[1] = qq; + q->s[0] = 1; + u->s[1] = 0; + + u->t[0] = ww; + w->t[1] = uu; + u->s[0] = 1; + w->s[1] = 0; + + w->t[0] = tt; + t->t[1] = ww; + w->s[0] = 1; + t->s[1] = 0; + + t->t[2] = t_top[0] - DT; + t->s[2] = s_top[0]; + DT[t->t[2]].t[t->s[2]] = tt; + DT[t->t[2]].s[t->s[2]] = 2; + + t->t[3] = t_bottom[0] - DT; + t->s[3] = s_bottom[0]; + DT[t->t[3]].t[t->s[3]] = tt; + DT[t->t[3]].s[t->s[3]] = 3; + + q->t[2] = t_top[1] - DT; + q->s[2] = s_top[1]; + DT[q->t[2]].t[q->s[2]] = qq; + DT[q->t[2]].s[q->s[2]] = 2; + + q->t[3] = t_bottom[1] - DT; + q->s[3] = s_bottom[1]; + DT[q->t[3]].t[q->s[3]] = qq; + DT[q->t[3]].s[q->s[3]] = 3; + + u->t[2] = t_top[2] - DT; + u->s[2] = s_top[2]; + DT[u->t[2]].t[u->s[2]] = uu; + 
DT[u->t[2]].s[u->s[2]] = 2; + + u->t[3] = t_bottom[2] - DT; + u->s[3] = s_bottom[2]; + DT[u->t[3]].t[u->s[3]] = uu; + DT[u->t[3]].s[u->s[3]] = 3; + + w->t[2] = t_top[3] - DT; + w->s[2] = s_top[3]; + DT[w->t[2]].t[w->s[2]] = ww; + DT[w->t[2]].s[w->s[2]] = 2; + + w->t[3] = t_bottom[3] - DT; + w->s[3] = s_bottom[3]; + DT[w->t[3]].t[w->s[3]] = ww; + DT[w->t[3]].s[w->s[3]] = 3; +} + +/*! \brief Make a 1 to 4 flip. + * + * See Springel (2010) for discussion on flips. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp Index of new point. + * \param[in] tt0 Index or first point in DT array. + * \param[in] tt1 Index of second point in DT array. + * \param[in] tt2 Index of third point in DT array. + * \param[in] tt3 Index of forth point in DT array. + * + * \return void + */ +void make_a_1_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3) +{ + tetra *DT = T->DT; + + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + tetra *t3 = &DT[tt3]; + + Count_1_to_4_Flips++; + CountFlips++; + + *t1 = *t0; + *t2 = *t0; + *t3 = *t0; + + t0->p[0] = pp; + t1->p[1] = pp; + t2->p[2] = pp; + t3->p[3] = pp; + + t0->t[1] = tt1; + t1->t[0] = tt0; + t0->s[1] = 0; + t1->s[0] = 1; + + t1->t[2] = tt2; + t2->t[1] = tt1; + t1->s[2] = 1; + t2->s[1] = 2; + + t2->t[0] = tt0; + t0->t[2] = tt2; + t2->s[0] = 2; + t0->s[2] = 0; + + t0->t[3] = tt3; + t3->t[0] = tt0; + t0->s[3] = 0; + t3->s[0] = 3; + + t1->t[3] = tt3; + t3->t[1] = tt1; + t1->s[3] = 1; + t3->s[1] = 3; + + t2->t[3] = tt3; + t3->t[2] = tt2; + t2->s[3] = 2; + t3->s[2] = 3; + + DT[t0->t[0]].t[t0->s[0]] = tt0; + DT[t1->t[1]].t[t1->s[1]] = tt1; + DT[t2->t[2]].t[t2->s[2]] = tt2; + DT[t3->t[3]].t[t3->s[3]] = tt3; +} + +/*! \brief Make a 3 to 2 flip. + * + * See Springel (2010) for discussion on flips. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp Index of new point. + * \param[in] tt0 Index or first point in DT array. + * \param[in] tt1 Index of second point in DT array. 
+ * \param[in] tt2 Index of third point in DT array. + * \param[in] tip Index of point making up tip of tetrahedron. + * \param[in] edge Index of edge. + * \param[in] bottom Tetrahedron on bottom. + * + * \return void + */ +void make_a_3_to_2_flip(tessellation *T, int tt0, int tt1, int tt2, int tip, int edge, int bottom) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + + int i, j, k, ii, jj, iii, jjj; + tetra qbak, tbak, wbak; + + Count_3_to_2_Flips++; + CountFlips++; + + tbak = *t0; + qbak = *t1; + wbak = *t2; + + i = edge; + j = i + 1; + k = i + 2; + if(j >= 3) + j -= 3; + if(k >= 3) + k -= 3; + + i = access_triangles[tip][i]; + j = access_triangles[tip][j]; + k = access_triangles[tip][k]; + + for(ii = 0; ii < 4; ii++) + if(tbak.p[i] == qbak.p[ii]) + break; + + for(iii = 0; iii < 4; iii++) + if(tbak.p[i] == wbak.p[iii]) + break; + + for(jj = 0; jj < 4; jj++) + if(tbak.p[j] == qbak.p[jj]) + break; + + for(jjj = 0; jjj < 4; jjj++) + if(tbak.p[j] == wbak.p[jjj]) + break; + + t0->p[0] = qbak.p[bottom]; + t0->p[1] = tbak.p[k]; + t0->p[2] = tbak.p[i]; + t0->p[3] = tbak.p[tip]; + + t1->p[0] = qbak.p[bottom]; + t1->p[1] = tbak.p[j]; + t1->p[2] = tbak.p[k]; + t1->p[3] = tbak.p[tip]; + + t0->t[2] = tt1; + t1->t[1] = tt0; + t0->s[2] = 1; + t1->s[1] = 2; + + t0->t[0] = tbak.t[j]; + t0->s[0] = tbak.s[j]; + DT[t0->t[0]].s[t0->s[0]] = 0; + DT[t0->t[0]].t[t0->s[0]] = tt0; + + t0->t[3] = qbak.t[jj]; + t0->s[3] = qbak.s[jj]; + DT[t0->t[3]].s[t0->s[3]] = 3; + DT[t0->t[3]].t[t0->s[3]] = tt0; + + t0->t[1] = wbak.t[jjj]; + t0->s[1] = wbak.s[jjj]; + DT[t0->t[1]].s[t0->s[1]] = 1; + DT[t0->t[1]].t[t0->s[1]] = tt0; + + t1->t[0] = tbak.t[i]; + t1->s[0] = tbak.s[i]; + DT[t1->t[0]].s[t1->s[0]] = 0; + DT[t1->t[0]].t[t1->s[0]] = tt1; + + t1->t[3] = qbak.t[ii]; + t1->s[3] = qbak.s[ii]; + DT[t1->t[3]].s[t1->s[3]] = 3; + DT[t1->t[3]].t[t1->s[3]] = tt1; + + t1->t[2] = wbak.t[iii]; + t1->s[2] = wbak.s[iii]; + DT[t1->t[2]].s[t1->s[2]] = 2; + 
DT[t1->t[2]].t[t1->s[2]] = tt1; + + CountFlips++; +} + +/*! \brief Make a 2 to 3 flip + * + * See Springel (2010) for discussion on flips. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp Index of new point. + * \param[in] tt0 Index or first point in DT array. + * \param[in] tip Index of point makting up tip of tetrahedron. + * \param[in] tt1 Index of second point in DT array. + * \param[in] bottom Tetrahedron on bottom. + * \param[in] qq Index of point. + * \param[in] tt2 Index of third point in DT array. + * + * \return void + */ +void make_a_2_to_3_flip(tessellation *T, int tt0, int tip, int tt1, int bottom, int qq, int tt2) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + tetra qbak, tbak; + int k; + + Count_2_to_3_Flips++; + + tbak = *t0; + qbak = *t1; /* to save info */ + + *t1 = *t0; + *t2 = *t0; + + /* redefine points */ + t0->p[access_triangles[tip][0]] = qq; + t1->p[access_triangles[tip][1]] = qq; + t2->p[access_triangles[tip][2]] = qq; + + /* make neighbour connections */ + t0->t[access_triangles[tip][1]] = tt1; + t1->t[access_triangles[tip][0]] = tt0; + t0->s[access_triangles[tip][1]] = access_triangles[tip][0]; + t1->s[access_triangles[tip][0]] = access_triangles[tip][1]; + + t0->t[access_triangles[tip][2]] = tt2; + t2->t[access_triangles[tip][0]] = tt0; + t0->s[access_triangles[tip][2]] = access_triangles[tip][0]; + t2->s[access_triangles[tip][0]] = access_triangles[tip][2]; + + t1->t[access_triangles[tip][2]] = tt2; + t2->t[access_triangles[tip][1]] = tt1; + t1->s[access_triangles[tip][2]] = access_triangles[tip][1]; + t2->s[access_triangles[tip][1]] = access_triangles[tip][2]; + + /* these are the ones on the top */ + DT[t0->t[access_triangles[tip][0]]].t[t0->s[access_triangles[tip][0]]] = tt0; + DT[t1->t[access_triangles[tip][1]]].t[t1->s[access_triangles[tip][1]]] = tt1; + DT[t2->t[access_triangles[tip][2]]].t[t2->s[access_triangles[tip][2]]] = tt2; + + /* now the one at the 
bottom */ + + if(qbak.p[access_triangles[bottom][0]] == tbak.p[access_triangles[tip][0]]) + k = 0; + else if(qbak.p[access_triangles[bottom][1]] == tbak.p[access_triangles[tip][0]]) + k = 1; + else + k = 2; + + t0->t[tip] = qbak.t[access_triangles[bottom][k]]; + t0->s[tip] = qbak.s[access_triangles[bottom][k]]; + DT[t0->t[tip]].t[t0->s[tip]] = tt0; + DT[t0->t[tip]].s[t0->s[tip]] = tip; + + if(qbak.p[access_triangles[bottom][0]] == tbak.p[access_triangles[tip][1]]) + k = 0; + else if(qbak.p[access_triangles[bottom][1]] == tbak.p[access_triangles[tip][1]]) + k = 1; + else + k = 2; + + t1->t[tip] = qbak.t[access_triangles[bottom][k]]; + t1->s[tip] = qbak.s[access_triangles[bottom][k]]; + DT[t1->t[tip]].t[t1->s[tip]] = tt1; + DT[t1->t[tip]].s[t1->s[tip]] = tip; + + if(qbak.p[access_triangles[bottom][0]] == tbak.p[access_triangles[tip][2]]) + k = 0; + else if(qbak.p[access_triangles[bottom][1]] == tbak.p[access_triangles[tip][2]]) + k = 1; + else + k = 2; + + t2->t[tip] = qbak.t[access_triangles[bottom][k]]; + t2->s[tip] = qbak.s[access_triangles[bottom][k]]; + DT[t2->t[tip]].t[t2->s[tip]] = tt2; + DT[t2->t[tip]].s[t2->s[tip]] = tip; +} + +static int ErrorFlag = 0; + +/*! \brief Gets tetrahedron. + * + * Returns the index of the tetrahedron containing the point DP[pp]. + * The search is started from the tetrahedron DT[ttstart]. + * + * \param[in] T Pointer to tessellation. + * \param[in] p Point. + * \param[out] moves The number of moves necessary to find tetrahedron. + * \param[out] flag The return value from InTetra, specifying whether + * the point is inside or on the edge/face. + * \param[out] edgeface_nr The edge/face number on the tetrahedron containing + * the point, in case flag is >1. + * + * \return Index of tetrahedron. 
+ */ +int get_tetra(tessellation *T, point *p, int *moves, int ttstart, int *flag, int *edgeface_nr) +{ + int ret, count_moves = 0; + int tt, next_tetra; + + tt = ttstart; + +#define MAX_COUNT_MOVES 1000000 + + while((ret = InTetra(T, tt, p, edgeface_nr, &next_tetra)) == 0) + { + count_moves++; + + if(count_moves > MAX_COUNT_MOVES) + { + ErrorFlag = 1; + + if(count_moves > MAX_COUNT_MOVES + 10) + terminate("too many moves"); + } + + tt = next_tetra; + } + + *moves = count_moves; + *flag = ret; + + return tt; +} + +/*! \brief Is point in tetrahedron? + * + * Tests whether point DP[pp] lies in the tetrahedron DT[tt]. The + * return value is 0 if the point is outside, 1 if it's inside, 2 if + * it's on a face, and 3 if it's on an edge. If it's either of the + * last two, the edgeface_nr is set to the corresponding index of the + * edge or face. If the point is outside, nexttetra is set to the + * index of a neighboring tetrahedron in the direction of the + * point, otherwise it's unmodified. + * + * \param[in] T Tesslation. + * \param[in] tt Index of tetrahedron in DT array. + * \param[in] p Point. + * \param[out] edgeface_nr The edge/face number on the tetrahedron containing + * the point, in case flag is >1. + * \param[out] nexttetra Index of tetrahedron. + * + * \return Point in thetrahedron? + * + */ +int InTetra(tessellation *T, int tt, point *p, int *edgeface_nr, int *nexttetra) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra *t = &DT[tt]; + + point *p0 = &DP[t->p[0]]; + point *p1 = &DP[t->p[1]]; + point *p2 = &DP[t->p[2]]; + point *p3 = &DP[t->p[3]]; + + // test if we are in an "infinity tetra", which are the ones that + // bound the tesselated volume. Arepo terminates if this happens, + // but for Sunrise this is a valid occurence so we'll return -1 to + // indicate the point is outside the volume. 
XXX Actually it + // shouldn't do this anymore because we now do box tests instead + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + { +#ifndef LONGIDS + printf("task=%d: we are in a tetraeder with an infinity point. tetra=%d, coordinates of point=(%g|%g|%g) ID=%d\n", ThisTask, tt, + p->x, p->y, p->z, p->ID); +#else /* #ifndef LONGIDS */ + printf("task=%d: we are in a tetraeder with an infinity point. tetra=%d, coordinates of point=(%g|%g|%g) ID=%llu\n", ThisTask, + tt, p->x, p->y, p->z, p->ID); +#endif /* #ifndef LONGIDS #else */ + terminate("invalid tetrahedron"); + } + + Count_InTetra++; + +#ifndef OPTIMIZE_MEMORY_USAGE + double ax = p1->xx - p0->xx; + double ay = p1->yy - p0->yy; + double az = p1->zz - p0->zz; + + double bx = p2->xx - p0->xx; + double by = p2->yy - p0->yy; + double bz = p2->zz - p0->zz; + + double cx = p3->xx - p0->xx; + double cy = p3->yy - p0->yy; + double cz = p3->zz - p0->zz; + + double qx = p->xx - p0->xx; + double qy = p->yy - p0->yy; + double qz = p->zz - p0->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double ax, ay, az, bx, by, bz, cx, cy, cz, qx, qy, qz; + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p0, pA_ixyz, pA_xyz); + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + ax = pB_xyz[0] - pA_xyz[0]; + ay = pB_xyz[1] - pA_xyz[1]; + az = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + bx = pB_xyz[0] - pA_xyz[0]; + by = pB_xyz[1] - pA_xyz[1]; + bz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + cx = pB_xyz[0] - pA_xyz[0]; + cy = pB_xyz[1] - pA_xyz[1]; + cz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p, pB_ixyz, pB_xyz); + qx = pB_xyz[0] - pA_xyz[0]; + qy = pB_xyz[1] - pA_xyz[1]; + qz = pB_xyz[2] - pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + double mv_data[] = {ax, bx, cx, qx, ay, by, cy, qy, az, bz, cz, qz}; + double x[3]; + + int ivol, flag3, flag2, flag1, flag0; + int 
count_zeros = 0; + + int status; + + status = solve_linear_equations(mv_data, x); + + if(status < 0) + { + ivol = Orient3d_Exact(p0, p1, p2, p3); + if(ivol <= 0) + { + printf("flat or negatively tetrahedron found (ivol=%d) tt=%d\n", ivol, tt); + terminate("invalid tetrahedron"); + } + } + + /* x now contains the coordinates of the point p expanded in the basis (a,b,c) */ + + if(ErrorFlag) + { + ivol = Orient3d_Exact(p0, p1, p2, p3); + flag3 = Orient3d_Exact(p0, p1, p2, p); + flag2 = Orient3d_Exact(p0, p3, p1, p); + flag1 = Orient3d_Exact(p0, p2, p3, p); + flag0 = Orient3d_Exact(p1, p3, p2, p); + + printf("\n\nTetra=%d\n", (int)(t - DT)); + printf("ivol=%d flag0=%d %d %d %d\n", ivol, flag0, flag1, flag2, flag3); + printf("xx = %g %g %g 1-sum=%g\n", x[0], x[1], x[2], 1 - (x[0] + x[1] + x[2])); + printf("a= %g %g %g\n", ax, ay, az); + printf("b= %g %g %g\n", bx, by, bz); + printf("c= %g %g %g\n", cx, cy, cz); + printf("q= %g %g %g\n", qx, qy, qz); + printf("(axb)*c) = %g\n", (ay * bz - az * by) * cx + (az * bx - ax * bz) * cy + (ax * by - ay * bx) * cz); + printf("next tetras=%d %d %d %d\n", t->t[0], t->t[1], t->t[2], t->t[3]); + } + + if(status >= 0) + { + if(x[0] > INSIDE_EPS && x[1] > INSIDE_EPS && x[2] > INSIDE_EPS && (1 - (x[0] + x[1] + x[2])) > INSIDE_EPS) + { + /* looks like we are safely inside the tetrahedron */ + + return 1; /* our point is really nicely inside the tetrahedron */ + } + + if(x[0] < -INSIDE_EPS || x[1] < -INSIDE_EPS || x[2] < -INSIDE_EPS || (1 - (x[0] + x[1] + x[2])) < -INSIDE_EPS) + { + /* looks like we are clearly outside the tetrahedron. 
+ Let's look for a good neighbouring tetrahedron to continue the search */ + + /* note: in the (a,b,c) basis, the center-of-mass has coordinates (1/4, 1/4, 1/4) */ + + double w, u, v; + + if(ErrorFlag) + { + w = 0.25 / (0.25 - x[2]); + u = 0.25 + w * (x[0] - 0.25); + v = 0.25 + w * (x[1] - 0.25); + printf("[3] w=%g u=%g v=%g fabs(x[2] - 0.25)=%g\n", w, u, v, fabs(x[2] - 0.25)); + + w = 0.25 / (0.25 - x[1]); + u = 0.25 + w * (x[0] - 0.25); + v = 0.25 + w * (x[2] - 0.25); + printf("[3] w=%g u=%g v=%g fabs(x[1] - 0.25)=%g\n", w, u, v, fabs(x[1] - 0.25)); + + w = 0.25 / (0.25 - x[0]); + u = 0.25 + w * (x[1] - 0.25); + v = 0.25 + w * (x[2] - 0.25); + printf("[3] w=%g u=%g v=%g fabs(x[0] - 0.25)=%g\n", w, u, v, fabs(x[0] - 0.25)); + } + + if(fabs(x[2] - 0.25) > INSIDE_EPS) + { + w = 0.25 / (0.25 - x[2]); + if(w > 0) + { + u = 0.25 + w * (x[0] - 0.25); + v = 0.25 + w * (x[1] - 0.25); + if(u > -INSIDE_EPS && v > -INSIDE_EPS && (1 - (u + v) > -INSIDE_EPS)) + { + *nexttetra = t->t[3]; + return 0; + } + } + } + + if(fabs(x[1] - 0.25) > INSIDE_EPS) + { + w = 0.25 / (0.25 - x[1]); + if(w > 0) + { + u = 0.25 + w * (x[0] - 0.25); + v = 0.25 + w * (x[2] - 0.25); + if(u > -INSIDE_EPS && v > -INSIDE_EPS && (1 - (u + v) > -INSIDE_EPS)) + { + *nexttetra = t->t[2]; + return 0; + } + } + } + + if(fabs(x[0] - 0.25) > INSIDE_EPS) + { + w = 0.25 / (0.25 - x[0]); + if(w > 0) + { + u = 0.25 + w * (x[1] - 0.25); + v = 0.25 + w * (x[2] - 0.25); + if(u > -INSIDE_EPS && v > -INSIDE_EPS && (1 - (u + v) > -INSIDE_EPS)) + { + *nexttetra = t->t[1]; + return 0; + } + } + } + + *nexttetra = t->t[0]; + return 0; + } + } + + /* here we need to decide whether we have a degenerate case, i.e. 
+ whether we think the point lies on a face or an edge of the tetrahedron */ + + if(ErrorFlag) + { + printf("doing exact test for tetra=%d\n", (int)(t - DT)); + } + + Count_InTetraExact++; + + if((ivol = Orient3d_Exact(p0, p1, p2, p3)) <= 0) + { + printf("flat or negatively oriented tetrahedron found (vol=%d)\n", ivol); + terminate("invalid tetrahedron"); + } + + flag3 = Orient3d_Exact(p0, p1, p2, p); + flag2 = Orient3d_Exact(p0, p3, p1, p); + flag1 = Orient3d_Exact(p0, p2, p3, p); + flag0 = Orient3d_Exact(p1, p3, p2, p); + + if(flag0 == 0) + count_zeros++; + + if(flag1 == 0) + count_zeros++; + + if(flag2 == 0) + count_zeros++; + + if(flag3 == 0) + count_zeros++; + + if(count_zeros > 2) + { + printf("task=%d flags=%d %d %d %d (axb)*c = %g\n", ThisTask, flag0, flag1, flag2, flag3, + (ay * bz - az * by) * cx + (az * bx - ax * bz) * cy + (ax * by - ay * bx) * cz); + + printf( + "task=%d pp0=%ld pp1=%ld pp2=%ld pp3=%ld p=%ld IDs=(%llu %llu %llu %llu %llu) pos_0=(%g|%g|%g) pos_1=(%g|%g|%g) " + "pos_2=(%g|%g|%g) pos_3=(%g|%g|%g) pos=(%g|%g|%g)\n", + ThisTask, p0 - DP, p1 - DP, p2 - DP, p3 - DP, p - DP, (long long)p0->ID, (long long)p1->ID, (long long)p2->ID, + (long long)p3->ID, (long long)p->ID, p0->x, p0->y, p0->z, p1->x, p1->y, p1->z, p2->x, p2->y, p2->z, p3->x, p3->y, p3->z, + p->x, p->y, p->z); + +#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) + printf("task=%d imageflags=(%d %d %d %d %d)\n", ThisTask, p0->image_flags, p1->image_flags, p2->image_flags, p3->image_flags, + p->image_flags); +#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */ + terminate("strange zero count"); + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0 && flag3 >= 0) + { + /* we have a point inside the tetra, but it may still be on one of the edges */ + + if(count_zeros == 0) + { + /* ok, let's split the tetra in 4, we are apparently well enough inside */ + return 1; + } + + if(count_zeros == 1) /* we lie on a face */ + { + 
if(flag0 == 0) + { + *edgeface_nr = 0; + return 2; + } + + if(flag1 == 0) + { + *edgeface_nr = 1; + return 2; + } + + if(flag2 == 0) + { + *edgeface_nr = 2; + return 2; + } + + if(flag3 == 0) + { + *edgeface_nr = 3; + return 2; + } + } + + if(count_zeros == 2) /* we lie on an edge */ + { + if(flag0 == 0 && flag1 == 0) + { + *edgeface_nr = 5; + return 3; + } + + if(flag0 == 0 && flag2 == 0) + { + *edgeface_nr = 4; + return 3; + } + + if(flag0 == 0 && flag3 == 0) + { + *edgeface_nr = 3; + return 3; + } + + if(flag1 == 0 && flag2 == 0) + { + *edgeface_nr = 2; + return 3; + } + + if(flag1 == 0 && flag3 == 0) + { + *edgeface_nr = 1; + return 3; + } + + if(flag2 == 0 && flag3 == 0) + { + *edgeface_nr = 0; + return 3; + } + } + } + + /* we seem to be lying clearly outside the tetrahedron */ + /* Let's determine a suitable neighbour */ + + /* if there is a single negative value, let's pick this side */ + + if(flag0 < 0 && flag1 >= 0 && flag2 >= 0 && flag3 >= 0) + { + *nexttetra = t->t[0]; + return 0; + } + + if(flag0 >= 0 && flag1 < 0 && flag2 >= 0 && flag3 >= 0) + { + *nexttetra = t->t[1]; + return 0; + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 < 0 && flag3 >= 0) + { + *nexttetra = t->t[2]; + return 0; + } + if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0 && flag3 < 0) + { + *nexttetra = t->t[3]; + return 0; + } + + /* there are at least two negative values. Let's pick a random one */ + + int ind = -1; + + if(flag0 < 0) + { + if(ind < 0) + ind = 0; + else + { + if(get_random_number() < 0.5) + ind = 0; + } + } + + if(flag1 < 0) + { + if(ind < 0) + ind = 1; + else + { + if(get_random_number() < 0.5) + ind = 1; + } + } + + if(flag2 < 0) + { + if(ind < 0) + ind = 2; + else + { + if(get_random_number() < 0.5) + ind = 2; + } + } + + if(flag3 < 0) + { + if(ind < 0) + ind = 3; + else + { + if(get_random_number() < 0.5) + ind = 3; + } + } + + *nexttetra = t->t[ind]; + return 0; +} + +/*! \brief Computes the circum-circle of all tetrahedra in mesh. 
+ * + * \param[in, out] T Pointer to tessellation. + * + * \return void + */ +void compute_circumcircles(tessellation *T) +{ + tetra *DT = T->DT; + char *DTF = T->DTF; + int i; + + for(i = 0; i < T->Ndt; i++) + { + if(DTF[i] & 1) + continue; + DTF[i] |= 1; + + if(DT[i].t[0] < 0) /* deleted ? */ + continue; + + if(DT[i].p[0] == DPinfinity) + continue; + if(DT[i].p[1] == DPinfinity) + continue; + if(DT[i].p[2] == DPinfinity) + continue; + if(DT[i].p[3] == DPinfinity) + continue; + + update_circumcircle(T, i); + } +} + +/*! \brief Determinant calculation with arbitrary precision arithmetics. + * + * Auxiliary function for exact circum-circle calculation. + * + * \return void + */ +void calc_mpz_determinant(mpz_t det, mpz_t ax, mpz_t ay, mpz_t az, mpz_t bx, mpz_t by, mpz_t bz, mpz_t cx, mpz_t cy, mpz_t cz) +{ + mpz_t bz_cy, by_cz, cz_ay, cy_az, az_by, ay_bz; + + mpz_init(bz_cy); + mpz_mul(bz_cy, bz, cy); + + mpz_init(by_cz); + mpz_mul(by_cz, by, cz); + + mpz_init(cz_ay); + mpz_mul(cz_ay, cz, ay); + + mpz_init(cy_az); + mpz_mul(cy_az, cy, az); + + mpz_init(az_by); + mpz_mul(az_by, az, by); + + mpz_init(ay_bz); + mpz_mul(ay_bz, ay, bz); + + mpz_t bzcy_bycz, czay_cyaz, azby_aybz; + + mpz_init(bzcy_bycz); + mpz_init(czay_cyaz); + mpz_init(azby_aybz); + + mpz_sub(bzcy_bycz, bz_cy, by_cz); + mpz_sub(czay_cyaz, cz_ay, cy_az); + mpz_sub(azby_aybz, az_by, ay_bz); + + mpz_t a, b, c, ab; + + mpz_init(a); + mpz_init(b); + mpz_init(c); + + mpz_mul(a, bzcy_bycz, ax); + mpz_mul(b, czay_cyaz, bx); + mpz_mul(c, azby_aybz, cx); + + mpz_init(ab); + + mpz_add(ab, a, b); + mpz_add(det, ab, c); + + mpz_clear(ab); + mpz_clear(c); + mpz_clear(b); + mpz_clear(a); + mpz_clear(azby_aybz); + mpz_clear(czay_cyaz); + mpz_clear(bzcy_bycz); + mpz_clear(ay_bz); + mpz_clear(az_by); + mpz_clear(cy_az); + mpz_clear(cz_ay); + mpz_clear(by_cz); + mpz_clear(bz_cy); +} + +/*! \brief Arbitrary precision calculation of circum-circle. + * + * \param[in, out] T Pointer to tessellation. 
+ * \param[in] tt Index in DT array. + * \param[out] x X coordinate of circum-circle center. + * \param[out] y Y coordinate of circum-circle center. + * \param[out] z Z coordinate of circum-circle center. + * + * \return void + */ +void get_circumcircle_exact(tessellation *T, int tt, double *x, double *y, double *z) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra *t = &DT[tt]; + + point *p0 = &DP[t->p[0]]; + point *p1 = &DP[t->p[1]]; + point *p2 = &DP[t->p[2]]; + point *p3 = &DP[t->p[3]]; + + mpz_t det, detA, detB, detC; + mpz_t qx, qy, qz; + mpz_t a2, b2, c2, tmp, AA, BB, CC; + mpz_t ax, ay, az, bx, by, bz, cx, cy, cz; + + mpz_init(det); + mpz_init(detA); + mpz_init(detB); + mpz_init(detC); + mpz_init(qx); + mpz_init(qy); + mpz_init(qz); + + mpz_init(a2); + mpz_init(b2); + mpz_init(c2); + mpz_init(tmp); + mpz_init(AA); + mpz_init(BB); + mpz_init(CC); + + mpz_init(ax); + mpz_init(ay); + mpz_init(az); + mpz_init(bx); + mpz_init(by); + mpz_init(bz); + mpz_init(cx); + mpz_init(cy); + mpz_init(cz); + +#ifndef OPTIMIZE_MEMORY_USAGE + MY_mpz_set_si(tmp, p1->ix); + MY_mpz_sub_ui(ax, tmp, p0->ix); + MY_mpz_set_si(tmp, p1->iy); + MY_mpz_sub_ui(ay, tmp, p0->iy); + MY_mpz_set_si(tmp, p1->iz); + MY_mpz_sub_ui(az, tmp, p0->iz); + + MY_mpz_set_si(tmp, p2->ix); + MY_mpz_sub_ui(bx, tmp, p0->ix); + MY_mpz_set_si(tmp, p2->iy); + MY_mpz_sub_ui(by, tmp, p0->iy); + MY_mpz_set_si(tmp, p2->iz); + MY_mpz_sub_ui(bz, tmp, p0->iz); + + MY_mpz_set_si(tmp, p3->ix); + MY_mpz_sub_ui(cx, tmp, p0->ix); + MY_mpz_set_si(tmp, p3->iy); + MY_mpz_sub_ui(cy, tmp, p0->iy); + MY_mpz_set_si(tmp, p3->iz); + MY_mpz_sub_ui(cz, tmp, p0->iz); +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + double pA_xyz[3], pB_xyz[3]; + + get_integers_for_point(p0, pA_ixyz, pA_xyz); + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + MY_mpz_set_si(tmp, pB_ixyz[0]); + MY_mpz_sub_ui(ax, tmp, pA_ixyz[0]); + MY_mpz_set_si(tmp, pB_ixyz[1]); + MY_mpz_sub_ui(ay, tmp, pA_ixyz[1]); + 
MY_mpz_set_si(tmp, pB_ixyz[2]); + MY_mpz_sub_ui(az, tmp, pA_ixyz[2]); + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + MY_mpz_set_si(tmp, pB_ixyz[0]); + MY_mpz_sub_ui(bx, tmp, pA_ixyz[0]); + MY_mpz_set_si(tmp, pB_ixyz[1]); + MY_mpz_sub_ui(by, tmp, pA_ixyz[1]); + MY_mpz_set_si(tmp, pB_ixyz[2]); + MY_mpz_sub_ui(bz, tmp, pA_ixyz[2]); + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + MY_mpz_set_si(tmp, pB_ixyz[0]); + MY_mpz_sub_ui(cx, tmp, pA_ixyz[0]); + MY_mpz_set_si(tmp, pB_ixyz[1]); + MY_mpz_sub_ui(cy, tmp, pA_ixyz[1]); + MY_mpz_set_si(tmp, pB_ixyz[2]); + MY_mpz_sub_ui(cz, tmp, pA_ixyz[2]); +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + mpz_set(tmp, ax); + mpz_mul(AA, tmp, ax); + mpz_set(tmp, ay); + mpz_mul(BB, tmp, ay); + mpz_set(tmp, az); + mpz_mul(CC, tmp, az); + mpz_add(tmp, AA, BB); + mpz_add(a2, tmp, CC); + + mpz_set(tmp, bx); + mpz_mul(AA, tmp, bx); + mpz_set(tmp, by); + mpz_mul(BB, tmp, by); + mpz_set(tmp, bz); + mpz_mul(CC, tmp, bz); + mpz_add(tmp, AA, BB); + mpz_add(b2, tmp, CC); + + mpz_set(tmp, cx); + mpz_mul(AA, tmp, cx); + mpz_set(tmp, cy); + mpz_mul(BB, tmp, cy); + mpz_set(tmp, cz); + mpz_mul(CC, tmp, cz); + mpz_add(tmp, AA, BB); + mpz_add(c2, tmp, CC); + + calc_mpz_determinant(det, ax, ay, az, bx, by, bz, cx, cy, cz); + calc_mpz_determinant(detA, a2, ay, az, b2, by, bz, c2, cy, cz); + calc_mpz_determinant(detB, ax, a2, az, bx, b2, bz, cx, c2, cz); + calc_mpz_determinant(detC, ax, ay, a2, bx, by, b2, cx, cy, c2); + + mpz_cdiv_q(tmp, detA, det); + mpz_tdiv_q_2exp(qx, tmp, 1); + + mpz_cdiv_q(tmp, detB, det); + mpz_tdiv_q_2exp(qy, tmp, 1); + + mpz_cdiv_q(tmp, detC, det); + mpz_tdiv_q_2exp(qz, tmp, 1); + +#ifndef OPTIMIZE_MEMORY_USAGE + MY_mpz_set_si(tmp, p0->ix); + mpz_add(AA, qx, tmp); + + MY_mpz_set_si(tmp, p0->iy); + mpz_add(BB, qy, tmp); + + MY_mpz_set_si(tmp, p0->iz); + mpz_add(CC, qz, tmp); +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + MY_mpz_set_si(tmp, pA_ixyz[0]); + mpz_add(AA, qx, tmp); + + MY_mpz_set_si(tmp, pA_ixyz[1]); + mpz_add(BB, 
qy, tmp); + + MY_mpz_set_si(tmp, pA_ixyz[2]); + mpz_add(CC, qz, tmp); +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + double xx, yy, zz; + + xx = mpz_get_d(AA); + yy = mpz_get_d(BB); + zz = mpz_get_d(CC); + + xx /= (1LLu << USEDBITS); + yy /= (1LLu << USEDBITS); + zz /= (1LLu << USEDBITS); + + xx = xx / ConversionFac + CentralOffsetX; + yy = yy / ConversionFac + CentralOffsetY; + zz = zz / ConversionFac + CentralOffsetZ; + + *x = xx; + *y = yy; + *z = zz; + + mpz_clear(det); + mpz_clear(detA); + mpz_clear(detB); + mpz_clear(detC); + mpz_clear(qx); + mpz_clear(qy); + mpz_clear(qz); + + mpz_clear(a2); + mpz_clear(b2); + mpz_clear(c2); + mpz_clear(tmp); + mpz_clear(AA); + mpz_clear(BB); + mpz_clear(CC); + + mpz_clear(ax); + mpz_clear(ay); + mpz_clear(az); + mpz_clear(bx); + mpz_clear(by); + mpz_clear(bz); + mpz_clear(cx); + mpz_clear(cy); + mpz_clear(cz); +} + +/*! \brief Computes the circum-circle of tetrahedron tt. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt Index of triangle in DT array. + * + * \return void + */ +void update_circumcircle(tessellation *T, int tt) +{ + tetra *DT = T->DT; + tetra_center *DTC = T->DTC; + point *DP = T->DP; + tetra *t = &DT[tt]; + tetra_center *tc = &DTC[tt]; + + if(t->t[0] < 0) /* deleted ? 
*/ + return; + + point *p0 = &DP[t->p[0]]; + point *p1 = &DP[t->p[1]]; + point *p2 = &DP[t->p[2]]; + point *p3 = &DP[t->p[3]]; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + return; + +#ifndef OPTIMIZE_MEMORY_USAGE + double ax = p1->xx - p0->xx; + double ay = p1->yy - p0->yy; + double az = p1->zz - p0->zz; + + double bx = p2->xx - p0->xx; + double by = p2->yy - p0->yy; + double bz = p2->zz - p0->zz; + + double cx = p3->xx - p0->xx; + double cy = p3->yy - p0->yy; + double cz = p3->zz - p0->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double ax, ay, az, bx, by, bz, cx, cy, cz; + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p0, pA_ixyz, pA_xyz); + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + ax = pB_xyz[0] - pA_xyz[0]; + ay = pB_xyz[1] - pA_xyz[1]; + az = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + bx = pB_xyz[0] - pA_xyz[0]; + by = pB_xyz[1] - pA_xyz[1]; + bz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + cx = pB_xyz[0] - pA_xyz[0]; + cy = pB_xyz[1] - pA_xyz[1]; + cz = pB_xyz[2] - pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + double aa = 0.5 * (ax * ax + ay * ay + az * az); + double bb = 0.5 * (bx * bx + by * by + bz * bz); + double cc = 0.5 * (cx * cx + cy * cy + cz * cz); + + double mv_data[] = {ax, ay, az, aa, bx, by, bz, bb, cx, cy, cz, cc}; + double x[3]; + + int status = solve_linear_equations(mv_data, x); + + if(status < 0) + { + if(Orient3d_Exact(p0, p1, p2, p3) != 1) + { + printf("p0 = %g %g %g\n", p0->x, p0->y, p0->z); + printf("p1 = %g %g %g\n", p1->x, p1->y, p1->z); + printf("p2 = %g %g %g\n", p2->x, p2->y, p2->z); + printf("p3 = %g %g %g\n", p3->x, p3->y, p3->z); + + printf("Orient-Test=%d\n", Orient3d_Exact(p0, p1, p2, p3)); + printf("tetra-volume=%g tetra=%d\n", calculate_tetra_volume(p0, p1, p2, p3), tt); + + return; + } + + double xc, yc, zc; + + get_circumcircle_exact(T, tt, &xc, 
&yc, &zc); + + tc->cx = xc; + tc->cy = yc; + tc->cz = zc; + } + else + { +#ifndef OPTIMIZE_MEMORY_USAGE + x[0] += p0->xx; + x[1] += p0->yy; + x[2] += p0->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + x[0] += pA_xyz[0]; + x[1] += pA_xyz[1]; + x[2] += pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + tc->cx = (x[0] - 1.0) / ConversionFac + CentralOffsetX; + tc->cy = (x[1] - 1.0) / ConversionFac + CentralOffsetY; + tc->cz = (x[2] - 1.0) / ConversionFac + CentralOffsetZ; + } +} + +/*! \brief Returns the orientation of the tetrahedron. + * + * \param[in] p0 Point spanning the tetrahedron. + * \param[in] p1 Point spanning the tetrahedron. + * \param[in] p2 Point spanning the tetrahedron. + * \param[in] p3 Point spanning the tetrahedron. + * + * \return -1: negative orientation; +1 positive orientation. + */ +int test_tetra_orientation(point *p0, point *p1, point *p2, point *p3) +{ + double nx, ny, nz; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + return +1; + +#ifndef OPTIMIZE_MEMORY_USAGE + nx = (p1->yy - p0->yy) * (p2->zz - p0->zz) - (p1->zz - p0->zz) * (p2->yy - p0->yy); + ny = (p1->zz - p0->zz) * (p2->xx - p0->xx) - (p1->xx - p0->xx) * (p2->zz - p0->zz); + nz = (p1->xx - p0->xx) * (p2->yy - p0->yy) - (p1->yy - p0->yy) * (p2->xx - p0->xx); + if(nx * (p3->xx - p0->xx) + ny * (p3->yy - p0->yy) + nz * (p3->zz - p0->zz) >= 0) + return +1; + else + return -1; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + IntegerMapType p0_ixyz[3], p1_ixyz[3], p2_ixyz[3], p3_ixyz[3]; + double p0_xyz[3], p1_xyz[3], p2_xyz[3], p3_xyz[3]; + + get_integers_for_point(p0, p0_ixyz, p0_xyz); + get_integers_for_point(p1, p1_ixyz, p1_xyz); + get_integers_for_point(p2, p2_ixyz, p2_xyz); + get_integers_for_point(p3, p3_ixyz, p3_xyz); + + nx = (p1_xyz[1] - p0_xyz[1]) * (p2_xyz[2] - p0_xyz[2]) - (p1_xyz[2] - p0_xyz[2]) * (p2_xyz[1] - p0_xyz[1]); + ny = (p1_xyz[2] - p0_xyz[2]) * (p2_xyz[0] - p0_xyz[0]) - (p1_xyz[0] - p0_xyz[0]) * (p2_xyz[2] - p0_xyz[2]); + 
nz = (p1_xyz[0] - p0_xyz[0]) * (p2_xyz[1] - p0_xyz[1]) - (p1_xyz[1] - p0_xyz[1]) * (p2_xyz[0] - p0_xyz[0]); + + get_integers_for_point(p3, p3_ixyz, p3_xyz); + + if(nx * (p3_xyz[0] - p0_xyz[0]) + ny * (p3_xyz[1] - p0_xyz[1]) + nz * (p3_xyz[2] - p0_xyz[2]) >= 0) + return +1; + else + return -1; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ +} + +/*! \brief Calculate the volume of a tetrahedron. + * + * \param[in] p0 Point spanning the tetrahedron. + * \param[in] p1 Point spanning the tetrahedron. + * \param[in] p2 Point spanning the tetrahedron. + * \param[in] p3 Point spanning the tetrahedron. + * + * \return Volume of the tetrahedron. + */ +double calculate_tetra_volume(point *p0, point *p1, point *p2, point *p3) +{ + double nx, ny, nz; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + return +1; + + nx = (p1->y - p0->y) * (p2->z - p0->z) - (p1->z - p0->z) * (p2->y - p0->y); + ny = (p1->z - p0->z) * (p2->x - p0->x) - (p1->x - p0->x) * (p2->z - p0->z); + nz = (p1->x - p0->x) * (p2->y - p0->y) - (p1->y - p0->y) * (p2->x - p0->x); + + return nx * (p3->x - p0->x) + ny * (p3->y - p0->y) + nz * (p3->z - p0->z); +} + +/*! \brief Add row in matrix equation. + * + * Auxiliary function for solve_linear_equations. + * + * \param[in, out] m Matrix. + * \param[in] r1 Index of row to be modified. + * \param[in] r2 Index of row which is added to r1. + * \param[in] fac Factor by which row r2 is multiplied before adding to r1. + * + * \return void + */ +void add_row(double *m, int r1, int r2, double fac) +{ + int i; + + for(i = 0; i < 4; i++) + m[r1 * 4 + i] += fac * m[r2 * 4 + i]; +} + +/*! \brief Solve system of linear equations for 3d Voronoi construction. + * + * \param[in, out] m Matrix. + * \param[out] res Result. + * + * \return 0 if success, <0 else. 
+ */ +int solve_linear_equations(double *m, double *res) +{ + int ix, iy, iz, itmp; + + if(fabs(m[4]) > fabs(m[0])) + { + ix = 1; + iy = 0; + iz = 2; + } + else + { + ix = 0; + iy = 1; + iz = 2; + } + + if(fabs(m[8]) > fabs(m[ix * 4])) + { + ix = 2; + iy = 0; + iz = 1; + } + + add_row(m, iy, ix, -m[iy * 4] / m[ix * 4]); + add_row(m, iz, ix, -m[iz * 4] / m[ix * 4]); + + if(fabs(m[iz * 4 + 1]) > fabs(m[iy * 4 + 1])) + { + /* swap iy/iz */ + itmp = iy; + iy = iz; + iz = itmp; + } + + if(fabs(m[iy * 4 + 1]) < GAUSS_EPS) + return -1; + + add_row(m, iz, iy, -m[iz * 4 + 1] / m[iy * 4 + 1]); + + res[2] = m[iz * 4 + 3] / m[iz * 4 + 2]; + res[1] = (m[iy * 4 + 3] - res[2] * m[iy * 4 + 2]) / m[iy * 4 + 1]; + res[0] = (m[ix * 4 + 3] - res[2] * m[ix * 4 + 2] - res[1] * m[ix * 4 + 1]) / m[ix * 4]; + + if(fabs(m[iz * 4 + 2]) < GAUSS_EPS) + { + return -1; + } + if(fabs(m[iy * 4 + 1]) < GAUSS_EPS) + { + return -2; + } + if(fabs(m[ix * 4]) < GAUSS_EPS) + { + return -3; + } + + return 0; +} + +/*! \brief Converts coordinates of point p to integer values. + * + * \param[in, out] p Point. + * + * \return void + */ +#ifndef OPTIMIZE_MEMORY_USAGE +void set_integers_for_pointer(point *p) +{ + p->xx = (p->x - CentralOffsetX) * ConversionFac + 1.0; + p->yy = (p->y - CentralOffsetY) * ConversionFac + 1.0; + p->zz = (p->z - CentralOffsetZ) * ConversionFac + 1.0; + + if(p->xx < 1.0 || p->xx >= 2.0 || p->yy < 1.0 || p->yy >= 2.0 || p->zz < 1.0 || p->zz >= 2.0) + { + printf("(%g, %g, %g) (%g, %g, %g)\n", p->x, p->y, p->z, p->xx, p->yy, p->zz); + terminate("invalid coordinate range"); + } + + p->ix = double_to_voronoiint(p->xx); + p->iy = double_to_voronoiint(p->yy); + p->iz = double_to_voronoiint(p->zz); + + p->xx = mask_voronoi_int(p->xx); + p->yy = mask_voronoi_int(p->yy); + p->zz = mask_voronoi_int(p->zz); +} +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ + +/*! \brief Checks if point is within a sphere using arbitrary precision + * operations. + * + * \param p0 Point 1 of tetrahedron. 
+ * \param p1 Point 2 of tetrahedron.
+ * \param p2 Point 3 of tetrahedron.
+ * \param p3 Point 4 of tetrahedron.
+ * \param p Point to be checked if it is in cricumsphere.
+ *
+ * \return (-1,1); -1 in sphere, 1 outside.
+ */
+int InSphere_Exact(point *p0, point *p1, point *p2, point *p3, point *p)
+{
+  IntegerMapType ax, bx, cx, dx; /* a, b, c, d: integer offsets of p0, p1, p2, p3 relative to p */
+  IntegerMapType ay, by, cy, dy;
+  IntegerMapType az, bz, cz, dz;
+
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return -1; /* any infinity vertex: reported as -1 without evaluating the determinant */
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  ax = p0->ix - p->ix;
+  ay = p0->iy - p->iy;
+  az = p0->iz - p->iz;
+
+  bx = p1->ix - p->ix;
+  by = p1->iy - p->iy;
+  bz = p1->iz - p->iz;
+
+  cx = p2->ix - p->ix;
+  cy = p2->iy - p->iy;
+  cz = p2->iz - p->iz;
+
+  dx = p3->ix - p->ix;
+  dy = p3->iy - p->iy;
+  dz = p3->iz - p->iz;
+#else /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double pA_xyz[3], pB_xyz[3]; /* double outputs of get_integers_for_point; not used in this function */
+  IntegerMapType pA_ixyz[3], pB_ixyz[3]; /* pA: test point p, pB: current tetrahedron vertex */
+
+  get_integers_for_point(p, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_ixyz[0] - pA_ixyz[0];
+  ay = pB_ixyz[1] - pA_ixyz[1];
+  az = pB_ixyz[2] - pA_ixyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_ixyz[0] - pA_ixyz[0];
+  by = pB_ixyz[1] - pA_ixyz[1];
+  bz = pB_ixyz[2] - pA_ixyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_ixyz[0] - pA_ixyz[0];
+  cy = pB_ixyz[1] - pA_ixyz[1];
+  cz = pB_ixyz[2] - pA_ixyz[2];
+
+  get_integers_for_point(p3, pB_ixyz, pB_xyz);
+  dx = pB_ixyz[0] - pA_ixyz[0];
+  dy = pB_ixyz[1] - pA_ixyz[1];
+  dz = pB_ixyz[2] - pA_ixyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+
+  mpz_t ab, bc, cd, da, ac, bd; /* 2x2 determinants of the x/y components, e.g. ab = ax*by - bx*ay */
+
+  mpz_init(ab);
+  mpz_init(bc);
+  mpz_init(cd);
+  mpz_init(da);
+  mpz_init(ac);
+  mpz_init(bd);
+
+  mpz_t tmp, AA, BB, CC; /* scratch values, reused for every partial product below */
+
+  mpz_init(tmp);
+  mpz_init(AA);
+  mpz_init(BB);
+  mpz_init(CC);
+
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(AA, tmp, by);
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(BB, tmp, ay);
+  mpz_sub(ab, AA, BB); /* ab = ax*by - bx*ay */
+
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(AA, tmp, cy);
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(BB, tmp, by);
+  mpz_sub(bc, AA, BB); /* bc = bx*cy - cx*by */
+
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(AA, tmp, dy);
+  MY_mpz_set_si(tmp, dx);
+  MY_mpz_mul_si(BB, tmp, cy);
+  mpz_sub(cd, AA, BB); /* cd = cx*dy - dx*cy */
+
+  MY_mpz_set_si(tmp, dx);
+  MY_mpz_mul_si(AA, tmp, ay);
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(BB, tmp, dy);
+  mpz_sub(da, AA, BB); /* da = dx*ay - ax*dy */
+
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(AA, tmp, cy);
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(BB, tmp, ay);
+  mpz_sub(ac, AA, BB); /* ac = ax*cy - cx*ay */
+
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(AA, tmp, dy);
+  MY_mpz_set_si(tmp, dx);
+  MY_mpz_mul_si(BB, tmp, by);
+  mpz_sub(bd, AA, BB); /* bd = bx*dy - dx*by */
+
+  mpz_t abc, bcd, cda, dab; /* 3x3 combinations of the 2x2 values with the z components */
+
+  mpz_init(abc);
+  mpz_init(bcd);
+  mpz_init(cda);
+  mpz_init(dab);
+
+  MY_mpz_mul_si(AA, bc, az);
+  MY_mpz_mul_si(BB, ac, -bz); /* the minus sign of this term is folded into the factor */
+  MY_mpz_mul_si(CC, ab, cz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(abc, tmp, CC); /* abc = az*bc - bz*ac + cz*ab */
+
+  MY_mpz_mul_si(AA, cd, bz);
+  MY_mpz_mul_si(BB, bd, -cz); /* the minus sign of this term is folded into the factor */
+  MY_mpz_mul_si(CC, bc, dz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(bcd, tmp, CC); /* bcd = bz*cd - cz*bd + dz*bc */
+
+  MY_mpz_mul_si(AA, da, cz);
+  MY_mpz_mul_si(BB, ac, dz);
+  MY_mpz_mul_si(CC, cd, az);
+  mpz_add(tmp, AA, BB);
+  mpz_add(cda, tmp, CC); /* cda = cz*da + dz*ac + az*cd */
+
+  MY_mpz_mul_si(AA, ab, dz);
+  MY_mpz_mul_si(BB, bd, az);
+  MY_mpz_mul_si(CC, da, bz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(dab, tmp, CC); /* dab = dz*ab + az*bd + bz*da */
+
+  mpz_t a2, b2, c2, d2; /* squared lengths of the four offset vectors */
+
+  mpz_init(a2);
+  mpz_init(b2);
+  mpz_init(c2);
+  mpz_init(d2);
+
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(AA, tmp, ax);
+  MY_mpz_set_si(tmp, ay);
+  MY_mpz_mul_si(BB, tmp, ay);
+  MY_mpz_set_si(tmp, az);
+  MY_mpz_mul_si(CC, tmp, az);
+  mpz_add(tmp, AA, BB);
+  mpz_add(a2, tmp, CC); /* a2 = ax*ax + ay*ay + az*az */
+
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(AA, tmp, bx);
+  MY_mpz_set_si(tmp, by);
+  MY_mpz_mul_si(BB, tmp, by);
+  MY_mpz_set_si(tmp, bz);
+  MY_mpz_mul_si(CC, tmp, bz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(b2, tmp, CC); /* b2 = bx*bx + by*by + bz*bz */
+
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(AA, tmp, cx);
+  MY_mpz_set_si(tmp, cy);
+  MY_mpz_mul_si(BB, tmp, cy);
+  MY_mpz_set_si(tmp, cz);
+  MY_mpz_mul_si(CC, tmp, cz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(c2, tmp, CC); /* c2 = cx*cx + cy*cy + cz*cz */
+
+  MY_mpz_set_si(tmp, dx);
+  MY_mpz_mul_si(AA, tmp, dx);
+  MY_mpz_set_si(tmp, dy);
+  MY_mpz_mul_si(BB, tmp, dy);
+  MY_mpz_set_si(tmp, dz);
+  MY_mpz_mul_si(CC, tmp, dz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(d2, tmp, CC); /* d2 = dx*dx + dy*dy + dz*dz */
+
+  /* now calculate final result:
+   * (c2 * dab - d2 * abc) + (a2 * bcd - b2 * cda) */
+
+  mpz_mul(AA, c2, dab);
+  mpz_mul(BB, d2, abc);
+  mpz_sub(tmp, AA, BB);
+
+  mpz_mul(AA, a2, bcd);
+  mpz_mul(BB, b2, cda);
+  mpz_sub(CC, AA, BB);
+
+  mpz_add(AA, tmp, CC);
+
+  /* AA now contains the result; only its sign is returned.
+   * NOTE(review): mpz_sgn() yields -1, 0 or +1, so 0 can be returned here
+   * although the header comment of this function lists only -1 and 1. */
+
+  int sign = mpz_sgn(AA);
+
+  mpz_clear(d2); /* release every mpz_t initialised above */
+  mpz_clear(c2);
+  mpz_clear(b2);
+  mpz_clear(a2);
+  mpz_clear(dab);
+  mpz_clear(cda);
+  mpz_clear(bcd);
+  mpz_clear(abc);
+  mpz_clear(CC);
+  mpz_clear(BB);
+  mpz_clear(AA);
+  mpz_clear(tmp);
+  mpz_clear(bd);
+  mpz_clear(ac);
+  mpz_clear(da);
+  mpz_clear(cd);
+  mpz_clear(bc);
+  mpz_clear(ab);
+
+  return sign;
+}
+
+/*! \brief Checks if point is within a sphere.
+ *
+ * \param p0 Point 1 of tetrahedron.
+ * \param p1 Point 2 of tetrahedron.
+ * \param p2 Point 3 of tetrahedron.
+ * \param p3 Point 4 of tetrahedron.
+ * \param p Point to be checked if it is in cricumsphere.
+ *
+ * \return (-1,0,1); -1: in sphere, 0: on surfrace, 1: outside.
+ */
+int InSphere_Quick(point *p0, point *p1, point *p2, point *p3, point *p)
+{
+  double ax, bx, cx, dx; /* a, b, c, d: offsets of p0, p1, p2, p3 relative to p */
+  double ay, by, cy, dy;
+  double az, bz, cz, dz;
+  double a2, b2, c2, d2; /* squared lengths of the offsets */
+  double ab, bc, cd, da, ac, bd; /* 2x2 determinants of the x/y components */
+  double abc, bcd, cda, dab; /* 3x3 combinations with the z components */
+  double x; /* value whose sign is returned */
+
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return -1; /* any infinity vertex: reported as -1 */
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  ax = p0->xx - p->xx;
+  ay = p0->yy - p->yy;
+  az = p0->zz - p->zz;
+
+  bx = p1->xx - p->xx;
+  by = p1->yy - p->yy;
+  bz = p1->zz - p->zz;
+
+  cx = p2->xx - p->xx;
+  cy = p2->yy - p->yy;
+  cz = p2->zz - p->zz;
+
+  dx = p3->xx - p->xx;
+  dy = p3->yy - p->yy;
+  dz = p3->zz - p->zz;
+#else /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double pA_xyz[3], pB_xyz[3]; /* pA: test point p, pB: current tetrahedron vertex */
+  IntegerMapType pA_ixyz[3], pB_ixyz[3]; /* integer outputs of get_integers_for_point; not used in this function */
+
+  get_integers_for_point(p, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_xyz[0] - pA_xyz[0];
+  ay = pB_xyz[1] - pA_xyz[1];
+  az = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_xyz[0] - pA_xyz[0];
+  by = pB_xyz[1] - pA_xyz[1];
+  bz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_xyz[0] - pA_xyz[0];
+  cy = pB_xyz[1] - pA_xyz[1];
+  cz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p3, pB_ixyz, pB_xyz);
+  dx = pB_xyz[0] - pA_xyz[0];
+  dy = pB_xyz[1] - pA_xyz[1];
+  dz = pB_xyz[2] - pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+
+  ab = ax * by - bx * ay;
+  bc = bx * cy - cx * by;
+  cd = cx * dy - dx * cy;
+  da = dx * ay - ax * dy;
+  ac = ax * cy - cx * ay;
+  bd = bx * dy - dx * by;
+
+  abc = az * bc - bz * ac + cz * ab;
+  bcd = bz * cd - cz * bd + dz * bc;
+  cda = cz * da + dz * ac + az * cd;
+  dab = dz * ab + az * bd + bz * da;
+
+  a2 = ax * ax + ay * ay + az * az;
+  b2 = bx * bx + by * by + bz * bz;
+  c2 = cx * cx + cy * cy + cz * cz;
+  d2 = dx * dx + dy * dy + dz * dz;
+
+  x = ((c2 * dab - d2 * abc) + (a2 * bcd - b2 * cda));
+
+  if(x < 0) /* plain sign test: no allowance for round-off is made here */
+    return -1;
+  if(x > 0)
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Checks if point is within a sphere with some error margin.
+ *
+ * Evaluates the same expression as InSphere_Quick in double precision and
+ * compares it against an error bound of 1.0e-14 times the sum of the
+ * absolute values of all terms entering the expression.
+ *
+ * \param p0 Point 1 of tetrahedron.
+ * \param p1 Point 2 of tetrahedron.
+ * \param p2 Point 3 of tetrahedron.
+ * \param p3 Point 4 of tetrahedron.
+ * \param p Point to be checked if it is in circumsphere.
+ *
+ * \return (-1,0,1); -1: in sphere (also returned if any of p0..p3 is an
+ *         infinity point), 0: on surface (within error margin),
+ *         +1: outside.
+ */
+int InSphere_Errorbound(point *p0, point *p1, point *p2, point *p3, point *p)
+{
+  double ax, bx, cx, dx; /* a, b, c, d: offsets of p0, p1, p2, p3 relative to p */
+  double ay, by, cy, dy;
+  double az, bz, cz, dz;
+  double a2, b2, c2, d2; /* squared lengths of the offsets */
+  double ab, bc, cd, da, ac, bd; /* first the signed 2x2 values, later reused for their magnitude bounds */
+  double abc, bcd, cda, dab; /* first the signed 3x3 values, later reused for their magnitude bounds */
+  double x; /* signed value that is compared against the error bound */
+
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return -1; /* any infinity vertex: reported as -1 */
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  ax = p0->xx - p->xx;
+  ay = p0->yy - p->yy;
+  az = p0->zz - p->zz;
+
+  bx = p1->xx - p->xx;
+  by = p1->yy - p->yy;
+  bz = p1->zz - p->zz;
+
+  cx = p2->xx - p->xx;
+  cy = p2->yy - p->yy;
+  cz = p2->zz - p->zz;
+
+  dx = p3->xx - p->xx;
+  dy = p3->yy - p->yy;
+  dz = p3->zz - p->zz;
+#else /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double pA_xyz[3], pB_xyz[3]; /* pA: test point p, pB: current tetrahedron vertex */
+  IntegerMapType pA_ixyz[3], pB_ixyz[3]; /* integer outputs of get_integers_for_point; not used in this function */
+
+  get_integers_for_point(p, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_xyz[0] - pA_xyz[0];
+  ay = pB_xyz[1] - pA_xyz[1];
+  az = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_xyz[0] - pA_xyz[0];
+  by = pB_xyz[1] - pA_xyz[1];
+  bz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_xyz[0] - pA_xyz[0];
+  cy = pB_xyz[1] - pA_xyz[1];
+  cz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p3, pB_ixyz, pB_xyz);
+  dx = pB_xyz[0] - pA_xyz[0];
+  dy = pB_xyz[1] - pA_xyz[1];
+  dz = pB_xyz[2] - pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+
+  double axby = ax * by; /* the individual products are kept because the bound below needs their magnitudes */
+  double bxay = bx * ay;
+  double bxcy = bx * cy;
+  double cxby = cx * by;
+  double cxdy = cx * dy;
+  double dxcy = dx * cy;
+  double dxay = dx * ay;
+  double axdy = ax * dy;
+  double axcy = ax * cy;
+  double cxay = cx * ay;
+  double bxdy = bx * dy;
+  double dxby = dx * by;
+
+  ab = axby - bxay;
+  bc = bxcy - cxby;
+  cd = cxdy - dxcy;
+  da = dxay - axdy;
+  ac = axcy - cxay;
+  bd = bxdy - dxby;
+
+  abc = az * bc - bz * ac + cz * ab;
+  bcd = bz * cd - cz * bd + dz * bc;
+  cda = cz * da + dz * ac + az * cd;
+  dab = dz * ab + az * bd + bz * da;
+
+  a2 = ax * ax + ay * ay + az * az;
+  b2 = bx * bx + by * by + bz * bz;
+  c2 = cx * cx + cy * cy + cz * cz;
+  d2 = dx * dx + dy * dy + dz * dz;
+
+  x = ((c2 * dab - d2 * abc) + (a2 * bcd - b2 * cda));
+
+  /* calculate absolute maximum size: the same expression is evaluated again
+   * with every term replaced by its absolute value and every subtraction by
+   * an addition; x has already been computed, so the variables are reused */
+
+  ab = fabs(axby) + fabs(bxay);
+  bc = fabs(bxcy) + fabs(cxby);
+  cd = fabs(cxdy) + fabs(dxcy);
+  da = fabs(dxay) + fabs(axdy);
+  ac = fabs(axcy) + fabs(cxay);
+  bd = fabs(bxdy) + fabs(dxby);
+
+  az = fabs(az); /* a2..d2 were computed above, so overwriting the z offsets is safe */
+  bz = fabs(bz);
+  cz = fabs(cz);
+  dz = fabs(dz);
+
+  abc = az * bc + bz * ac + cz * ab;
+  bcd = bz * cd + cz * bd + dz * bc;
+  cda = cz * da + dz * ac + az * cd;
+  dab = dz * ab + az * bd + bz * da;
+
+  double sizelimit = ((c2 * dab + d2 * abc) + (a2 * bcd + b2 * cda));
+
+  double errbound = 1.0e-14 * sizelimit; /* relative tolerance applied to the magnitude bound */
+
+  if(x < -errbound)
+    return -1;
+  else if(x > errbound)
+    return +1;
+
+  return 0; /* |x| <= errbound: sign not decided at this precision */
+}
+
+/*! \brief Returns orientation of tetrahedron using arbitrary precision
+ * floating point operations.
+ *
+ * \param[in] p0 First point of tetrahedron.
+ * \param[in] p1 Second point of tetrahedron.
+ * \param[in] p2 Third point of tetrahedron.
+ * \param[in] p3 Forth point of tetrahedron.
+ *
+ * \return (-1,0,1) -1 if negatively oriented, 0 if degenerate and 1 if
+ * positively oriented.
+ */
+int Orient3d_Exact(point *p0, point *p1, point *p2, point *p3)
+{
+  IntegerMapType ax, bx, cx; /* a, b, c: integer offsets of p0, p1, p2 relative to p3 */
+  IntegerMapType ay, by, cy;
+  IntegerMapType az, bz, cz;
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  ax = p0->ix - p3->ix;
+  ay = p0->iy - p3->iy;
+  az = p0->iz - p3->iz;
+
+  bx = p1->ix - p3->ix;
+  by = p1->iy - p3->iy;
+  bz = p1->iz - p3->iz;
+
+  cx = p2->ix - p3->ix;
+  cy = p2->iy - p3->iy;
+  cz = p2->iz - p3->iz;
+#else /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double pA_xyz[3], pB_xyz[3]; /* double outputs of get_integers_for_point; not used in this function */
+  IntegerMapType pA_ixyz[3], pB_ixyz[3]; /* pA: reference point p3, pB: current vertex */
+
+  get_integers_for_point(p3, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_ixyz[0] - pA_ixyz[0];
+  ay = pB_ixyz[1] - pA_ixyz[1];
+  az = pB_ixyz[2] - pA_ixyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_ixyz[0] - pA_ixyz[0];
+  by = pB_ixyz[1] - pA_ixyz[1];
+  bz = pB_ixyz[2] - pA_ixyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_ixyz[0] - pA_ixyz[0];
+  cy = pB_ixyz[1] - pA_ixyz[1];
+  cz = pB_ixyz[2] - pA_ixyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+
+  mpz_t bz_cy, by_cz, cz_ay, cy_az, az_by, ay_bz; /* the six pairwise products, e.g. bz_cy = bz * cy */
+  mpz_t bz2, by2, cz2, cy2, az2, ay2; /* mpz copies of the first factor of each product */
+
+  mpz_init(bz_cy);
+  mpz_init(bz2);
+  MY_mpz_set_si(bz2, bz);
+  MY_mpz_mul_si(bz_cy, bz2, cy);
+
+  mpz_init(by_cz);
+  mpz_init(by2);
+  MY_mpz_set_si(by2, by);
+  MY_mpz_mul_si(by_cz, by2, cz);
+
+  mpz_init(cz_ay);
+  mpz_init(cz2);
+  MY_mpz_set_si(cz2, cz);
+  MY_mpz_mul_si(cz_ay, cz2, ay);
+
+  mpz_init(cy_az);
+  mpz_init(cy2);
+  MY_mpz_set_si(cy2, cy);
+  MY_mpz_mul_si(cy_az, cy2, az);
+
+  mpz_init(az_by);
+  mpz_init(az2);
+  MY_mpz_set_si(az2, az);
+  MY_mpz_mul_si(az_by, az2, by);
+
+  mpz_init(ay_bz);
+  mpz_init(ay2);
+  MY_mpz_set_si(ay2, ay);
+  MY_mpz_mul_si(ay_bz, ay2, bz);
+
+  mpz_t bzcy_bycz, czay_cyaz, azby_aybz; /* differences of the products above */
+
+  mpz_init(bzcy_bycz);
+  mpz_init(czay_cyaz);
+  mpz_init(azby_aybz);
+
+  mpz_sub(bzcy_bycz, bz_cy, by_cz);
+  mpz_sub(czay_cyaz, cz_ay, cy_az);
+  mpz_sub(azby_aybz, az_by, ay_bz);
+
+  mpz_t a, b, c, ab, res; /* res = ax*(bz*cy - by*cz) + bx*(cz*ay - cy*az) + cx*(az*by - ay*bz) */
+
+  mpz_init(a);
+  mpz_init(b);
+  mpz_init(c);
+
+  MY_mpz_mul_si(a, bzcy_bycz, ax);
+  MY_mpz_mul_si(b, czay_cyaz, bx);
+  MY_mpz_mul_si(c, azby_aybz, cx);
+
+  mpz_init(ab);
+  mpz_init(res);
+
+  mpz_add(ab, a, b);
+  mpz_add(res, ab, c);
+
+  int sign = mpz_sgn(res); /* only the sign of the exact result is returned */
+
+  mpz_clear(res); /* release every mpz_t initialised above */
+  mpz_clear(ab);
+  mpz_clear(c);
+  mpz_clear(b);
+  mpz_clear(a);
+  mpz_clear(azby_aybz);
+  mpz_clear(czay_cyaz);
+  mpz_clear(bzcy_bycz);
+  mpz_clear(ay2);
+  mpz_clear(ay_bz);
+  mpz_clear(az2);
+  mpz_clear(az_by);
+  mpz_clear(cy2);
+  mpz_clear(cy_az);
+  mpz_clear(cz2);
+  mpz_clear(cz_ay);
+  mpz_clear(by2);
+  mpz_clear(by_cz);
+  mpz_clear(bz2);
+  mpz_clear(bz_cy);
+
+  return sign;
+}
+
+/*! \brief Returns orientation of tetrahedron.
+ *
+ * Double precision evaluation with a plain sign test; no error bound is
+ * applied and infinity points are not treated specially.
+ *
+ * \param[in] p0 First point of tetrahedron.
+ * \param[in] p1 Second point of tetrahedron.
+ * \param[in] p2 Third point of tetrahedron.
+ * \param[in] p3 Fourth point of tetrahedron.
+ *
+ * \return (-1,0,1) -1 if negatively oriented, 0 if degenerate and 1 if
+ * positively oriented.
+ */
+int Orient3d_Quick(point *p0, point *p1, point *p2, point *p3)
+{
+  double ax, bx, cx; /* a, b, c: offsets of p0, p1, p2 relative to p3 */
+  double ay, by, cy;
+  double az, bz, cz;
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  ax = p0->xx - p3->xx;
+  ay = p0->yy - p3->yy;
+  az = p0->zz - p3->zz;
+
+  bx = p1->xx - p3->xx;
+  by = p1->yy - p3->yy;
+  bz = p1->zz - p3->zz;
+
+  cx = p2->xx - p3->xx;
+  cy = p2->yy - p3->yy;
+  cz = p2->zz - p3->zz;
+#else /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double pA_xyz[3], pB_xyz[3]; /* pA: reference point p3, pB: current vertex */
+  IntegerMapType pA_ixyz[3], pB_ixyz[3]; /* integer outputs of get_integers_for_point; not used in this function */
+
+  get_integers_for_point(p3, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_xyz[0] - pA_xyz[0];
+  ay = pB_xyz[1] - pA_xyz[1];
+  az = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_xyz[0] - pA_xyz[0];
+  by = pB_xyz[1] - pA_xyz[1];
+  bz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_xyz[0] - pA_xyz[0];
+  cy = pB_xyz[1] - pA_xyz[1];
+  cz = pB_xyz[2] - pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+
+  double x = (ax * (bz * cy - by * cz) + bx * (cz * ay - cy * az) + cx * (az * by - ay * bz));
+
+  if(x < 0)
+    return -1;
+  else if(x > 0)
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Returns orientation of tetrahedron.
+ *
+ * Evaluates the orientation in double precision first. If the result lies
+ * within an error bound of 1.0e-14 times the sum of the absolute values of
+ * all terms, the decision is delegated to Orient3d_Exact.
+ *
+ * \param[in] p0 First point of tetrahedron.
+ * \param[in] p1 Second point of tetrahedron.
+ * \param[in] p2 Third point of tetrahedron.
+ * \param[in] p3 Fourth point of tetrahedron.
+ *
+ * \return (-1,0,1) the orientation of the 4 points as +/-1. If any of the
+ * points is an infinity point, return 0; otherwise 0 only if
+ * Orient3d_Exact returns 0.
+ */
+int Orient3d(point *p0, point *p1, point *p2, point *p3)
+{
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return 0;
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  double ax = p0->xx - p3->xx;
+  double ay = p0->yy - p3->yy;
+  double az = p0->zz - p3->zz;
+
+  double bx = p1->xx - p3->xx;
+  double by = p1->yy - p3->yy;
+  double bz = p1->zz - p3->zz;
+
+  double cx = p2->xx - p3->xx;
+  double cy = p2->yy - p3->yy;
+  double cz = p2->zz - p3->zz;
+#else /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double ax, ay, az, bx, by, bz, cx, cy, cz;
+  double pA_xyz[3], pB_xyz[3]; /* pA: reference point p3, pB: current vertex */
+  IntegerMapType pA_ixyz[3], pB_ixyz[3]; /* integer outputs of get_integers_for_point; not used in this function */
+
+  get_integers_for_point(p3, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_xyz[0] - pA_xyz[0];
+  ay = pB_xyz[1] - pA_xyz[1];
+  az = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_xyz[0] - pA_xyz[0];
+  by = pB_xyz[1] - pA_xyz[1];
+  bz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_xyz[0] - pA_xyz[0];
+  cy = pB_xyz[1] - pA_xyz[1];
+  cz = pB_xyz[2] - pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+
+  double bzcy = bz * cy; /* products kept separately: the bound below needs their magnitudes */
+  double bycz = by * cz;
+  double czay = cz * ay;
+  double cyaz = cy * az;
+  double azby = az * by;
+  double aybz = ay * bz;
+
+  double x = ax * (bzcy - bycz) + bx * (czay - cyaz) + cx * (azby - aybz);
+
+  double sizelimit =
+      fabs(ax) * (fabs(bzcy) + fabs(bycz)) + fabs(bx) * (fabs(czay) + fabs(cyaz)) + fabs(cx) * (fabs(azby) + fabs(aybz));
+
+  double errbound = 1.0e-14 * sizelimit; /* relative tolerance applied to the magnitude bound */
+
+  if(x < -errbound)
+    return -1;
+  else if(x > errbound)
+    return +1;
+
+  return Orient3d_Exact(p0, p1, p2, p3); /* |x| <= errbound: decide with exact integer arithmetic */
+}
+
+/*! \brief Data structure for face sort
+ *
+ * One record per (face, adjacent cell) pair; filled by sort_faces_by_ID
+ * from the VertexEntries and FaceNormals lists.
+ */
+struct data_face_sort /* for sorting faces */
+{
+  MyIDType ID;     /* ID of corresponding cell */
+  float normal[3]; /* non-normalized normal vector */
+  int start;       /* start index into vertex list */
+  int len;         /* number of vertices */
+};
+
+static int *VertexEntries;       /* face index list */
+static float *VertexCoordinates; /* Voronoi vertex coordinates (circumsphere centers of delaunay tetras) */
+static float *FaceNormals;       /* normal vectors */
+static int Nvertices;            /* number of Voronoi vertices */
+static int Nnormals;             /* number of normals */
+static int Nentries;             /* number of entries in Voronoi face vertex list (including IDs and face vertex count) */
+static int Nsort;                /* number of ID sorted faces */
+static int MaxEntries, MaxFaces; /* for allocation */
+static struct data_face_sort *FaceSort;
+
+/*! \brief Face sorting kernel
+ *
+ * Compares ID of data_face_sort types; has the signature qsort expects.
+ *
+ * \param[in] a First element.
+ * \param[in] b Second element.
+ *
+ * \return (-1,0,1), -1 if a->ID < b->ID, +1 if a->ID > b->ID, 0 if equal.
+ */
+int compare_face_sort(const void *a, const void *b)
+{
+  if(((struct data_face_sort *)a)->ID < ((struct data_face_sort *)b)->ID)
+    return -1;
+
+  if(((struct data_face_sort *)a)->ID > ((struct data_face_sort *)b)->ID)
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Gathers faces in list.
+ *
+ * \param[in] T Pointer to tessellation.
+ *
+ * \return void
+ */
+void get_voronoi_face_vertex_indices(tessellation *T)
+{
+  /* For every not yet visited edge of every tetrahedron, the tetrahedra
+   * around that edge are walked once to count them and once more to record
+   * them.  Per face this appends
+   *   FaceNormals:   6 floats (the vector dp_1 -> dp_2 and its negative)
+   *   VertexEntries: ID of dp_1, ID of dp_2, count, followed by count
+   *                  tetrahedron indices (the starting tetrahedron first and
+   *                  again last)
+   * The capacity of both lists is checked BEFORE anything is written. */
+  int i, j, k, l, m, ii, jj, kk, ll, tetra_nr, edge_nr, next_tetra_nr, count, dp_1, dp_2;
+  tetra *prev, *next;
+  tetra *DT = T->DT;
+  point *DP = T->DP;
+  int bit, nr_next;
+
+  /* loop over tetras of the tessellation that was passed in */
+  for(tetra_nr = 0; tetra_nr < T->Ndt; tetra_nr++)
+    {
+      if(DT[tetra_nr].t[0] < 0) /* skip deleted tetras */
+        continue;
+
+      /* edge flagging */
+      bit     = 1;
+      edge_nr = 0;
+
+      /* loop over edges */
+      while(Edge_visited[tetra_nr] != EDGE_ALL)
+        {
+          if((Edge_visited[tetra_nr] & bit) != 0)
+            {
+              bit <<= 1;
+              edge_nr++;
+              continue;
+            }
+
+          tetra *t = &DT[tetra_nr];
+
+          /* edge-point relation */
+          i = edge_start[edge_nr];
+          j = edge_end[edge_nr];
+          k = edge_opposite[edge_nr];
+          l = edge_nexttetra[edge_nr];
+
+          /* mark edge as visited */
+          Edge_visited[tetra_nr] |= (1 << edge_nr);
+
+          /* delaunay points on both side of face */
+          dp_1 = t->p[i];
+          dp_2 = t->p[j];
+
+          /* skip large tetra */
+          if(dp_1 < 0 || dp_2 < 0)
+            {
+              bit <<= 1;
+              edge_nr++;
+              continue;
+            }
+
+          /* skip ghost points (both local and foreign) */
+          if((DP[dp_1].task != ThisTask || DP[dp_1].index < 0 || DP[dp_1].index >= NumGas) &&
+             (DP[dp_2].task != ThisTask || DP[dp_2].index < 0 || DP[dp_2].index >= NumGas))
+            {
+              bit <<= 1;
+              edge_nr++;
+              continue;
+            }
+
+          /* count number of face vertices */
+          count = 0;
+          prev  = t;
+
+          do
+            {
+              count++;
+              next_tetra_nr = prev->t[l];
+              next          = &DT[next_tetra_nr];
+
+              /* locate the shared points in the neighbouring tetra */
+              for(m = 0, ll = ii = jj = -1; m < 4; m++)
+                {
+                  if(next->p[m] == prev->p[k])
+                    ll = m;
+                  if(next->p[m] == prev->p[i])
+                    ii = m;
+                  if(next->p[m] == prev->p[j])
+                    jj = m;
+                }
+
+              if(ll < 0 || ii < 0 || jj < 0)
+                terminate("inconsistency");
+
+              /* the four local indices sum to 6, so kk is the remaining one */
+              kk = 6 - (ll + ii + jj);
+              i  = ii;
+              l  = ll;
+              j  = jj;
+              k  = kk;
+
+              prev = next;
+            }
+          while(next != t);
+
+          count++;
+
+          /* make sure the 6 normal components and the 4 header entries fit
+           * before they are written */
+          if(Nnormals + 6 > MaxFaces)
+            terminate("Nnormals > MaxFaces");
+
+          if(Nentries + 4 > MaxEntries)
+            terminate("Nentries > MaxEntries");
+
+          /* get face normals (from both sides) */
+          FaceNormals[Nnormals++] = (DP[dp_2].x - DP[dp_1].x);
+          FaceNormals[Nnormals++] = (DP[dp_2].y - DP[dp_1].y);
+          FaceNormals[Nnormals++] = (DP[dp_2].z - DP[dp_1].z);
+          FaceNormals[Nnormals++] = (DP[dp_1].x - DP[dp_2].x);
+          FaceNormals[Nnormals++] = (DP[dp_1].y - DP[dp_2].y);
+          FaceNormals[Nnormals++] = (DP[dp_1].z - DP[dp_2].z);
+
+          /* fill vertex entry list, first ID, count then tetra numbers;
+           * NOTE(review): the IDs are narrowed to int here - confirm that
+           * MyIDType values always fit */
+          VertexEntries[Nentries++] = (int)DP[dp_1].ID;
+          VertexEntries[Nentries++] = (int)DP[dp_2].ID;
+          VertexEntries[Nentries++] = (int)count;
+          VertexEntries[Nentries++] = (int)tetra_nr;
+
+          /* get tetra indices of face vertices */
+          count = 0;
+          prev  = t;
+          do
+            {
+              count++;
+              next_tetra_nr = prev->t[l];
+              next          = &DT[next_tetra_nr];
+
+              /* check the capacity before the write, not after it */
+              if(Nentries >= MaxEntries)
+                terminate("Nentries > MaxEntries");
+
+              VertexEntries[Nentries++] = (int)next_tetra_nr;
+
+              for(m = 0, ll = ii = jj = -1; m < 4; m++)
+                {
+                  if(next->p[m] == prev->p[k])
+                    ll = m;
+                  if(next->p[m] == prev->p[i])
+                    ii = m;
+                  if(next->p[m] == prev->p[j])
+                    jj = m;
+                }
+
+              if(ll < 0 || ii < 0 || jj < 0)
+                terminate("inconsistency");
+
+              kk = 6 - (ll + ii + jj);
+
+              /* flag edge in the neighbouring tetra so that this face is not
+               * generated a second time from there */
+              for(nr_next = 0; nr_next < 6; nr_next++)
+                if((edge_start[nr_next] == ii && edge_end[nr_next] == jj) || (edge_start[nr_next] == jj && edge_end[nr_next] == ii))
+                  {
+                    if((Edge_visited[next_tetra_nr] & (1 << nr_next)) && next != t)
+                      terminate("inconsistency");
+
+                    Edge_visited[next_tetra_nr] |= (1 << nr_next);
+                    break;
+                  }
+
+              i = ii;
+              l = ll;
+              j = jj;
+              k = kk;
+
+              prev = next;
+            }
+          while(next != t);
+
+          bit <<= 1;
+          edge_nr++;
+        }
+    }
+}
+
+/*! \brief Set Vertex coordinates in the respective array.
+ *
+ * Copys the coordinates from the DTC array of the tessellation to a
+ * designated array VertexCoordinates.
+ *
+ * \param[in] T Pointer to tessellation.
+ * + * \return void + */ +void get_voronoi_face_vertex_coordinates(tessellation *T) +{ + int tetra_nr = 0; + + for(tetra_nr = 0; tetra_nr < T->Ndt; tetra_nr++) + { + VertexCoordinates[3 * Nvertices + 0] = T->DTC[tetra_nr].cx; + VertexCoordinates[3 * Nvertices + 1] = T->DTC[tetra_nr].cy; + VertexCoordinates[3 * Nvertices + 2] = T->DTC[tetra_nr].cz; + Nvertices++; + } +} + +/*! \brief Function calls qsort for sorting faces by ID. + * + * Uses compare_face_sort as comparison function. Requires array FaceSort. + * + * \return void + */ +void sort_faces_by_ID(void) +{ + int i = 0, j = 0, k = 0; + + do + { + FaceSort[j].ID = VertexEntries[i + 0]; + FaceSort[j].start = i + 3; + FaceSort[j].len = VertexEntries[i + 2]; + FaceSort[j].normal[0] = FaceNormals[k++]; + FaceSort[j].normal[1] = FaceNormals[k++]; + FaceSort[j].normal[2] = FaceNormals[k++]; + j++; + + FaceSort[j].ID = VertexEntries[i + 1]; + FaceSort[j].start = i + 3; + FaceSort[j].len = VertexEntries[i + 2]; + FaceSort[j].normal[0] = FaceNormals[k++]; + FaceSort[j].normal[1] = FaceNormals[k++]; + FaceSort[j].normal[2] = FaceNormals[k++]; + j++; + + i += 3 + VertexEntries[i + 2]; + + if(j > MaxFaces) + terminate("j > MaxFaces"); + } + while(i < Nentries); + + Nsort = j; + + /* sort faces by ID */ + qsort(FaceSort, Nsort, sizeof(struct data_face_sort), compare_face_sort); +} + +/*! \brief Outputs Voronoi vertex indices to file. + * + * Outputs the Voronoi vertex indices from task writeTask to lastTask in file + * fname. + * + * \param[in] T Pointer to tessellation. + * \param[in] fname1 File name of file index data is written in. + * \param[in] fname2 File name of file face data is written in. + * \param[in] writeTask Task that gathers information and writes data. + * \param[in] lastTask Last task that is included in this dump. 
+ * + * \return void + */ +void write_voronoi_face_vertex_indices(tessellation *T, char *fname1, char *fname2, int writeTask, int lastTask) +{ + FILE *fd1, *fd2; + MPI_Status status; + int nVertices_tot, nEntries_tot, nNormals_tot; + int nVertices_before, i, task, *tmp; + int *Nvertices_list, *Nentries_list, *Nnormals_list, *Nsort_list; + struct data_face_sort *tmp_sort; + + VertexEntries = mymalloc("VertexEntries", MaxEntries * sizeof(int)); + FaceNormals = mymalloc("VertexEntries", MaxFaces * sizeof(int)); + + /* get faces */ + get_voronoi_face_vertex_indices(T); + + FaceSort = (struct data_face_sort *)mymalloc("face_sort", sizeof(struct data_face_sort) * MaxFaces); + + /* sort faces */ + sort_faces_by_ID(); + + Nentries = 0; + for(i = 0; i < Nsort; i++) + Nentries += FaceSort[i].len + 2; + + /* I/O */ + Nvertices_list = mymalloc("Nvertices_list", sizeof(int) * NTask); + Nentries_list = mymalloc("Nentries_list", sizeof(int) * NTask); + Nsort_list = mymalloc("Nsort_list", sizeof(int) * NTask); + Nnormals_list = mymalloc("Nnormals_list", sizeof(int) * NTask); + + if(ThisTask == writeTask) + { + nVertices_tot = Nvertices; + nEntries_tot = Nentries; + nNormals_tot = Nnormals; + for(task = writeTask + 1; task <= lastTask; task++) + { + MPI_Recv(&Nvertices_list[task], 1, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status); + MPI_Recv(&Nentries_list[task], 1, MPI_INT, task, TAG_LOCALN + 1, MPI_COMM_WORLD, &status); + MPI_Recv(&Nsort_list[task], 1, MPI_INT, task, TAG_LOCALN + 2, MPI_COMM_WORLD, &status); + MPI_Recv(&Nnormals_list[task], 1, MPI_INT, task, TAG_LOCALN + 3, MPI_COMM_WORLD, &status); + MPI_Send(&nVertices_tot, 1, MPI_INT, task, TAG_N, MPI_COMM_WORLD); + nVertices_tot += Nvertices_list[task]; + nEntries_tot += Nentries_list[task]; + nNormals_tot += Nnormals_list[task]; + } + if(!(fd1 = fopen(fname1, "w"))) + terminate("I/O error"); + + if(!(fd2 = fopen(fname2, "w"))) + terminate("I/O error"); + + my_fwrite(&nEntries_tot, sizeof(int), 1, fd1); + 
my_fwrite(&nNormals_tot, sizeof(int), 1, fd2); + for(i = 0; i < Nsort; i++) + { + my_fwrite(&FaceSort[i].ID, sizeof(int), 1, fd1); + my_fwrite(&FaceSort[i].len, sizeof(int), 1, fd1); + my_fwrite(&VertexEntries[FaceSort[i].start], sizeof(int) * FaceSort[i].len, 1, fd1); + my_fwrite(FaceSort[i].normal, 3 * sizeof(float), 1, fd2); + } + + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp_sort = (struct data_face_sort *)mymalloc("tmp_sort", sizeof(struct data_face_sort) * Nsort_list[task]); + tmp = mymalloc("tmp", sizeof(int) * Nentries_list[task]); + MPI_Recv(tmp, Nentries_list[task], MPI_INT, task, TAG_N + 1, MPI_COMM_WORLD, &status); + MPI_Recv(tmp_sort, Nsort_list[task] * sizeof(struct data_face_sort), MPI_BYTE, task, TAG_N + 2, MPI_COMM_WORLD, &status); + + for(i = 0; i < Nsort_list[task]; i++) + { + my_fwrite(&tmp_sort[i].ID, sizeof(int), 1, fd1); + my_fwrite(&tmp_sort[i].len, sizeof(int), 1, fd1); + my_fwrite(&tmp[tmp_sort[i].start], sizeof(int) * tmp_sort[i].len, 1, fd1); + my_fwrite(tmp_sort[i].normal, 3 * sizeof(float), 1, fd2); + } + myfree(tmp); + myfree(tmp_sort); + } + fclose(fd2); + fclose(fd1); + } + else + { + MPI_Send(&Nvertices, 1, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD); + MPI_Send(&Nentries, 1, MPI_INT, writeTask, TAG_LOCALN + 1, MPI_COMM_WORLD); + MPI_Send(&Nsort, 1, MPI_INT, writeTask, TAG_LOCALN + 2, MPI_COMM_WORLD); + MPI_Send(&Nnormals, 1, MPI_INT, writeTask, TAG_LOCALN + 3, MPI_COMM_WORLD); + MPI_Recv(&nVertices_before, 1, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status); + for(i = 0; i < Nentries; i++) + if(VertexEntries[i] >= 0) + VertexEntries[i] += nVertices_before; + MPI_Send(VertexEntries, Nentries, MPI_INT, writeTask, TAG_N + 1, MPI_COMM_WORLD); + MPI_Send(FaceSort, Nsort * sizeof(struct data_face_sort), MPI_BYTE, writeTask, TAG_N + 2, MPI_COMM_WORLD); + } + + myfree(Nnormals_list); + myfree(Nsort_list); + myfree(Nentries_list); + myfree(Nvertices_list); + myfree(FaceSort); + myfree(FaceNormals); + 
myfree(VertexEntries); +} + +/*! \brief Outputs Voronoi vertex coordinates to file. + * + * Outputs the Voronoi vertex coordinates from task write Task to lastTask in + * file fname. + * + * \param[in] T Pointer to tessellation. + * \param[in] fname File name of file the data is written in. + * \param[in] writeTask Task that gathers information and writes data. + * \param[in] lastTask Last task that is included in this dump. + * + * \return void + */ +void write_voronoi_face_vertex_coordinates(tessellation *T, char *fname, int writeTask, int lastTask) +{ + FILE *fd; + MPI_Status status; + int *Nvertices_list; + int nVertices_tot, task; + float *tmp; + + VertexCoordinates = mymalloc("VertexCoordinates", MaxEntries * 3 * sizeof(float)); + + /* get coordinates */ + get_voronoi_face_vertex_coordinates(T); + + /* I/O */ + Nvertices_list = mymalloc("Nvertices_list", sizeof(int) * NTask); + if(ThisTask == writeTask) + { + nVertices_tot = Nvertices; + for(task = writeTask + 1; task <= lastTask; task++) + { + MPI_Recv(&Nvertices_list[task], 1, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status); + nVertices_tot += Nvertices_list[task]; + } + + if(!(fd = fopen(fname, "w"))) + terminate("I/O error"); + + my_fwrite(&nVertices_tot, sizeof(int), 1, fd); + my_fwrite(VertexCoordinates, sizeof(float), 3 * Nvertices, fd); + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp = mymalloc("tmp", 3 * sizeof(float) * Nvertices_list[task]); + MPI_Recv(tmp, 3 * Nvertices_list[task], MPI_FLOAT, task, TAG_N + 1, MPI_COMM_WORLD, &status); + my_fwrite(tmp, sizeof(float), 3 * Nvertices_list[task], fd); + myfree(tmp); + } + fclose(fd); + } + else + { + MPI_Send(&Nvertices, 1, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD); + MPI_Send(VertexCoordinates, 3 * Nvertices, MPI_FLOAT, writeTask, TAG_N + 1, MPI_COMM_WORLD); + } + myfree(Nvertices_list); + myfree(VertexCoordinates); +} + +/*! \brief Outputs Voronoi mesh to file. 
+ * + * Outputs the Voronoi mesh data from task write Task to lastTask in file + * fname. + * + * \param[in] T Pointer to tessellation. + * \param[in] fname File name of file the data is written in. + * \param[in] writeTask Task that gathers information and writes data. + * \param[in] lastTask Last task that is included in this dump. + * + * \return void + */ +void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask) +{ + char buf1[255], buf2[255]; + + MaxEntries = 1000 * NumGas; + MaxFaces = 100 * NumGas; + + /* coordinates */ + Nvertices = 0; + sprintf(buf1, "%s_coordinates.dat", fname); + write_voronoi_face_vertex_coordinates(T, buf1, writeTask, lastTask); + + /* indices */ + Edge_visited = mymalloc_movable(&Edge_visited, "Edge_visited", Mesh.Ndt * sizeof(unsigned char)); + int i; + for(i = 0; i < Mesh.Ndt; i++) + Edge_visited[i] = 0; + + Nentries = 0; + Nnormals = 0; + sprintf(buf1, "%s_indices.dat", fname); + sprintf(buf2, "%s_normals.dat", fname); + write_voronoi_face_vertex_indices(T, buf1, buf2, writeTask, lastTask); + myfree(Edge_visited); +} + +#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c new file mode 100644 index 0000000000..42c6f06b1f --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c @@ -0,0 +1,407 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_check.c + * \date 05/2018 + * \brief Algorithms to check Voronoi mesh construction. + * \details contains functions: + * void check_for_min_distance(tessellation * T) + * void check_links(tessellation * T) + * void check_orientations(tessellation * T) + * void check_tetras(tessellation * T, int npoints) + * int points_compare(const void *a, const void *b) + * void check_triangles(tessellation * T, int npoints) + * void check_orientations(tessellation * T) + * void check_links(tessellation * T) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 22.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +#if !defined(TWODIMS) && !defined(ONEDIMS) /* three-dimensional test code */ + +int points_compare(const void *a, const void *b); + +/*! \brief Checks minimum distance between Delaunay points making sure it is + * nonzero. + * + * \param[in] T Pointer to tessellation. + * + * \return void + */ +void check_for_min_distance(tessellation *T) +{ + point *DP = T->DP; + int i, j; + double r2, r2min; + char msg[200]; + + for(i = 0, r2min = 1.0e30; i < T->Ndp; i++) + { + printf("i=%d\n", i); + + for(j = i + 1; j < T->Ndp; j++) + { + r2 = (DP[i].x - DP[j].x) * (DP[i].x - DP[j].x) + (DP[i].y - DP[j].y) * (DP[i].y - DP[j].y) + + (DP[i].z - DP[j].z) * (DP[i].z - DP[j].z); + if(r2 < r2min) + r2min = r2; + + if(r2min == 0) + { + sprintf(msg, "i=%d j=%d equal. 
DP[i].index=%d DP[j].index=%d\n", i, j, DP[i].index, DP[j].index); + terminate(msg) + } + } + } + + printf("min distance=%g\n", sqrt(r2min)); +} + +/*! \brief Checks if tessellation links are correct. + * + * \param[in] T Pointer to tessellation. + * + * \return void + */ +void check_links(tessellation *T) +{ + tetra *DT = T->DT; + int i, j, s, c, flag = 0; + int pl[3], pr[3]; + char msg[200]; + + for(i = 0; i < T->Ndt; i++) + { + if(DT[i].t[0] < 0) /* deleted ? */ + continue; + + for(j = 0; j < 4; j++) + { + if(DT[DT[i].t[j]].t[DT[i].s[j]] != i) + { + printf("LINK for tetra=%d j=%d DT[i].s[j]=%d incorrect %d\n", i, j, DT[i].s[j], (int)(DT[DT[i].t[j]].t[DT[i].s[j]])); + } + } + + for(j = 0; j < 4; j++) + { + for(s = 0, c = 0; s < 4; s++) + if(s != j) + pl[c++] = DT[i].p[s]; + + for(s = 0, c = 0; s < 4; s++) + if(s != DT[i].s[j]) + pr[c++] = DT[DT[i].t[j]].p[s]; + + /* sort the points */ + + mysort(&pl[0], 3, sizeof(int), points_compare); + mysort(&pr[0], 3, sizeof(int), points_compare); + + for(s = 0; s < 3; s++) + { + if(pl[s] != pr[s]) + { + sprintf(msg, "LINK for i=%d j=%d incorrect. points of triangles don't match up s=%d\n", i, j, s); + flag = 1; + } + } + + if(flag) + terminate(msg); + } + } + + printf("links ok\n"); +} + +/*! \brief Checks if orientations of tetrahedra are positive. + * + * \param[in] T Pointer to tessellation. + * + * \return void + */ +void check_orientations(tessellation *T) +{ + tetra *DT = T->DT; + point *DP = T->DP; + int i, ivol; + double vol, volmin = 1.0e30; + char msg[200]; + + for(i = 0; i < T->Ndt; i++) + { + tetra *t = &DT[i]; + + point *p0 = &DP[t->p[0]]; + point *p1 = &DP[t->p[1]]; + point *p2 = &DP[t->p[2]]; + point *p3 = &DP[t->p[3]]; + + if(t->t[0] < 0) /* deleted ? 
*/ + continue; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + continue; + + vol = calculate_tetra_volume(p0, p1, p2, p3); + ivol = Orient3d_Exact(p0, p1, p2, p3); + + if(ivol <= 0) + { + sprintf(msg, "Tetra %d is NEGATIVE (%d %d %d %d) oriented or FLAT: ivol=%d vol=%g\n", i, (int)(t->p[0]), (int)(t->p[1]), + (int)(t->p[2]), (int)(t->p[3]), ivol, vol); + terminate(msg); + } + + if(vol < volmin) + volmin = vol; + } + + printf("orientations ok, volmin=%g\n", volmin); +} + +/*! \brief Checks if tetrahedra are valid. + * + * \param[in] T pointer to tessellation. + * \param[in] npoints Number of points. + * + * \return void + */ +void check_tetras(tessellation *T, int npoints) +{ + tetra *DT = T->DT; + point *DP = T->DP; + int i, j, res, res_exact; + char msg[200]; + + for(i = 0; i < T->Ndt; i++) + { + if((i % 100) == 0) + printf("check tetra i=%d/%d\n", i, T->Ndt); + + tetra *t = &DT[i]; + + point *p0 = &DP[t->p[0]]; + point *p1 = &DP[t->p[1]]; + point *p2 = &DP[t->p[2]]; + point *p3 = &DP[t->p[3]]; + + if(t->t[0] < 0) /* deleted ? */ + continue; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + continue; + + if(test_tetra_orientation(p0, p1, p2, p3) > 0) + { + } + else + { + sprintf(msg, "Tetra %d is NEGATIVE oriented\n", i); + terminate(msg); + } + + for(j = 0; j < npoints; j++) + { + if(t->p[0] != j) + if(t->p[1] != j) + if(t->p[2] != j) + if(t->p[3] != j) + { + res = InSphere_Errorbound(p0, p1, p2, p3, &DP[j]); + + if(res >= 0) + { + res_exact = InSphere_Exact(p0, p1, p2, p3, &DP[j]); + + if(res_exact > 0) + { + sprintf(msg, "ERROR tetra=%d: point=%d in tetra with edges=%d|%d|%d|%d res=%d|%d\n", i, j, + (int)(t->p[0]), (int)(t->p[1]), (int)(t->p[2]), (int)(t->p[3]), res, res_exact); + terminate(msg); + } + } + } + } + } + + printf("Tetrahedra OK\n"); +} + +/*! \brief Compare integer value of two variables. + * + * \param[in] a Pointer to first value. + * \param[in] b Pointer to second value. 
+ * + * \return (-1,0,1) -1 iF a < b. + */ +int points_compare(const void *a, const void *b) +{ + if(*((int *)a) < *((int *)b)) + return -1; + + if(*((int *)a) > *((int *)b)) + return +1; + + return 0; +} + +#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */ + +#ifdef TWODIMS /* two-dimensional test code */ + +/*! \brief Check 2d Voronoi mesh triangles. + * + * \param[in] T Pointer to tessellation. + * \param[in] npoints Number of points. + * + * \return void + */ +void check_triangles(tessellation *T, int npoints) +{ + int i, j, res, res_exact; + char msg[200]; + + tetra *DT = T->DT; + + for(i = 0; i < T->Ndt; i++) + { + if(DT[i].p[0] == DPinfinity) + continue; + if(DT[i].p[1] == DPinfinity) + continue; + if(DT[i].p[2] == DPinfinity) + continue; + + if(Orient2d_Exact(T, DT[i].p[0], DT[i].p[1], DT[i].p[2]) != 1) + { + sprintf(msg, "Triangle %d is NEGATIVE oriented or FLAT\n", i); + terminate(msg); + } + + for(j = 0; j < npoints; j++) + { + if(DT[i].p[0] != j) + if(DT[i].p[1] != j) + if(DT[i].p[2] != j) + { + res = InCircle_Quick(T, DT[i].p[0], DT[i].p[1], DT[i].p[2], j); + + if(res > 0) + { + res_exact = InCircle_Exact(T, DT[i].p[0], DT[i].p[1], DT[i].p[2], j); + + if(res_exact > 0) + { + sprintf(msg, "ERROR: point=%d lies in triangle=%d with edges=%d|%d|%d res=%d|%d\n", j, i, + (int)(DT[i].p[0]), (int)(DT[i].p[1]), (int)(DT[i].p[2]), res, res_exact); + terminate(msg); + } + } + } + } + } + + printf("triangles ok\n"); +} + +/*! \brief Check the orientations of triangles in 2d Voronoi mesh. + * + * \param[in] T Pointer to tessellation. 
+ * + * \return void + */ +void check_orientations(tessellation *T) +{ + int i, ivol; + double vol, volmin = 1.0e30; + char msg[200]; + + tetra *DT = T->DT; + + for(i = 0; i < T->Ndt; i++) + { + if(DT[i].p[0] == DPinfinity) + continue; + if(DT[i].p[1] == DPinfinity) + continue; + if(DT[i].p[2] == DPinfinity) + continue; + + vol = test_triangle_orientation(T, DT[i].p[0], DT[i].p[1], DT[i].p[2]); + ivol = Orient2d_Exact(T, DT[i].p[0], DT[i].p[1], DT[i].p[2]); + + if(ivol <= 0) + { + double vol2 = Orient2d_Quick(T, DT[i].p[0], DT[i].p[1], DT[i].p[2]); + + sprintf(msg, "Triangle %d is NEGATIVE (%d %d %d) oriented or FLAT: ivol=%d vol=%g|%g\n", i, (int)(DT[i].p[0]), + (int)(DT[i].p[1]), (int)(DT[i].p[2]), ivol, vol, vol2); + terminate(msg); + } + + if(vol < volmin) + volmin = vol; + } + + printf("orientations ok, volmin=%g\n", volmin); +} + +/*! \brief Check links in 2d Voronoi mesh. + * + * \param[in] T Pointer to tesselation. + * + * \return void + */ +void check_links(tessellation *T) +{ + int i, j; + char msg[200]; + + tetra *DT = T->DT; + + for(i = 0; i < T->Ndt; i++) + { + for(j = 0; j < 3; j++) + { + if(DT[DT[i].t[j]].t[DT[i].s[j]] != i) + { + sprintf(msg, "LINK for i=%d j=%d incorrect\n", i, j); + terminate(msg); + } + } + } +} + +#endif /* #ifdef TWODIMS */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c new file mode 100644 index 0000000000..99afd85cc0 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c @@ -0,0 +1,1088 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              .
+ *
+ * \file        src/mesh/voronoi/voronoi_derefinement.c
+ * \date        05/2018
+ * \brief       Contains routines for de-refinement.
+ * \details     contains functions:
+ *                static void derefine_add_ngb(int edge, int i, int j, double
+ *                  area, int t, int nr)
+ *                int do_derefinements(void)
+ *                static void derefine_apply_probe_list(void)
+ *                static void derefine_apply_flux_list(void)
+ *                static int derefine_flux_list_data_compare(const void *a,
+ *                  const void *b)
+ *                static int derefine_probe_list_data_compare_task(const
+ *                  void *a, const void *b)
+ *                static int derefine_compare_seq_DP_ID(const void *a,
+ *                  const void *b)
+ *                static void derefine_exchange_flag(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 22.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS)
+/* faces with an area below DEREF_SA_FAC times the cell's SurfaceArea are
+ * ignored when neighbours are probed and when conserved quantities are
+ * distributed */
+#define DEREF_SA_FAC 1.0e-4
+
+int do_derefinements(void);
+static void derefine_add_ngb(int edge, int i, int j, double area, int tt, int nr);
+static int derefine_compare_seq_DP_ID(const void *a, const void *b);
+static int derefine_flux_list_data_compare(const void *a, const void *b);
+static void derefine_apply_flux_list(void);
+static void derefine_exchange_flag(void);
+static void derefine_apply_probe_list(void);
+static int derefine_probe_list_data_compare_task(const void *a, const void *b);
+
+/*! \brief Data for derefinement: flag for de-refinement and index of cell.
+ */
+static struct derefine_particle_data
+{
+  int Flag;     /* 0: not selected, 1: wants to be merged, 2: allowed to be merged, 3: blocked by a neighbour */
+  int dp_index; /* index of the cell's point in Mesh.DP, -1 until assigned */
+} * deref_SphP;
+
+/*! \brief Data structure for communicating de-refinement flags.
+ */
+static struct flagexch
+{
+  int Flag;    /* de-refinement flag of an imported point */
+  MyIDType ID; /* ID of an imported point */
+} * FlagExch;
+
+/*! \brief Data structure to flag Delaunay data.
+ */
+static struct flag_delaunay_data
+{
+  int Flag; /* de-refinement flag, one entry per point of Mesh.DP */
+} * flag_DP;
+
+/*! \brief Structure defining auxiliary Delaunay data (for sorting).
+ */
+static struct seq_delaunay_data
+{
+  MyFloat rnd;     /* value from get_random_number(), 0 for points with negative index */
+  int rank, index; /* rank: position in Mesh.DP; index: copy of Mesh.DP[].index */
+  MyIDType ID;     /* ID of the point, 0 for points with negative index */
+} * seq_DP;
+
+/*! \brief Structure defining probe list element.
+ */
+static struct probe_list_data
+{
+  int task, index; /* task that owns the probed neighbour and its originalindex there */
+  int sendpart;    /* local cell on whose behalf the probe is sent */
+  int flag;        /* initialised to 0; a non-zero value excludes sendpart from merging */
+} * ProbeList;
+
+/*! \brief Structure defining flux list element.
+ */
+static struct flux_list_data
+{
+  int task, index;  /* task that owns the receiving cell and its originalindex there */
+  double dM, dP[3]; /* share of mass and of the three momentum components */
+#ifdef MHD
+  double dB[3]; /* share of BConserved */
+#endif /* #ifdef MHD */
+
+#ifndef ISOTHERM_EQS
+  double dEnergy; /* share of Energy */
+#endif /* #ifndef ISOTHERM_EQS */
+
+#ifdef MAXSCALARS
+  double dConservedScalars[MAXSCALARS]; /* share of each conserved scalar */
+#endif /* #ifdef MAXSCALARS */
+} * FluxList;
+
+/* fill level and capacity shared by ProbeList and FluxList (used one after the other) */
+static int Nflux, MaxNflux;
+
+/* per Delaunay point: first and last element of its neighbour list in ngb (-1 if empty);
+ * first_free_ngb is the next unused slot of ngb */
+static int *first_ngb, *last_ngb, first_free_ngb;
+
+/*! \brief Structure defining neighbour data.
+ */
+static struct ngb_data
+{
+#ifdef OPTIMIZE_MEMORY_USAGE
+  MyFloat area;
+#else  /* #ifdef OPTIMIZE_MEMORY_USAGE */
+  double area; /* area of the face shared with the neighbour */
+#endif /* #ifdef OPTIMIZE_MEMORY_USAGE #else */
+  int index;    /* neighbouring point (index into Mesh.DP) */
+  int edge;     /* index of the face in Mesh.VF */
+  int next_ngb; /* next list element, -1 terminates the list */
+  int t, nr;    /* delaunay tetra and edge number that generated this face */
+} * ngb;
+
+/* fill level and capacity of trilist */
+static int n_tri, max_n_tri;
+static triangle *trilist;
+
+#ifdef REFINEMENT_SPLIT_CELLS
+extern char *FlagDoNotRefine;
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+
+/*! \brief Adds cell in list ngb.
+ *
+ *  \param[in] edge Element 'edge' in ngb.
+ *  \param[in] i Index in first_ngb and last_ngb lists.
+ *  \param[in] j Element 'index' in ngb.
+ *  \param[in] area Element 'area' in ngb.
+ *  \param[in] t Element 't' in ngb.
+ *  \param[in] nr Element 'nr' in ngb.
+ *
+ *  Nothing is added if i or j is negative; the routine terminates if either
+ *  is not smaller than Mesh.Ndp.
+ *
+ *  \return void
+ */
+static void derefine_add_ngb(int edge, int i, int j, double area, int t, int nr)
+{
+  if(i >= 0 && j >= 0)
+    {
+      if(i >= Mesh.Ndp || j >= Mesh.Ndp)
+        {
+          terminate("i>= Ndp || j>= Ndp");
+        }
+
+      /* append the next free slot to the list of point i, or start the list */
+      if(first_ngb[i] >= 0)
+        {
+          ngb[last_ngb[i]].next_ngb = first_free_ngb;
+          last_ngb[i] = first_free_ngb;
+        }
+      else
+        {
+          first_ngb[i] = last_ngb[i] = first_free_ngb;
+        }
+
+      ngb[first_free_ngb].area = area;
+      ngb[first_free_ngb].edge = edge;
+      ngb[first_free_ngb].t = t;
+      ngb[first_free_ngb].nr = nr;
+      ngb[first_free_ngb].index = j;
+      ngb[first_free_ngb].next_ngb = -1;
+      first_free_ngb++;
+    }
+}
+
+/*! \brief Loop over all active cells and derefine the ones that need to be
+ *         derefined.
+ *
+ *  Steps: (1) flag active cells that derefine_should_this_cell_be_merged()
+ *  selects; (2) walk all Delaunay points in sorted order and promote a
+ *  flagged cell to Flag 2 unless a synchronized neighbour is already
+ *  promoted/blocked or carries the same ID; (3) drop cells whose neighbour
+ *  across a significant face is also Flag 2, locally and via the probe list
+ *  on other tasks; (4) distribute mass, momentum and the other conserved
+ *  quantities of every remaining Flag 2 cell to its neighbours in proportion
+ *  to the volume they gain, and remove the cell.
+ *
+ *  \return Global number of cells that were flagged in step (1), i.e. that
+ *          want to be de-refined. Cells excluded in the later steps are
+ *          still counted.
+ */
+int do_derefinements(void)
+{
+  int idx, i, j, k, count, countall;
+
+  TIMER_START(CPU_DEREFINE);
+
+  deref_SphP = mymalloc_movable(&deref_SphP, "deref_SphP", NumGas * sizeof(struct derefine_particle_data));
+
+  FlagExch = mymalloc_movable(&FlagExch, "FlagExch", Mesh_nimport * sizeof(struct flagexch));
+
+  /* first, check whether we have cells to derefine */
+  /* NOTE(review): deref_SphP is initialised only for active cells here */
+  for(idx = 0, count = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+#ifdef REFINEMENT_SPLIT_CELLS
+      FlagDoNotRefine[i] = 0;
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+
+      if(i >= NumGas)
+        terminate("index of gas cell greater than NumGas");
+
+      deref_SphP[i].Flag = 0;
+      deref_SphP[i].dp_index = -1;
+
+      if(derefine_should_this_cell_be_merged(i, deref_SphP[i].Flag))
+        {
+          deref_SphP[i].Flag = 1;
+          count++;
+        }
+    }
+
+  MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("DEREFINE: Number of cells that want to be de-refined: %d\n", countall);
+
+  if(countall)
+    {
+      int max_assumed_ntri = 0;
+
+      /* tell the ghost cells whether they want to be refined or not */
+      derefine_exchange_flag();
+
+      /* let's create an explicit list of the neighbors of each cell */
+
+      first_ngb = mymalloc("first_ngb", Mesh.Ndp * sizeof(int));
+      ngb = mymalloc("ngb", 2 * Mesh.Nvf * sizeof(struct ngb_data));
+
+      last_ngb = mymalloc("last_ngb", Mesh.Ndp * sizeof(int));
+
+      for(i = 0; i < Mesh.Ndp; i++)
+        first_ngb[i] = last_ngb[i] = -1;
+
+      /* every face is entered twice, once for each of its two points */
+      for(i = 0, first_free_ngb = 0; i < Mesh.Nvf; i++)
+        {
+          derefine_add_ngb(i, Mesh.VF[i].p1, Mesh.VF[i].p2, Mesh.VF[i].area, Mesh.VF[i].t, Mesh.VF[i].nr);
+          derefine_add_ngb(i, Mesh.VF[i].p2, Mesh.VF[i].p1, Mesh.VF[i].area, Mesh.VF[i].t, Mesh.VF[i].nr);
+        }
+
+      myfree(last_ngb);
+
+      /* we now make a list of the delaunay points that we can sort in a globally unique way */
+      flag_DP = mymalloc_movable(&flag_DP, "flag_DP", Mesh.Ndp * sizeof(struct flag_delaunay_data));
+      seq_DP = mymalloc("seq_DP", Mesh.Ndp * sizeof(struct seq_delaunay_data));
+
+      for(i = 0; i < Mesh.Ndp; i++)
+        {
+          seq_DP[i].rank = i;
+          seq_DP[i].index = Mesh.DP[i].index;
+
+          if(Mesh.DP[i].task == ThisTask)
+            {
+              int li = Mesh.DP[i].index;
+              if(li < 0)
+                {
+                  flag_DP[i].Flag = 0;
+                  seq_DP[i].ID = 0;
+                  seq_DP[i].rnd = 0;
+                }
+              else
+                {
+                  if(li < NumGas)
+                    if(deref_SphP[li].dp_index < 0)
+                      deref_SphP[li].dp_index = i; /* only guaranteed to be set for active cells */
+
+                  /* indices >= NumGas are mapped back by subtracting NumGas */
+                  if(li >= NumGas)
+                    li -= NumGas;
+
+                  flag_DP[i].Flag = deref_SphP[li].Flag;
+                  seq_DP[i].ID = P[li].ID;
+                  seq_DP[i].rnd = get_random_number();
+                }
+            }
+          else
+            {
+              /* point imported from another task: use the exchanged flag and ID */
+              flag_DP[i].Flag = FlagExch[Mesh.DP[i].index].Flag;
+              seq_DP[i].ID = FlagExch[Mesh.DP[i].index].ID;
+              seq_DP[i].rnd = get_random_number();
+            }
+        }
+
+      /* sort according to ID */
+      mysort(seq_DP, Mesh.Ndp, sizeof(struct seq_delaunay_data), derefine_compare_seq_DP_ID);
+
+      /* now let's go through in sorted order. For each cell that is supposed to be refined, check whether any of the
+       * neighbors is already refined. If yes, don't allow it to be refined.
+       * Also, if there is a neighbour with the same ID, don't refine it, because this must be a mirrored particle
+       */
+
+      for(i = 0; i < Mesh.Ndp; i++)
+        {
+          if(seq_DP[i].ID != 0)
+            {
+              j = seq_DP[i].rank;
+
+              if(flag_DP[j].Flag == 1) /* this cell is still eligible for derefinement */
+                {
+                  /* go through its neighbours and check whether one of them is already up for derefinement */
+
+                  int n = 0;
+                  k = first_ngb[j];
+                  while(k >= 0)
+                    {
+                      /* we only need to consider neighboring cells if they are active */
+                      int q = ngb[k].index;
+
+                      if(q >= 0)
+                        {
+                          int timebin;
+
+                          if(Mesh.DP[q].task == ThisTask)
+                            {
+                              if(Mesh.DP[q].index < NumGas)
+                                timebin = P[Mesh.DP[q].index].TimeBinHydro;
+                              else
+                                timebin = P[Mesh.DP[q].index - NumGas].TimeBinHydro;
+                            }
+                          else
+                            {
+#ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT
+                              timebin = PrimExch[Mesh.DP[q].index].TimeBinHydro;
+#else  /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */
+                              timebin = RefExch[Mesh.DP[q].index].TimeBinHydro;
+#endif /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT #else */
+                            }
+
+                          if(TimeBinSynchronized[timebin])
+                            {
+                              if(flag_DP[q].Flag == 2 || flag_DP[q].Flag == 3)
+                                n++;
+
+                              if(Mesh.DP[q].ID == seq_DP[i].ID) /* same ID, so we have a mirrored particle */
+                                n++;
+                            }
+                        }
+
+                      k = ngb[k].next_ngb;
+                    }
+
+                  if(n == 0) /* ok, none have been found. This means this cell is allowed to be refined */
+                    flag_DP[j].Flag = 2;
+                  else
+                    flag_DP[j].Flag = 3;
+                }
+            }
+        }
+
+      myfree(seq_DP);
+
+      /* copy of the refinement flags to the cell structure */
+      for(i = 0; i < Mesh.Ndp; i++)
+        if(Mesh.DP[i].task == ThisTask && Mesh.DP[i].index >= 0 && Mesh.DP[i].index < NumGas)
+          deref_SphP[Mesh.DP[i].index].Flag = flag_DP[i].Flag;
+
+      myfree(flag_DP);
+
+      /* now let's count again how many cells we would like to derefine */
+
+      for(idx = 0, count = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          if(deref_SphP[i].Flag == 2)
+            count++;
+        }
+
+      /* in[0]: candidates on this task, in[1]: candidates excluded again below */
+      int in[2], out[2];
+      in[0] = count;
+
+      /* now we carry out an auxiliary check to make sure that we really
+         avoid de-refining two neighboring cells.  If such a pair is
+         found, both cells will not be derefined. */
+
+      MaxNflux = Mesh.Indi.AllocFacNflux;
+      Nflux = 0;
+      ProbeList = mymalloc_movable(&ProbeList, "ProbeList", MaxNflux * sizeof(struct probe_list_data));
+
+      count = 0;
+
+      for(idx = 0, count = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          if(deref_SphP[i].Flag == 2)
+            {
+              j = deref_SphP[i].dp_index; /* this is the delaunay point of this cell */
+              if(j < 0)
+                terminate("j < 0");
+
+              k = first_ngb[j];
+
+              int flag = 0;
+
+              while(k >= 0)
+                {
+                  /* only neighbours across a face of significant area are considered */
+                  if(ngb[k].area > DEREF_SA_FAC * SphP[i].SurfaceArea)
+                    {
+                      int q = ngb[k].index;
+
+                      if(Mesh.DP[q].task == ThisTask)
+                        {
+                          int p = Mesh.DP[q].index;
+
+                          if(p < 0)
+                            terminate("p < 0");
+
+                          if(p >= NumGas) /* this is a local ghost point */
+                            p -= NumGas;
+
+                          if(TimeBinSynchronized[P[p].TimeBinHydro])
+                            if(deref_SphP[p].Flag == 2)
+                              flag++;
+                        }
+                      else
+                        {
+                          /* here we have a foreign ghost point */
+                          if(Nflux >= MaxNflux)
+                            {
+                              Mesh.Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR;
+                              MaxNflux = Mesh.Indi.AllocFacNflux;
+#ifdef VERBOSE
+                              printf("Task=%d: increase memory allocation, MaxNflux=%d Indi.AllocFacNflux=%g\n", ThisTask, MaxNflux,
+                                     Mesh.Indi.AllocFacNflux);
+#endif /* #ifdef VERBOSE */
+                              ProbeList = myrealloc_movable(ProbeList, MaxNflux * sizeof(struct probe_list_data));
+
+                              if(Nflux >= MaxNflux)
+                                terminate("Nflux >= MaxNflux");
+                            }
+
+                          /* queue a probe that is evaluated on the owning task */
+                          ProbeList[Nflux].task = Mesh.DP[q].task;
+                          ProbeList[Nflux].index = Mesh.DP[q].originalindex;
+                          ProbeList[Nflux].sendpart = i;
+                          ProbeList[Nflux].flag = 0;
+
+                          Nflux++;
+                        }
+                    }
+                  k = ngb[k].next_ngb;
+                }
+
+              if(flag)
+                {
+                  /* ups. It looks like a neigboring point is also about to be dissolved. We hence do not
+                     dissolve the current point
+                   */
+                  deref_SphP[i].Flag = 0;
+                  count++;
+                }
+            }
+        }
+
+      /* now let's probe on other tasks */
+
+      derefine_apply_probe_list();
+
+      /* a probe that came back flagged excludes the cell that sent it */
+      for(i = 0; i < Nflux; i++)
+        {
+          if(ProbeList[i].flag)
+            if(deref_SphP[ProbeList[i].sendpart].Flag == 2)
+              {
+                deref_SphP[ProbeList[i].sendpart].Flag = 0;
+                count++;
+              }
+        }
+
+      myfree(ProbeList);
+
+      in[1] = count;
+      /* out[] is only filled on task 0 */
+      MPI_Reduce(in, out, 2, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+      mpi_printf("DEREFINE: Number of cells that we could de-refine: %d, number of cells we exclude from this set: %d\n", out[0],
+                 out[1]);
+
+      /* we now distribute the conserved quantities of the cell among the neighbours */
+
+      MaxNflux = Mesh.Indi.AllocFacNflux;
+      Nflux = 0;
+      FluxList = mymalloc_movable(&FluxList, "FluxList", MaxNflux * sizeof(struct flux_list_data));
+
+      for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          if(deref_SphP[i].Flag == 2)
+            {
+              j = deref_SphP[i].dp_index; /* this is the delaunay point of this cell */
+              if(j < 0)
+                terminate("j < 0");
+
+              max_n_tri = 300000;
+              n_tri = 0;
+
+              trilist = mymalloc("trilist", max_n_tri * sizeof(triangle));
+
+              /* get a list of all the triangles that make up the Voronoi cell of j */
+              k = first_ngb[j];
+              while(k >= 0)
+                {
+                  n_tri = derefine_refine_get_triangles(&Mesh, ngb[k].t, ngb[k].nr, &Mesh.DP[j], trilist, n_tri, max_n_tri);
+
+                  k = ngb[k].next_ngb;
+                }
+
+              /* assign the first point as owner to all tetras */
+              k = first_ngb[j];
+              int q = ngb[k].index;
+              int t;
+              for(t = 0; t < n_tri; t++)
+                trilist[t].owner = q;
+
+              /* total volume of the cell that is being dissolved */
+              double vol = 0;
+              for(k = 0; k < n_tri; k++)
+                vol += get_tri_volume(k, trilist);
+
+              /* now consider all the other points and split the triangles if needed */
+              k = first_ngb[j];
+              k = ngb[k].next_ngb;
+              while(k >= 0)
+                {
+                  int q = ngb[k].index;
+                  n_tri = derefine_add_point_and_split_tri(q, trilist, n_tri, max_n_tri, vol);
+                  k = ngb[k].next_ngb;
+                }
+
+              if(n_tri > max_assumed_ntri)
+                max_assumed_ntri = n_tri;
+
+              /* scratch array indexed by Delaunay point; only the neighbours' entries are used */
+              double *volume = mymalloc("volume", Mesh.Ndp * sizeof(double));
+
+              /* clear the volume entries of the neighbors */
+              k = first_ngb[j];
+              while(k >= 0)
+                {
+                  int q = ngb[k].index;
+                  volume[q] = 0;
+                  k = ngb[k].next_ngb;
+                }
+
+              /* now assign the volume of the triangles to the neighbors */
+              for(k = 0; k < n_tri; k++)
+                {
+                  if(trilist[k].owner < 0 || trilist[k].owner >= Mesh.Ndp)
+                    terminate("strange owner");
+
+                  volume[trilist[k].owner] += get_tri_volume(k, trilist);
+                }
+
+              /* first, let's establish the volume sum over the neighbours that share a significant face */
+              double voltot = 0;
+              k = first_ngb[j];
+              while(k >= 0)
+                {
+                  if(ngb[k].area > DEREF_SA_FAC * SphP[i].SurfaceArea)
+                    {
+                      int q = ngb[k].index;
+                      voltot += volume[q];
+                    }
+                  k = ngb[k].next_ngb;
+                }
+
+              /* now, distribute conserved quantities proportional to the gained volume */
+              double facsum = 0;
+              k = first_ngb[j];
+              while(k >= 0)
+                {
+                  if(ngb[k].area > DEREF_SA_FAC * SphP[i].SurfaceArea)
+                    {
+                      int q = ngb[k].index;
+
+                      /* NOTE(review): voltot is not checked against zero before this division */
+                      double fac = volume[q] / voltot;
+
+                      if(fac < 0)
+                        {
+                          warn("strange: fac=%g\n", fac);
+                          fac = 0;
+                          // terminate("strange");
+                        }
+                      facsum += fac;
+
+                      if(Mesh.DP[q].task == ThisTask)
+                        {
+                          /* local neighbour: add its share directly */
+                          int p = Mesh.DP[q].index;
+
+                          if(p < 0)
+                            terminate("p < 0");
+
+                          if(p >= NumGas) /* this is a local ghost point */
+                            p -= NumGas;
+                          P[p].Mass += fac * P[i].Mass;
+                          SphP[p].Momentum[0] += fac * SphP[i].Momentum[0];
+                          SphP[p].Momentum[1] += fac * SphP[i].Momentum[1];
+                          SphP[p].Momentum[2] += fac * SphP[i].Momentum[2];
+
+#ifdef MHD
+                          SphP[p].BConserved[0] += fac * SphP[i].BConserved[0];
+                          SphP[p].BConserved[1] += fac * SphP[i].BConserved[1];
+                          SphP[p].BConserved[2] += fac * SphP[i].BConserved[2];
+#endif /* #ifdef MHD */
+
+#ifndef ISOTHERM_EQS
+                          SphP[p].Energy += fac * SphP[i].Energy;
+#endif /* #ifndef ISOTHERM_EQS */
+
+#ifdef MAXSCALARS
+                          /* scalars are addressed through their byte offset inside the SphP element */
+                          for(int s = 0; s < N_Scalar; s++)
+                            *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[s].offset_mass) +=
+                                fac * (*(MyFloat *)(((char *)(&SphP[i])) + scalar_elements[s].offset_mass));
+#endif /* #ifdef MAXSCALARS */
+
+#ifdef REFINEMENT_SPLIT_CELLS
+                          FlagDoNotRefine[p] = 1;
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+                        }
+                      else
+                        {
+                          /* here we have a foreign ghost point */
+                          if(Mesh.DP[q].originalindex < 0)
+                            {
+                              char buf[1000];
+                              sprintf(buf, "---> task=%d  q=%d j=%d Ndp=%d\n", ThisTask, q, j, Mesh.Ndp);
+                              terminate(buf);
+                            }
+
+                          if(Nflux >= MaxNflux)
+                            {
+                              Mesh.Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR;
+                              MaxNflux = Mesh.Indi.AllocFacNflux;
+#ifdef VERBOSE
+                              printf("Task=%d: increase memory allocation, MaxNflux=%d Indi.AllocFacNflux=%g\n", ThisTask, MaxNflux,
+                                     Mesh.Indi.AllocFacNflux);
+#endif /* #ifdef VERBOSE */
+                              FluxList = myrealloc_movable(FluxList, MaxNflux * sizeof(struct flux_list_data));
+
+                              if(Nflux >= MaxNflux)
+                                terminate("Nflux >= MaxNflux");
+                            }
+
+                          /* queue the share; it is handed over by derefine_apply_flux_list() below */
+                          FluxList[Nflux].task = Mesh.DP[q].task;
+                          FluxList[Nflux].index = Mesh.DP[q].originalindex;
+                          FluxList[Nflux].dM = fac * P[i].Mass;
+                          FluxList[Nflux].dP[0] = fac * SphP[i].Momentum[0];
+                          FluxList[Nflux].dP[1] = fac * SphP[i].Momentum[1];
+                          FluxList[Nflux].dP[2] = fac * SphP[i].Momentum[2];
+#ifdef MHD
+                          FluxList[Nflux].dB[0] = fac * SphP[i].BConserved[0];
+                          FluxList[Nflux].dB[1] = fac * SphP[i].BConserved[1];
+                          FluxList[Nflux].dB[2] = fac * SphP[i].BConserved[2];
+#endif /* #ifdef MHD */
+
+#ifndef ISOTHERM_EQS
+                          FluxList[Nflux].dEnergy = fac * SphP[i].Energy;
+#endif /* #ifndef ISOTHERM_EQS */
+
+#ifdef MAXSCALARS
+                          for(int s = 0; s < N_Scalar; s++)
+                            FluxList[Nflux].dConservedScalars[s] =
+                                fac * (*(MyFloat *)(((char *)(&SphP[i])) + scalar_elements[s].offset_mass));
+#endif /* #ifdef MAXSCALARS */
+                          Nflux++;
+                        }
+                    }
+
+                  k = ngb[k].next_ngb;
+                }
+
+              /* the shares have to add up to one, otherwise conserved quantities would be lost or created */
+              if(fabs(facsum - 1) > 1.0e-3)
+                {
+                  char buf[1000];
+                  sprintf(buf, "facsum=%g\n", facsum);
+                  terminate(buf);
+                }
+
+              myfree(volume);
+              myfree(trilist);
+
+              /* we set the dissolved cell to zero mass and zero ID. It will be eliminated from the list
+               * of cells in the next domain decomposition
+               */
+              P[i].Mass = 0;
+              P[i].ID = 0;
+              P[i].Vel[0] = 0;
+              P[i].Vel[1] = 0;
+              P[i].Vel[2] = 0;
+
+              SphP[i].VelVertex[0] = 0;
+              SphP[i].VelVertex[1] = 0;
+              SphP[i].VelVertex[2] = 0;
+
+              timebin_remove_particle(&TimeBinsHydro, idx, P[i].TimeBinHydro);
+
+              voronoi_remove_connection(i);
+            }
+        }
+
+      /* now let's apply the flux-list */
+      derefine_apply_flux_list();
+      myfree(FluxList);
+
+      myfree(ngb);
+      myfree(first_ngb);
+
+#ifdef VERBOSE
+      MPI_Reduce(&max_assumed_ntri, &n_tri, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
+      if(ThisTask == 0)
+        printf("DEREFINE: maximum assumed n_tri = %d\n", n_tri);
+#endif /* #ifdef VERBOSE */
+    }
+
+  myfree(FlagExch);
+  myfree(deref_SphP);
+
+  /* remove removed cells from list of active gravity cells */
+  timebin_cleanup_list_of_active_particles(&TimeBinsGravity);
+
+  TIMER_STOP(CPU_DEREFINE);
+
+  return countall;
+}
+
+/*! \brief Communicates probe list data if needed.
+ * + * \return void + */ +static void derefine_apply_probe_list(void) +{ + int i, j, p, nimport, ngrp, recvTask; + + /* now exchange the probe-list and apply it where needed */ + + mysort(ProbeList, Nflux, sizeof(struct probe_list_data), derefine_probe_list_data_compare_task); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(i = 0; i < Nflux; i++) + Send_count[ProbeList[i].task]++; + + if(Send_count[ThisTask] > 0) + terminate("Send_count[ThisTask]"); + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + struct probe_list_data *ProbeListGet = (struct probe_list_data *)mymalloc("ProbeListGet", nimport * sizeof(struct probe_list_data)); + + /* exchange particle data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&ProbeList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct probe_list_data), MPI_BYTE, + recvTask, TAG_DENS_A, &ProbeListGet[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct probe_list_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + /* apply the probes */ + + for(i = 0; i < nimport; i++) + { + p = ProbeListGet[i].index; + + if(TimeBinSynchronized[P[p].TimeBinHydro]) + if(deref_SphP[p].Flag == 2) + ProbeListGet[i].flag = 1; + } + + /* send results back */ + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&ProbeListGet[Recv_offset[recvTask]], Recv_count[recvTask] * 
sizeof(struct probe_list_data), MPI_BYTE, + recvTask, TAG_DENS_A, &ProbeList[Send_offset[recvTask]], + Send_count[recvTask] * sizeof(struct probe_list_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + myfree(ProbeListGet); +} + +/*! \brief Communicate flux list data if needed. + * + * \return void + */ +static void derefine_apply_flux_list(void) +{ + int i, j, p, nimport, ngrp, recvTask; + + /* now exchange the flux-list and apply it when needed */ + + mysort(FluxList, Nflux, sizeof(struct flux_list_data), derefine_flux_list_data_compare); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(i = 0; i < Nflux; i++) + Send_count[FluxList[i].task]++; + + if(Send_count[ThisTask] > 0) + terminate("Send_count[ThisTask]"); + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + struct flux_list_data *FluxListGet = (struct flux_list_data *)mymalloc("FluxListGet", nimport * sizeof(struct flux_list_data)); + + /* exchange particle data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&FluxList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct flux_list_data), MPI_BYTE, recvTask, + TAG_DENS_A, &FluxListGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct flux_list_data), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + /* apply the fluxes */ + + for(i = 0; i < nimport; i++) + { + p = FluxListGet[i].index; + + if(P[p].ID == 0) + { + char buf[1000]; +#ifndef LONGIDS + printf("On task=%d flux to ID=%d, but this is already 
deleted (index p=%d)\n", ThisTask, P[p].ID, p); +#else /* #ifndef LONGIDS */ + printf("On task=%d flux to ID=%llu, but this is already deleted (index p=%d)\n", ThisTask, P[p].ID, p); +#endif /* #ifndef LONGIDS #else */ + terminate(buf); + } + + P[p].Mass += FluxListGet[i].dM; + SphP[p].Momentum[0] += FluxListGet[i].dP[0]; + SphP[p].Momentum[1] += FluxListGet[i].dP[1]; + SphP[p].Momentum[2] += FluxListGet[i].dP[2]; +#ifdef MHD + SphP[p].BConserved[0] += FluxListGet[i].dB[0]; + SphP[p].BConserved[1] += FluxListGet[i].dB[1]; + SphP[p].BConserved[2] += FluxListGet[i].dB[2]; +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + int k; + for(k = 0; k < N_Scalar; k++) + *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[k].offset_mass) += FluxListGet[i].dConservedScalars[k]; +#endif /* #ifdef MAXSCALARS */ + +#ifndef ISOTHERM_EQS + SphP[p].Energy += FluxListGet[i].dEnergy; +#endif /* #ifndef ISOTHERM_EQS */ + +#ifdef REFINEMENT_SPLIT_CELLS + FlagDoNotRefine[p] = 1; +#endif /* #ifdef REFINEMENT_SPLIT_CELLS */ + } + + myfree(FluxListGet); +} + +/*! \brief Compares flux list data task of two elements. + * + * \param[in] a Pointer to first flux list data object. + * \param[in] b Pointer to second flux list data object. + * + * \return (-1,0,1); -1 if a->task < b->task. + */ +static int derefine_flux_list_data_compare(const void *a, const void *b) +{ + if(((struct flux_list_data *)a)->task < (((struct flux_list_data *)b)->task)) + return -1; + + if(((struct flux_list_data *)a)->task > (((struct flux_list_data *)b)->task)) + return +1; + + return 0; +} + +/*! \brief Compares probe list data task of two elements. + * + * \param[in] a Pointer to first probe list data object. + * \param[in] b Pointer to second probe list data object. + * + * \return (-1,0,1); -1 if a->task < b->task. 
+ */
+static int derefine_probe_list_data_compare_task(const void *a, const void *b)
+{
+  const struct probe_list_data *lhs = (const struct probe_list_data *)a;
+  const struct probe_list_data *rhs = (const struct probe_list_data *)b;
+
+  /* the destination task is the only sort key */
+  if(lhs->task < rhs->task)
+    return -1;
+  if(lhs->task > rhs->task)
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Orders two seq delaunay data elements.
+ *
+ *  The fields are compared one after the other; the first field that
+ *  differs decides the result. Order of the keys (most important first):
+ *   rnd
+ *   ID
+ *   index
+ *   rank
+ *
+ * \param[in] a Pointer to first seq delaunay data object.
+ * \param[in] b Pointer to second seq delaunay data object.
+ *
+ * \return (-1,0,1); -1 if a sorts before b, +1 if it sorts after b, 0 if
+ *         no key distinguishes them.
+ */
+static int derefine_compare_seq_DP_ID(const void *a, const void *b)
+{
+  const struct seq_delaunay_data *lhs = (const struct seq_delaunay_data *)a;
+  const struct seq_delaunay_data *rhs = (const struct seq_delaunay_data *)b;
+
+  /* primary key */
+  if(lhs->rnd < rhs->rnd)
+    return -1;
+  if(lhs->rnd > rhs->rnd)
+    return +1;
+
+  /* tie-breakers, in decreasing priority */
+  if(lhs->ID < rhs->ID)
+    return -1;
+  if(lhs->ID > rhs->ID)
+    return +1;
+
+  if(lhs->index < rhs->index)
+    return -1;
+  if(lhs->index > rhs->index)
+    return +1;
+
+  if(lhs->rank < rhs->rank)
+    return -1;
+  if(lhs->rank > rhs->rank)
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Sets exchange flag in de-refinement algorithm.
+ *
+ *  Loops through gas cells in mesh, sets the export flag and communicates this
+ *  information to the appropriate tasks.
+ * + * \return void + */ +static void derefine_exchange_flag(void) +{ + int listp; + int i, j, p, task, off; + int ngrp, recvTask, place; + + struct exchange_data + { + int Flag; + MyIDType ID; + } * tmpExch, *tmpRecv; + + tmpExch = (struct exchange_data *)mymalloc("tmpExch", Mesh_nexport * sizeof(struct exchange_data)); + + /* prepare data for export */ + for(j = 0; j < NTask; j++) + Mesh_Send_count[j] = 0; + + for(i = 0; i < NumGasInMesh; i++) + { + p = List_InMesh[i]; + + listp = List_P[p].firstexport; + while(listp >= 0) + { + if((task = ListExports[listp].origin) != ThisTask) + { + place = ListExports[listp].index; + off = Mesh_Send_offset[task] + Mesh_Send_count[task]++; + + tmpExch[off].Flag = 0; + tmpExch[off].ID = P[place].ID; + + if(P[place].Type == 0) + if(TimeBinSynchronized[P[place].TimeBinHydro]) + if(!(P[place].Mass == 0 && P[place].ID == 0)) + tmpExch[off].Flag = deref_SphP[place].Flag; + } + listp = ListExports[listp].nextexport; + } + } + + /* exchange data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0) + { + tmpRecv = (struct exchange_data *)mymalloc("tmpRecv", Mesh_Recv_count[recvTask] * sizeof(struct exchange_data)); + + /* get the values */ + MPI_Sendrecv(&tmpExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct exchange_data), MPI_BYTE, + recvTask, TAG_DENS_A, tmpRecv, Mesh_Recv_count[recvTask] * sizeof(struct exchange_data), MPI_BYTE, recvTask, + TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + for(i = 0; i < Mesh_Recv_count[recvTask]; i++) + { + if(Mesh_Recv_offset[recvTask] + i >= Mesh_nimport) + terminate("number of imported mesh points grater than Mesh_nimport"); + FlagExch[Mesh_Recv_offset[recvTask] + i].Flag = tmpRecv[i].Flag; + FlagExch[Mesh_Recv_offset[recvTask] + i].ID = tmpRecv[i].ID; + } + + myfree(tmpRecv); + } + } + } + + myfree(tmpExch); +} + +#endif /* #if 
defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c new file mode 100644 index 0000000000..7640029045 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c @@ -0,0 +1,1037 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_dynamic_update.c + * \date 05/2018 + * \brief Algorithms for Voronoi dynamic update. 
+ * \details contains functions: + * int voronoi_get_connected_particles(tessellation * T) + * void voronoi_init_connectivity(tessellation * T) + * void voronoi_update_connectivity(tessellation * T) + * void voronoi_remove_connection(int i) + * int compare_foreign_connection(const void *a, const void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 22.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +int Nvc; /* number of connections */ +int MaxNvc; /* maximum number of connections */ +int Largest_Nvc; +connection *DC; /* Connections */ + +/*! Data structure for non-local connection. + */ +struct foreign_connection +{ + int task; + int origin; + int index; + int image_flags; +} * ForeignDC, *ImportedDC; + +#define MASK_X_SHIFT_RIGHT 38347922 +#define MASK_X_SHIFT_LEFT 76695844 +#define MASK_Y_SHIFT_RIGHT 14708792 +#define MASK_Y_SHIFT_LEFT 117670336 +#define MASK_Z_SHIFT_RIGHT 261632 +#define MASK_Z_SHIFT_LEFT 133955584 +#define MASK ((1 << 27) - 1) + +int FirstUnusedConnection; + +/*! \brief Gets connected active cells from a mesh. + * + * \param[in] T Pointer to tesselation. + * + * \return Number of cells. 
+ *
+ * \details Works in three stages: (1) every active gas cell becomes a
+ *          Delaunay point, (2) the stored connections of the active cells
+ *          are walked and the local cells they refer to are added (once per
+ *          image), while connections to other tasks are counted, (3) the
+ *          points behind those foreign connections are requested from their
+ *          tasks and appended to T->DP. The returned count is the number of
+ *          points added in all three stages.
+ */
+int voronoi_get_connected_particles(tessellation *T)
+{
+  int idx, i, j, p, q, count = 0, duplicates, image_flags, listp, nexport, nimport, origin;
+  int ngrp, recvTask;
+
+  CPU_Step[CPU_MISC] += measure_time();
+
+  /* first, let's add all the primary active points */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      p = TimeBinsHydro.ActiveParticleList[idx];
+      if(p < 0)
+        continue;
+
+      if(P[p].Type == 0)
+        {
+          Ngb_Marker[p] = Ngb_MarkerValue; /* marks the export pointers of p as valid for this call */
+
+          if(P[p].Mass == 0 && P[p].ID == 0) /* skip cells that have been swallowed or eliminated */
+            {
+              List_P[p].firstexport   = -1;
+              List_P[p].currentexport = -1;
+              continue;
+            }
+
+          if(Ninlist >= MaxNinlist) /* export list is full: enlarge it */
+            {
+              T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+              MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+              printf("VORONOI: Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                     T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+              ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+              if(Ninlist >= MaxNinlist) /* growing did not create room */
+                terminate("Ninlist >= MaxNinlist");
+            }
+
+          List_InMesh[NumGasInMesh++] = p;
+
+          /* start the export chain of p with one entry owned by this task; image bit 1 is the unshifted cell itself */
+          List_P[p].currentexport = List_P[p].firstexport = Ninlist++;
+          ListExports[List_P[p].currentexport].image_bits = 1;
+          ListExports[List_P[p].currentexport].nextexport = -1;
+          ListExports[List_P[p].currentexport].origin     = ThisTask;
+          ListExports[List_P[p].currentexport].index      = p;
+
+          if(T->Ndp >= T->MaxNdp) /* point array is full: enlarge it */
+            {
+              T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+              T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+              printf("VORONOI: Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp,
+                     T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+              T->DP -= 5; /* the allocation begins 5 entries before T->DP (cf. the "+ 5" in the size); what they hold is not visible here */
+              T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+              T->DP += 5;
+
+              if(T->Ndp >= T->MaxNdp)
+                terminate("Ndp >= MaxNdp");
+            }
+
+          SphP[p].ActiveArea = 0;
+
+          point *dp = &T->DP[T->Ndp];
+
+          dp->x             = P[p].Pos[0];
+          dp->y             = P[p].Pos[1];
+          dp->z             = P[p].Pos[2];
+          dp->ID            = P[p].ID;
+          dp->task          = ThisTask;
+          dp->index         = p;
+          dp->originalindex = -1;
+          dp->timebin       = P[p].TimeBinHydro;
+          dp->image_flags   = 1;
+#ifdef DOUBLE_STENCIL
+          dp->Hsml             = SphP[p].Hsml;
+          dp->first_connection = -1;
+          dp->last_connection  = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+          T->Ndp++;
+          count++;
+        }
+    }
+
+  /* now, we go through the connection list and see whether we have any additional points to add */
+  int count_foreign = 0;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      q = SphP[i].first_connection;
+
+      while(q >= 0)
+        {
+          if(q < 0 || q >= MaxNvc) /* q < 0 cannot happen inside this loop; only the upper bound is effective */
+            {
+              char buf[1000];
+              sprintf(buf, "strange connectivity q=%d Nvc=%d", q, MaxNvc); /* the value labelled Nvc is MaxNvc */
+              terminate(buf);
+            }
+
+          if(DC[q].task >= 0 && DC[q].task < NTask)
+            {
+              if(ThisTask == DC[q].task) /* this one is local */
+                {
+                  p = DC[q].index; /* particle index */
+
+                  if(P[p].Type == 0)
+                    {
+                      if(!(P[p].Mass == 0 && P[p].ID == 0)) /* skip cells that have been swallowed or dissolved */
+                        {
+                          if(P[p].Ti_Current != All.Ti_Current)
+                            {
+                              drift_particle(p, All.Ti_Current);
+                            }
+
+                          if(p < 0 || p >= NumGas) /* NOTE(review): P[p] has already been read above when this check runs */
+                            {
+                              char buf[1000];
+                              sprintf(buf, "strange p=%d (Ngas=%d) for q=%d Nvc=%d", p, NumGas, q, Nvc);
+                              terminate(buf);
+                            }
+
+                          image_flags = (DC[q].image_flags & MASK);
+
+                          if(Ngb_Marker[p] != Ngb_MarkerValue) /* first visit in this call: reset the export pointers */
+                            {
+                              Ngb_Marker[p]           = Ngb_MarkerValue;
+                              List_P[p].firstexport   = -1;
+                              List_P[p].currentexport = -1;
+                            }
+
+                          listp = List_P[p].firstexport; /* listp is not read again before it is reassigned further below */
+
+                          /* now we need to check whether this particle has already been made part of the list */
+                          if(List_P[p].firstexport >= 0)
+                            {
+                              if(ListExports[List_P[p].currentexport].origin != ThisTask)
+                                terminate("can't be");
+                            }
+                          else
+                            {
+                              /* this one apparently hasn't been added at all yet */
+                              if(Ninlist >= MaxNinlist)
+                                {
+                                  T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+                                  MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+                                  printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask,
+                                         MaxNinlist, T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+                                  ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+                                  if(Ninlist >= MaxNinlist)
+                                    terminate("Ninlist >= MaxNinlist");
+                                }
+
+                              List_InMesh[NumGasInMesh++] = p;
+
+                              List_P[p].currentexport = List_P[p].firstexport = Ninlist++;
+                              ListExports[List_P[p].currentexport].image_bits = 0; /* no image added yet */
+                              ListExports[List_P[p].currentexport].nextexport = -1;
+                              ListExports[List_P[p].currentexport].origin     = ThisTask;
+                              ListExports[List_P[p].currentexport].index      = p;
+                            }
+
+                          if(!(ListExports[List_P[p].currentexport].image_bits & image_flags)) /* this image is not in the list yet */
+                            {
+                              ListExports[List_P[p].currentexport].image_bits |= image_flags;
+
+                              if(T->Ndp >= T->MaxNdp)
+                                {
+                                  T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+                                  T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+                                  printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp,
+                                         T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+                                  T->DP -= 5;
+                                  T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+                                  T->DP += 5;
+
+                                  if(T->Ndp >= T->MaxNdp)
+                                    terminate("Ndp >= MaxNdp");
+                                }
+
+                              SphP[p].ActiveArea = 0;
+
+                              MyDouble x = P[p].Pos[0];
+                              MyDouble y = P[p].Pos[1];
+                              MyDouble z = P[p].Pos[2];
+
+                              /* for each coordinate there are three possibilities (no shift, shift right, shift left).
+                               * image_flags has one bit per combination; the bit position encodes the choice
+                               * to basis three, i.e. x*3^0 + y*3^1 + z*3^2, and the MASK_*_SHIFT_* constants
+                               * select the bits belonging to one shift along one axis */
+
+#ifndef REFLECTIVE_X
+                              if((image_flags & MASK_X_SHIFT_RIGHT))
+                                x += boxSize_X;
+                              else if((image_flags & MASK_X_SHIFT_LEFT))
+                                x -= boxSize_X;
+#else  /* #ifndef REFLECTIVE_X */
+                              if((image_flags & MASK_X_SHIFT_RIGHT))
+                                x = -x;
+                              else if((image_flags & MASK_X_SHIFT_LEFT))
+                                x = 2 * boxSize_X - x;
+#endif /* #ifndef REFLECTIVE_X #else */
+#ifndef REFLECTIVE_Y
+                              if((image_flags & MASK_Y_SHIFT_RIGHT))
+                                y += boxSize_Y;
+                              else if((image_flags & MASK_Y_SHIFT_LEFT))
+                                y -= boxSize_Y;
+#else  /* #ifndef REFLECTIVE_Y */
+                              if((image_flags & MASK_Y_SHIFT_RIGHT))
+                                y = -y;
+                              else if((image_flags & MASK_Y_SHIFT_LEFT))
+                                y = 2 * boxSize_Y - y;
+#endif /* #ifndef REFLECTIVE_Y #else */
+#ifndef REFLECTIVE_Z
+                              if((image_flags & MASK_Z_SHIFT_RIGHT))
+                                z += boxSize_Z;
+                              else if((image_flags & MASK_Z_SHIFT_LEFT))
+                                z -= boxSize_Z;
+#else  /* #ifndef REFLECTIVE_Z */
+                              if((image_flags & MASK_Z_SHIFT_RIGHT))
+                                z = -z;
+                              else if((image_flags & MASK_Z_SHIFT_LEFT))
+                                z = 2 * boxSize_Z - z;
+#endif /* #ifndef REFLECTIVE_Z #else */
+
+                              point *dp = &T->DP[T->Ndp];
+
+                              dp->x = x;
+                              dp->y = y;
+                              dp->z = z;
+
+                              dp->task = ThisTask;
+                              dp->ID   = P[p].ID;
+                              if(image_flags != 1)
+                                dp->index = p + NumGas; /* this is a replicated/mirrored local point */
+                              else
+                                dp->index = p; /* this is actually a local point that wasn't made part of the mesh yet */
+                              dp->originalindex = p;
+                              dp->timebin       = P[p].TimeBinHydro;
+
+                              dp->image_flags = image_flags;
+#ifdef DOUBLE_STENCIL
+                              dp->Hsml             = SphP[p].Hsml;
+                              dp->first_connection = -1;
+                              dp->last_connection  = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+                              T->Ndp++;
+                              count++;
+                            }
+                        }
+                    }
+                }
+              else
+                {
+                  /* here we have a foreign neighbor that we want */
+                  count_foreign++;
+                }
+            }
+
+          if(q == SphP[i].last_connection) /* end of this cell's chain; DC[q].next may lead into other entries */
+            break;
+
+          q = DC[q].next;
+        }
+    }
+
+  /* we now compile a list of the foreign neighbors we want in the mesh */
+
+  ForeignDC = mymalloc_movable(&ForeignDC, "ForeignDC", count_foreign * sizeof(struct foreign_connection));
+
+  int count_foreign_bak = count_foreign; /* kept to cross-check the second pass against the first */
+
+  count_foreign = 0;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      q = SphP[i].first_connection;
+
+      while(q >= 0)
+        {
+          if(DC[q].task >= 0 && DC[q].task < NTask)
+            {
+              if(ThisTask != DC[q].task) /* this one is not local */
+                {
+                  p = DC[q].index; /* p is not used in this branch; the entry below reads DC[q].index directly */
+
+                  ForeignDC[count_foreign].task        = DC[q].task;
+                  ForeignDC[count_foreign].origin      = ThisTask;
+                  ForeignDC[count_foreign].index       = DC[q].index;
+                  ForeignDC[count_foreign].image_flags = (DC[q].image_flags & MASK);
+
+                  /* here we have a foreign neighbor that we want */
+                  count_foreign++;
+                }
+            }
+
+          if(q == SphP[i].last_connection)
+            break;
+
+          q = DC[q].next;
+        }
+    }
+
+  if(count_foreign_bak != count_foreign)
+    terminate("bad");
+
+  /* we sort this list by tasks, and then eliminate duplicates */
+  mysort(ForeignDC, count_foreign, sizeof(struct foreign_connection), compare_foreign_connection);
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  /* in-place compaction: j is the last kept entry, and an entry is dropped when it is bytewise equal to it */
+  for(i = 0, j = -1, duplicates = 0; i < count_foreign; i++)
+    {
+      if(j >= 0)
+        if(memcmp(&ForeignDC[i], &ForeignDC[j], sizeof(struct foreign_connection)) == 0)
+          {
+            duplicates++;
+            continue;
+          }
+
+      j++;
+
+      ForeignDC[j] = ForeignDC[i];
+      Send_count[ForeignDC[j].task]++;
+    }
+
+  count_foreign -= duplicates;
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  /* totals and prefix-sum offsets for the request exchange */
+  for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nexport += Send_count[j];
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  if(nexport != count_foreign)
+    {
+      char buf[1000];
+      sprintf(buf, "nexport=%d count_foreign=%d\n", nexport, count_foreign);
+      terminate(buf);
+    }
+
+  if(Send_count[ThisTask] != 0)
+    terminate("bad");
+
+  ImportedDC = mymalloc_movable(&ImportedDC, "ImportedDC", nimport * sizeof(struct foreign_connection));
+
+  /* get the point requests */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              MPI_Sendrecv(&ForeignDC[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct foreign_connection), MPI_BYTE,
+                           recvTask, TAG_DENS_B, &ImportedDC[Recv_offset[recvTask]],
+                           Recv_count[recvTask] * sizeof(struct foreign_connection), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  point *DP_Buffer = (point *)mymalloc_movable(&DP_Buffer, "DP_Buffer", nimport * sizeof(point));
+
+  /* now we prepare the points */
+  for(j = 0; j < NTask; j++)
+    Recv_count[j] = 0; /* recounted below: only requests that are actually answered are counted */
+
+  for(i = 0; i < nimport; i++)
+    {
+      p           = ImportedDC[i].index;
+      origin      = ImportedDC[i].origin;
+      image_flags = ImportedDC[i].image_flags;
+
+      /* it could happen that the requested point has been refined or was turned into a star, that's why
+       * we not necessarily will find all the points requested.
+       */
+      if(P[p].Type != 0)
+        continue;
+
+      if(P[p].Mass == 0 && P[p].ID == 0)
+        continue; /* skip cells that have been swallowed or dissolved */
+
+      if(P[p].Ti_Current != All.Ti_Current)
+        {
+          drift_particle(p, All.Ti_Current);
+        }
+
+      /* mark the points in the export lists */
+
+      if(Ngb_Marker[p] != Ngb_MarkerValue)
+        {
+          Ngb_Marker[p]           = Ngb_MarkerValue;
+          List_P[p].firstexport   = -1;
+          List_P[p].currentexport = -1;
+        }
+
+      if(List_P[p].firstexport >= 0)
+        {
+          if(ListExports[List_P[p].currentexport].origin != origin)
+            {
+              /* look for the chain entry of the requesting task; append a new one if the chain ends without it */
+              listp = List_P[p].firstexport;
+              while(listp >= 0)
+                {
+                  if(ListExports[listp].origin == origin)
+                    {
+                      List_P[p].currentexport = listp;
+                      break;
+                    }
+
+                  if(ListExports[listp].nextexport < 0)
+                    {
+                      if(Ninlist >= MaxNinlist)
+                        {
+                          T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+                          MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+                          printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                                 T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+                          ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+                          if(Ninlist >= MaxNinlist)
+                            terminate("Ninlist >= MaxNinlist");
+                        }
+
+                      List_P[p].currentexport                         = Ninlist++;
+                      ListExports[List_P[p].currentexport].image_bits = 0;
+                      ListExports[List_P[p].currentexport].nextexport = -1;
+                      ListExports[List_P[p].currentexport].origin     = origin;
+                      ListExports[List_P[p].currentexport].index      = p;
+                      ListExports[listp].nextexport                   = List_P[p].currentexport;
+                      break;
+                    }
+                  listp = ListExports[listp].nextexport;
+                }
+            }
+        }
+      else
+        {
+          /* here we have a local particle that hasn't been made part of the mesh */
+
+          if(Ninlist >= MaxNinlist)
+            {
+              T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+              MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                     T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+              ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+              if(Ninlist >= MaxNinlist)
+                terminate("Ninlist >= MaxNinlist");
+            }
+
+          List_InMesh[NumGasInMesh++] = p;
+
+          List_P[p].currentexport = List_P[p].firstexport = Ninlist++;
+          ListExports[List_P[p].currentexport].image_bits = 0;
+          ListExports[List_P[p].currentexport].nextexport = -1;
+          ListExports[List_P[p].currentexport].origin     = origin;
+          ListExports[List_P[p].currentexport].index      = p;
+        }
+
+      ListExports[List_P[p].currentexport].image_bits |= image_flags;
+
+      MyDouble x = P[p].Pos[0];
+      MyDouble y = P[p].Pos[1];
+      MyDouble z = P[p].Pos[2];
+
+      /* for each coordinate there are three possibilities. They are encoded in image_flags to basis three, i.e. x*3^0 + y*3^1 + z*3^2
+       */
+#ifndef REFLECTIVE_X
+      if((image_flags & MASK_X_SHIFT_RIGHT))
+        x += boxSize_X;
+      else if((image_flags & MASK_X_SHIFT_LEFT))
+        x -= boxSize_X;
+#else  /* #ifndef REFLECTIVE_X */
+      if((image_flags & MASK_X_SHIFT_RIGHT))
+        x = -x;
+      else if((image_flags & MASK_X_SHIFT_LEFT))
+        x = 2 * boxSize_X - x;
+#endif /* #ifndef REFLECTIVE_X #else */
+
+#ifndef REFLECTIVE_Y
+      if((image_flags & MASK_Y_SHIFT_RIGHT))
+        y += boxSize_Y;
+      else if((image_flags & MASK_Y_SHIFT_LEFT))
+        y -= boxSize_Y;
+#else  /* #ifndef REFLECTIVE_Y */
+      if((image_flags & MASK_Y_SHIFT_RIGHT))
+        y = -y;
+      else if((image_flags & MASK_Y_SHIFT_LEFT))
+        y = 2 * boxSize_Y - y;
+#endif /* #ifndef REFLECTIVE_Y #else */
+
+#ifndef REFLECTIVE_Z
+      if((image_flags & MASK_Z_SHIFT_RIGHT))
+        z += boxSize_Z;
+      else if((image_flags & MASK_Z_SHIFT_LEFT))
+        z -= boxSize_Z;
+#else  /* #ifndef REFLECTIVE_Z */
+      if((image_flags & MASK_Z_SHIFT_RIGHT))
+        z = -z;
+      else if((image_flags & MASK_Z_SHIFT_LEFT))
+        z = 2 * boxSize_Z - z;
+#endif /* #ifndef REFLECTIVE_Z #else */
+
+      int k = Recv_offset[origin] + Recv_count[origin]++; /* next free slot in the section of the requesting task */
+
+      SphP[p].ActiveArea = 0;
+
+      DP_Buffer[k].x             = x;
+      DP_Buffer[k].y             = y;
+      DP_Buffer[k].z             = z;
+      DP_Buffer[k].ID            = P[p].ID;
+      DP_Buffer[k].task          = ThisTask;
+      DP_Buffer[k].index         = p;
+      DP_Buffer[k].originalindex = p;
+      DP_Buffer[k].timebin       = P[p].TimeBinHydro;
+
+      DP_Buffer[k].image_flags = image_flags;
+#ifdef DOUBLE_STENCIL
+      DP_Buffer[k].Hsml             = SphP[p].Hsml;
+      DP_Buffer[k].first_connection = -1;
+      DP_Buffer[k].last_connection  = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+    }
+
+  /* because we may have dropped some of the points because they were turned
+   * into stars we need to redetermine the send-offsets and counts
+   */
+
+  MPI_Alltoall(Recv_count, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD); /* roles are swapped: Send_count now holds what this task will receive */
+
+  for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nexport += Send_count[j];
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          /* note: the Recv_offsets stay at this point */
+        }
+    }
+
+  /* now get the additional Delaunay points from the other processors */
+
+  while(nexport + T->Ndp > T->MaxNdp) /* grow until all incoming points fit behind the existing ones */
+    {
+      T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+      T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+      printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+      T->DP -= 5;
+      T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+      T->DP += 5;
+    }
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the Delaunay points */
+              /* prepared points go out from DP_Buffer; the requested ones are appended after the current T->Ndp entries */
+              MPI_Sendrecv(&DP_Buffer[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(point), MPI_BYTE, recvTask, TAG_DENS_B,
+                           &T->DP[T->Ndp + Send_offset[recvTask]], Send_count[recvTask] * sizeof(point), MPI_BYTE, recvTask,
+                           TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  T->Ndp += nexport;
+  count += nexport;
+
+  myfree(DP_Buffer);
+  myfree(ImportedDC);
+  myfree(ForeignDC);
+
+  mpi_printf("VORONOI: done with connected particles\n");
+
+  CPU_Step[CPU_MESH_DYNAMIC] += measure_time();
+
+  /* at this point, it might make sense to sort the Delaunay point again
+   * according to Peano-Hilbert, in an extended region that allows for the
+   * ghost regions
+   */
+
+  peano_hilbert_order_DP();
+
+  CPU_Step[CPU_PEANO] += measure_time();
+
+  return count;
+}
+
+/*! \brief Initialises connectivity.
+ *
+ *  Allocates the connection array DC with T->Indi.AllocFacNvc entries, links
+ *  all of them into the list of unused connections and gives every gas cell
+ *  an empty connection list.
+ *
+ * \param[in] T Pointer to tessellation.
+ *
+ * \return void
+ */
+void voronoi_init_connectivity(tessellation *T)
+{
+  int i;
+
+  mpi_printf("VORONOI: init connectivity\n");
+
+  MaxNvc = T->Indi.AllocFacNvc;
+  DC     = mymalloc_movable(&DC, "DC", MaxNvc * sizeof(connection));
+
+  Nvc = 0;
+
+  /* we use a chaining list to keep track of unused entries in the list of connections */
+  /* here we set it up to contain all available spaces */
+  FirstUnusedConnection = 0;
+  for(i = 0; i < MaxNvc - 1; i++)
+    {
+      DC[i].next = i + 1;
+      DC[i].task = -1; /* mark that this is unused */
+    }
+  DC[MaxNvc - 1].next = -1; /* end of the free list; assumes MaxNvc >= 1 - TODO confirm */
+  DC[MaxNvc - 1].task = -1;
+
+  /* initially, all particle have empty connection lists */
+  for(i = 0; i < NumGas; i++)
+    SphP[i].first_connection = SphP[i].last_connection = -1;
+
+  mpi_printf("VORONOI: done with init of connectivity\n");
+}
+
+/*! \brief Updates connectivity.
+ *
+ * \param[in] T Pointer to tessellation.
+ * + * \return void + */ +void voronoi_update_connectivity(tessellation *T) +{ + int idx, i, k, q, p_task, p_index, q_task, q_index, q_dp_index, q_image_flags; + MyIDType p_ID; + + CPU_Step[CPU_MISC] += measure_time(); + + /* let's clear the connection lists of active particles */ + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(i >= NumGas) + terminate("i >= NumGas"); + + q = SphP[i].first_connection; + + if(q >= 0) /* we have connections, let's add them to the free list */ + { + while(q >= 0) + { + Nvc--; + DC[q].task = -1; /* mark that this is unused */ + + if(q == SphP[i].last_connection) + break; + + q = DC[q].next; + } + + /* we add the new free spots at the beginning of the free list */ + DC[SphP[i].last_connection].next = FirstUnusedConnection; + FirstUnusedConnection = SphP[i].first_connection; + + SphP[i].first_connection = -1; + SphP[i].last_connection = -1; + } + } + + for(i = 0; i < T->Nvf; i++) + { + for(k = 0; k < 2; k++) + { + point *DP = T->DP; + face *VF = T->VF; + + if(k == 0) + { + p_task = DP[VF[i].p1].task; + p_index = DP[VF[i].p1].index; + p_ID = DP[VF[i].p1].ID; + q_task = DP[VF[i].p2].task; + q_index = DP[VF[i].p2].index; + q_dp_index = VF[i].p2; + q_image_flags = (DP[VF[i].p2].image_flags & MASK); + } + else + { + p_task = DP[VF[i].p2].task; + p_index = DP[VF[i].p2].index; + p_ID = DP[VF[i].p2].ID; + q_task = DP[VF[i].p1].task; + q_index = DP[VF[i].p1].index; + q_dp_index = VF[i].p1; + q_image_flags = (DP[VF[i].p1].image_flags & MASK); + } + + if(p_task == ThisTask && p_index >= 0 && p_index < NumGas) + { + if(TimeBinSynchronized[P[p_index].TimeBinHydro]) + { + if(P[p_index].Type != 0) + continue; + + if(P[p_index].Mass == 0 && P[p_index].ID == 0) + continue; /* skip cells that have been swallowed or dissolved */ + + /* need to add the connection to the other point to this particle */ + + if(FirstUnusedConnection < 0 || Nvc == MaxNvc) + { + 
if(!(FirstUnusedConnection < 0 && Nvc == MaxNvc)) + { + char buf[1000]; + sprintf(buf, "strange: FirstUnusedConnection=%d Nvc=%d MaxNvc=%d\n", FirstUnusedConnection, Nvc, MaxNvc); + terminate(buf); + } + + int n, old_MaxNvc = MaxNvc; + T->Indi.AllocFacNvc *= ALLOC_INCREASE_FACTOR; + MaxNvc = T->Indi.AllocFacNvc; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNvc=%d Indi.AllocFacNvc=%g\n", ThisTask, MaxNvc, + T->Indi.AllocFacNvc); +#endif /* #ifdef VERBOSE */ + DC = myrealloc_movable(DC, MaxNvc * sizeof(connection)); + DP = T->DP; + VF = T->VF; + + FirstUnusedConnection = old_MaxNvc; + for(n = old_MaxNvc; n < MaxNvc - 1; n++) + { + DC[n].next = n + 1; + DC[n].task = -1; + } + DC[MaxNvc - 1].next = -1; + DC[MaxNvc - 1].task = -1; + } + + if(SphP[p_index].last_connection >= 0) + { + DC[SphP[p_index].last_connection].next = FirstUnusedConnection; + SphP[p_index].last_connection = FirstUnusedConnection; + } + else + { + SphP[p_index].last_connection = FirstUnusedConnection; + SphP[p_index].first_connection = FirstUnusedConnection; + } + + FirstUnusedConnection = DC[FirstUnusedConnection].next; + Nvc++; + + DC[SphP[p_index].last_connection].task = q_task; + DC[SphP[p_index].last_connection].image_flags = q_image_flags; + DC[SphP[p_index].last_connection].ID = p_ID; + + if(q_task == ThisTask && q_index >= NumGas) + DC[SphP[p_index].last_connection].index = q_index - NumGas; + else + DC[SphP[p_index].last_connection].index = q_index; + + DC[SphP[p_index].last_connection].dp_index = q_dp_index; +#ifdef TETRA_INDEX_IN_FACE + DC[SphP[p_index].last_connection].dt_index = VF[i].dt_index; +#endif /* #ifdef TETRA_INDEX_IN_FACE */ + DC[SphP[p_index].last_connection].vf_index = i; /* index to the corresponding face */ + + if(SphP[p_index].last_connection >= MaxNvc) + { + terminate("this is wrong"); + } + } + } + +#ifdef DOUBLE_STENCIL + int index; + if(k == 0) + index = VF[i].p1; + else + index = VF[i].p2; + + if(!(p_task == ThisTask && p_index >= 0 && p_index < 
NumGas) && DP[index].flag_primary_triangle > 0 && index >= 0) + { + /* need to add the connection to the other point to this particle */ + + if(FirstUnusedConnection < 0 || Nvc == MaxNvc) + { + if(!(FirstUnusedConnection < 0 && Nvc == MaxNvc)) + { + char buf[1000]; + sprintf(buf, "strange: FirstUnusedConnection=%d Nvc=%d MaxNvc=%d\n", FirstUnusedConnection, Nvc, MaxNvc); + terminate(buf); + } + + int n, old_MaxNvc = MaxNvc; + T->Indi.AllocFacNvc *= ALLOC_INCREASE_FACTOR; + MaxNvc = T->Indi.AllocFacNvc; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNvc=%d Indi.AllocFacNvc=%g\n", ThisTask, MaxNvc, + T->Indi.AllocFacNvc); +#endif /* #ifdef VERBOSE */ + DC = myrealloc_movable(DC, MaxNvc * sizeof(connection)); + DP = T->DP; + VF = T->VF; + + FirstUnusedConnection = old_MaxNvc; + for(n = old_MaxNvc; n < MaxNvc - 1; n++) + { + DC[n].next = n + 1; + DC[n].task = -1; + } + DC[MaxNvc - 1].next = -1; + DC[MaxNvc - 1].task = -1; + } + + if(DP[index].last_connection >= 0) + { + DC[DP[index].last_connection].next = FirstUnusedConnection; + DP[index].last_connection = FirstUnusedConnection; + } + else + { + DP[index].last_connection = FirstUnusedConnection; + DP[index].first_connection = FirstUnusedConnection; + } + + FirstUnusedConnection = DC[FirstUnusedConnection].next; + Nvc++; + + DC[DP[index].last_connection].task = q_task; + DC[DP[index].last_connection].image_flags = q_image_flags; + DC[DP[index].last_connection].ID = p_ID; + + if(q_task == ThisTask && q_index >= NumGas) + DC[DP[index].last_connection].index = q_index - NumGas; + else + DC[DP[index].last_connection].index = q_index; + + DC[DP[index].last_connection].dp_index = q_dp_index; + + DC[DP[index].last_connection].vf_index = i; /* index to the corresponding face */ + + if(DP[index].last_connection >= MaxNvc) + { + terminate("this is wrong"); + } + } +#endif /* #ifdef DOUBLE_STENCIL */ + } + } + + mpi_printf("VORONOI: done with updating connectivity.\n"); + + CPU_Step[CPU_MESH_DYNAMIC] += 
measure_time(); +} + +/*! \brief Remove connection from cell. + * + * \param[in] i Index of cell. + * + * \return void + */ +void voronoi_remove_connection(int i) +{ + int q; + if((q = SphP[i].first_connection) >= 0) /* we have connections, let's add them to the free list */ + { + while(q >= 0) + { + Nvc--; + DC[q].task = -1; /* mark that this is unused */ + + if(q == SphP[i].last_connection) + break; + + q = DC[q].next; + } + + /* we add the new free spots at the beginning of the free list */ + DC[SphP[i].last_connection].next = FirstUnusedConnection; + FirstUnusedConnection = SphP[i].first_connection; + + SphP[i].first_connection = -1; + SphP[i].last_connection = -1; + } +} + +/*! \brief Compares two foreign connection objects. + * + * Compares (highest priority first): + * task + * index + * image_flags + * + * \param[in] a First object. + * \param[in] b Second object. + * + * \return (-1,0,1); -1: a < b. + */ +int compare_foreign_connection(const void *a, const void *b) +{ + if(((struct foreign_connection *)a)->task < (((struct foreign_connection *)b)->task)) + return -1; + + if(((struct foreign_connection *)a)->task > (((struct foreign_connection *)b)->task)) + return +1; + + if(((struct foreign_connection *)a)->index < (((struct foreign_connection *)b)->index)) + return -1; + + if(((struct foreign_connection *)a)->index > (((struct foreign_connection *)b)->index)) + return +1; + + if(((struct foreign_connection *)a)->image_flags < (((struct foreign_connection *)b)->image_flags)) + return -1; + + if(((struct foreign_connection *)a)->image_flags > (((struct foreign_connection *)b)->image_flags)) + return +1; + + return 0; +} diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c new file mode 100644 index 0000000000..9b2f79684e --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c @@ -0,0 +1,531 @@ +/*! 
+ * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_exchange.c + * \date 05/2018 + * \brief Algorithms that handle communication of Voronoi mesh data + * between MPI tasks. + * \details contains functions: + * void mesh_setup_exchange(void) + * void exchange_primitive_variables(void) + * void exchange_primitive_variables_and_gradients(void) + * int compare_primexch(const void *a, const void *b) + * void voronoi_update_ghost_velvertex(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 22.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +/*! \brief Auxiliary data structure for communication of primitive variables. + * + */ +struct data_primexch_compare +{ + int rank, task, index; +} * SortPrimExch, *SortPrimExch2; + +/*! \brief Prepares exchange of primitive variables. 
+ * + * \return void + */ +void mesh_setup_exchange(void) +{ + if(All.TotNumGas == 0) + return; + + TIMER_START(CPU_MESH_EXCHANGE); + + int listp; + struct indexexch + { + int task, index; + } * tmpIndexExch, *IndexExch; + int i, j, p, task, off, count; + int ngrp, recvTask, place; + + for(j = 0; j < NTask; j++) + Mesh_Send_count[j] = 0; + + for(i = 0; i < NumGasInMesh; i++) + { + p = List_InMesh[i]; + + listp = List_P[p].firstexport; + while(listp >= 0) + { + if(ListExports[listp].origin != ThisTask) + { + Mesh_Send_count[ListExports[listp].origin]++; + } + listp = ListExports[listp].nextexport; + } + } + + MPI_Alltoall(Mesh_Send_count, 1, MPI_INT, Mesh_Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, Mesh_nimport = 0, Mesh_nexport = 0, Mesh_Recv_offset[0] = 0, Mesh_Send_offset[0] = 0; j < NTask; j++) + { + Mesh_nimport += Mesh_Recv_count[j]; + Mesh_nexport += Mesh_Send_count[j]; + + if(j > 0) + { + Mesh_Send_offset[j] = Mesh_Send_offset[j - 1] + Mesh_Send_count[j - 1]; + Mesh_Recv_offset[j] = Mesh_Recv_offset[j - 1] + Mesh_Recv_count[j - 1]; + } + } + + IndexExch = (struct indexexch *)mymalloc("IndexExch", Mesh_nimport * sizeof(struct indexexch)); + tmpIndexExch = (struct indexexch *)mymalloc("tmpIndexExch", Mesh_nexport * sizeof(struct indexexch)); + + /* prepare data for export */ + for(j = 0; j < NTask; j++) + Mesh_Send_count[j] = 0; + + for(i = 0; i < NumGasInMesh; i++) + { + p = List_InMesh[i]; + + listp = List_P[p].firstexport; + while(listp >= 0) + { + if((task = ListExports[listp].origin) != ThisTask) + { + place = ListExports[listp].index; + off = Mesh_Send_offset[task] + Mesh_Send_count[task]++; + + tmpIndexExch[off].task = ThisTask; + tmpIndexExch[off].index = place; + } + listp = ListExports[listp].nextexport; + } + } + + /* exchange data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0) + { + /* get the particles */ 
+ MPI_Sendrecv(&tmpIndexExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct indexexch), MPI_BYTE, + recvTask, TAG_DENS_A, &IndexExch[Mesh_Recv_offset[recvTask]], + Mesh_Recv_count[recvTask] * sizeof(struct indexexch), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + myfree(tmpIndexExch); + + /* now we need to associate the imported data with the points stored in the DP[] array */ + + SortPrimExch = (struct data_primexch_compare *)mymalloc("SortPrimExch", Mesh_nimport * sizeof(struct data_primexch_compare)); + + for(i = 0; i < Mesh_nimport; i++) + { + SortPrimExch[i].rank = i; + SortPrimExch[i].task = IndexExch[i].task; + SortPrimExch[i].index = IndexExch[i].index; + } + + /* let sort the data according to task and index */ + mysort(SortPrimExch, Mesh_nimport, sizeof(struct data_primexch_compare), compare_primexch); + + SortPrimExch2 = (struct data_primexch_compare *)mymalloc("SortPrimExch2", Mesh.Ndp * sizeof(struct data_primexch_compare)); + + for(i = 0, count = 0; i < Mesh.Ndp; i++) + { + if(Mesh.DP[i].task != ThisTask) + { + SortPrimExch2[count].rank = i; + SortPrimExch2[count].task = Mesh.DP[i].task; + SortPrimExch2[count].index = Mesh.DP[i].index; + count++; + } + } + + /* let sort according to task and index */ + mysort(SortPrimExch2, count, sizeof(struct data_primexch_compare), compare_primexch); + + /* count can be larger than nimport because a foreigh particle can appear + multiple times on the local domain, due to periodicity */ + + for(i = 0, j = 0; i < count; i++) + { + if(SortPrimExch2[i].task != SortPrimExch[j].task || SortPrimExch2[i].index != SortPrimExch[j].index) + j++; + + if(j >= Mesh_nimport) + terminate("j >= Mesh_nimport"); + + Mesh.DP[SortPrimExch2[i].rank].index = + SortPrimExch[j].rank; /* note: this change is now permanent and available for next exchange */ + } + + myfree(SortPrimExch2); + myfree(SortPrimExch); + myfree(IndexExch); + + /* allocate structures needed to exchange 
the actual information for ghost cells */ + PrimExch = (struct primexch *)mymalloc_movable(&PrimExch, "PrimExch", Mesh_nimport * sizeof(struct primexch)); + GradExch = (struct grad_data *)mymalloc_movable(&GradExch, "GradExch", Mesh_nimport * sizeof(struct grad_data)); + + TIMER_STOP(CPU_MESH_EXCHANGE); +} + +/*! \brief Communicate primitive variables across MPI tasks. + * + * This routine is called before gradient calculation, afterwards, + * exchange_primitive_variables_and_gradients is called. + * + * \return void + */ +void exchange_primitive_variables(void) +{ + if(All.TotNumGas == 0) + return; + + TIMER_START(CPU_MESH_EXCHANGE); + + int listp; + struct primexch *tmpPrimExch; + int i, j, p, task, off; + int ngrp, recvTask, place; + + tmpPrimExch = (struct primexch *)mymalloc("tmpPrimExch", Mesh_nexport * sizeof(struct primexch)); + + /* prepare data for export */ + for(j = 0; j < NTask; j++) + Mesh_Send_count[j] = 0; + + for(i = 0; i < NumGasInMesh; i++) + { + p = List_InMesh[i]; + + listp = List_P[p].firstexport; + while(listp >= 0) + { + if((task = ListExports[listp].origin) != ThisTask) + { + place = ListExports[listp].index; + off = Mesh_Send_offset[task] + Mesh_Send_count[task]++; + + tmpPrimExch[off].Volume = SphP[place].Volume; + + tmpPrimExch[off].Density = SphP[place].Density; + + tmpPrimExch[off].Pressure = SphP[place].Pressure; + +#ifdef MHD + tmpPrimExch[off].B[0] = SphP[place].B[0]; + tmpPrimExch[off].B[1] = SphP[place].B[1]; + tmpPrimExch[off].B[2] = SphP[place].B[2]; +#ifdef MHD_POWELL + tmpPrimExch[off].DivB = SphP[place].DivB; +#endif /* #ifdef MHD_POWELL */ +#endif /* #ifdef MHD */ + + tmpPrimExch[off].OldMass = SphP[place].OldMass; + tmpPrimExch[off].SurfaceArea = SphP[place].SurfaceArea; + tmpPrimExch[off].ActiveArea = SphP[place].ActiveArea; + tmpPrimExch[off].TimeBinHydro = P[place].TimeBinHydro; + +#ifdef MAXSCALARS + for(j = 0; j < N_Scalar; j++) + tmpPrimExch[off].Scalars[j] = *(MyFloat *)(((char *)(&SphP[place])) + 
scalar_elements[j].offset); +#endif /* #ifdef MAXSCALARS */ + + tmpPrimExch[off].TimeLastPrimUpdate = SphP[place].TimeLastPrimUpdate; + + for(j = 0; j < 3; j++) + { + tmpPrimExch[off].VelGas[j] = P[place].Vel[j]; + tmpPrimExch[off].Center[j] = SphP[place].Center[j]; + } + tmpPrimExch[off].Csnd = get_sound_speed(place); + } + listp = ListExports[listp].nextexport; + } + } + + /* exchange data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&tmpPrimExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct primexch), MPI_BYTE, + recvTask, TAG_DENS_A, &PrimExch[Mesh_Recv_offset[recvTask]], + Mesh_Recv_count[recvTask] * sizeof(struct primexch), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + myfree(tmpPrimExch); + + TIMER_STOP(CPU_MESH_EXCHANGE); +} + +/*! \brief Communicate primitive variables and gradients across MPI tasks. + * + * This routine is called after gradient calculation. 
+ * + * \return void + */ +void exchange_primitive_variables_and_gradients(void) +{ + if(All.TotNumGas == 0) + return; + + TIMER_START(CPU_MESH_EXCHANGE); + + int listp; + struct grad_data *tmpGradExch; + struct primexch *tmpPrimExch; + + int i, j, p, task, off; + int ngrp, recvTask, place; + + tmpPrimExch = (struct primexch *)mymalloc("tmpPrimExch", Mesh_nexport * sizeof(struct primexch)); + tmpGradExch = (struct grad_data *)mymalloc("tmpGradExch", Mesh_nexport * sizeof(struct grad_data)); + + /* prepare data for export */ + for(j = 0; j < NTask; j++) + Mesh_Send_count[j] = 0; + + for(i = 0; i < NumGasInMesh; i++) + { + p = List_InMesh[i]; + + /* in case previous steps already lowered the Mass, update OldMass to yield together with metallicity vector conservative + * estimate of metal mass of each species contained in cell */ + if(P[p].Mass < SphP[p].OldMass) + SphP[p].OldMass = P[p].Mass; + + listp = List_P[p].firstexport; + while(listp >= 0) + { + if((task = ListExports[listp].origin) != ThisTask) + { + place = ListExports[listp].index; + off = Mesh_Send_offset[task] + Mesh_Send_count[task]++; + + tmpPrimExch[off].Volume = SphP[place].Volume; + tmpPrimExch[off].Density = SphP[place].Density; + tmpPrimExch[off].Pressure = SphP[place].Pressure; + +#ifdef MHD + tmpPrimExch[off].B[0] = SphP[place].B[0]; + tmpPrimExch[off].B[1] = SphP[place].B[1]; + tmpPrimExch[off].B[2] = SphP[place].B[2]; +#ifdef MHD_POWELL + tmpPrimExch[off].DivB = SphP[place].DivB; +#endif /* #ifdef MHD_POWELL */ +#endif /* #ifdef MHD */ + + tmpPrimExch[off].OldMass = SphP[place].OldMass; + tmpPrimExch[off].SurfaceArea = SphP[place].SurfaceArea; + tmpPrimExch[off].ActiveArea = SphP[place].ActiveArea; + + tmpPrimExch[off].TimeBinHydro = P[place].TimeBinHydro; + +#ifdef MAXSCALARS + for(j = 0; j < N_Scalar; j++) + tmpPrimExch[off].Scalars[j] = *(MyFloat *)(((char *)(&SphP[place])) + scalar_elements[j].offset); +#endif /* #ifdef MAXSCALARS */ + + tmpPrimExch[off].TimeLastPrimUpdate = 
SphP[place].TimeLastPrimUpdate; + + for(j = 0; j < 3; j++) + { + tmpPrimExch[off].VelGas[j] = P[place].Vel[j]; + tmpPrimExch[off].Center[j] = SphP[place].Center[j]; + tmpPrimExch[off].VelVertex[j] = SphP[place].VelVertex[j]; + } + + tmpGradExch[off] = SphP[place].Grad; + + tmpPrimExch[off].Csnd = get_sound_speed(place); + } + listp = ListExports[listp].nextexport; + } + } + + /* exchange data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0) + { + /* exchange the data */ + MPI_Sendrecv(&tmpPrimExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct primexch), MPI_BYTE, + recvTask, TAG_DENS_A, &PrimExch[Mesh_Recv_offset[recvTask]], + Mesh_Recv_count[recvTask] * sizeof(struct primexch), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + + MPI_Sendrecv(&tmpGradExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct grad_data), MPI_BYTE, + recvTask, TAG_HYDRO_A, &GradExch[Mesh_Recv_offset[recvTask]], + Mesh_Recv_count[recvTask] * sizeof(struct grad_data), MPI_BYTE, recvTask, TAG_HYDRO_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + myfree(tmpGradExch); + myfree(tmpPrimExch); + + TIMER_STOP(CPU_MESH_EXCHANGE); + + /* note: because the sequence is the same as before, we don't have to do the sorts again */ +} + +/*! \brief Compare two data primexch compare objects. + * + * The following variables (most important first): + * task + * index + * + * \param[in] a Pointer to first data primexch compare object. + * \param[in] b Pointer to second data primexch compare object. + * + * \return (-1,0,1); -1 if a < b. 
+ */ +int compare_primexch(const void *a, const void *b) +{ + if(((struct data_primexch_compare *)a)->task < ((struct data_primexch_compare *)b)->task) + return -1; + + if(((struct data_primexch_compare *)a)->task > ((struct data_primexch_compare *)b)->task) + return +1; + + if(((struct data_primexch_compare *)a)->index < ((struct data_primexch_compare *)b)->index) + return -1; + + if(((struct data_primexch_compare *)a)->index > ((struct data_primexch_compare *)b)->index) + return +1; + + return 0; +} + +/*! \brief Communicates vertex velocity divergence data across MPI tasks. + * + * \return 0 + */ +#ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE +void voronoi_update_ghost_velvertex(void) +{ + CPU_Step[CPU_MISC] += measure_time(); + + int listp; + int i, j, p, task, off; + int ngrp, recvTask, place; + struct velvertex_data + { + MyFloat VelVertex[3]; + } * tmpVelVertexExch, *tmpVelVertexRecv; + + tmpVelVertexExch = (struct velvertex_data *)mymalloc("tmpVelVertexExch", Mesh_nexport * sizeof(struct velvertex_data)); + + /* prepare data for export */ + for(j = 0; j < NTask; j++) + Mesh_Send_count[j] = 0; + + for(i = 0; i < NumGasInMesh; i++) + { + p = List_InMesh[i]; + + listp = List_P[p].firstexport; + while(listp >= 0) + { + if((task = ListExports[listp].origin) != ThisTask) + { + place = ListExports[listp].index; + off = Mesh_Send_offset[task] + Mesh_Send_count[task]++; + + for(j = 0; j < 3; j++) + { + tmpVelVertexExch[off].VelVertex[j] = SphP[place].VelVertex[j]; + } + } + listp = ListExports[listp].nextexport; + } + } + + /* exchange data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0) + { + tmpVelVertexRecv = + (struct velvertex_data *)mymalloc("tmpVelVertexRecv", Mesh_Recv_count[recvTask] * sizeof(struct velvertex_data)); + + /* get the values */ + MPI_Sendrecv(&tmpVelVertexExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * 
sizeof(struct velvertex_data), + MPI_BYTE, recvTask, TAG_DENS_A, tmpVelVertexRecv, Mesh_Recv_count[recvTask] * sizeof(struct velvertex_data), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + for(i = 0; i < Mesh_Recv_count[recvTask]; i++) + { + for(j = 0; j < 3; j++) + { + PrimExch[Mesh_Recv_offset[recvTask] + i].VelVertex[j] = tmpVelVertexExch[i].VelVertex[j]; + } + } + + myfree(tmpVelVertexRecv); + } + } + } + + myfree(tmpVelVertexExch); + + CPU_Step[CPU_SET_VERTEXVELS] += measure_time(); +} +#endif /* #ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c new file mode 100644 index 0000000000..6c147b7901 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c @@ -0,0 +1,1773 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_ghost_search.c + * \date 05/2018 + * \brief Algorithms to search for (ghost) cells from other domains. 
+ * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * int voronoi_ghost_search(tessellation * TT) + * static void voronoi_pick_up_additional_DP_points(void) + * int voronoi_ghost_search_evaluate(tessellation * T, + * int target, int mode, int q, int thread_id) + * int ngb_treefind_ghost_search(tessellation * T, MyDouble + * searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat + * maxdist, int target, int origin, int *startnode, int + * bitflags, int mode, int *nexport, int *nsend_local) + * int ngb_treefind_ghost_search(tessellation * T, MyDouble + * searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat + * maxdist, int target, int origin, int mode, int thread_id, + * int numnodes, int *firstnode) + * int count_undecided_tetras(tessellation * T) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +#if !defined(ONEDIMS) + +static void voronoi_pick_up_additional_DP_points(void); + +static tessellation *T; + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyDouble RefPos[3]; + MyFloat MaxDist; + int Origin; + + int Firstnode; + +#ifdef EXTENDED_GHOST_SEARCH + unsigned char BitFlagList[NODELISTLENGTH]; +#endif /* #ifdef EXTENDED_GHOST_SEARCH */ +} data_in; + +static data_in *DataGet, *DataIn; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. 
Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + point *DP = T->DP; + tetra *DT = T->DT; + tetra_center *DTC = T->DTC; + + int k, q; + + for(k = 0, q = -1; k < (NUMDIMS + 1); k++) + { +#ifndef DOUBLE_STENCIL + if(DP[DT[i].p[k]].task == ThisTask) + if(DP[DT[i].p[k]].index >= 0 && DP[DT[i].p[k]].index < NumGas) + { + if(TimeBinSynchronized[P[DP[DT[i].p[k]].index].TimeBinHydro]) + { + q = DT[i].p[k]; + break; + } + } +#else /* #ifndef DOUBLE_STENCIL */ + if(DP[DT[i].p[k]].flag_primary_triangle && DT[i].p[k] >= 0) + { + q = DT[i].p[k]; + break; + } +#endif /* #ifndef DOUBLE_STENCIL #else */ + } + + if(q == -1) + terminate("q=-1"); + + in->Pos[0] = DTC[i].cx; + in->Pos[1] = DTC[i].cy; + in->Pos[2] = DTC[i].cz; + + in->RefPos[0] = DP[q].x; + in->RefPos[1] = DP[q].y; + in->RefPos[2] = DP[q].z; + + in->Origin = ThisTask; + + in->MaxDist = SphP[DP[q].index].Hsml; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + int Count; /* counts how many have been found */ +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES || mode == MODE_IMPORTED_PARTICLES) + if(out->Count) + T->DTF[i] -= (T->DTF[i] & 2); +} + +#include "../../utils/generic_comm_helpers2.h" + +#ifdef EXTENDED_GHOST_SEARCH +/*! Data structure for extended ghost search. + */ +static struct data_nodelist_special +{ + unsigned char BitFlagList[NODELISTLENGTH]; +} * DataNodeListSpecial; +#endif /* #ifdef EXTENDED_GHOST_SEARCH */ + +static point *DP_Buffer; +static int MaxN_DP_Buffer, N_DP_Buffer; +static int NadditionalPoints; +static int *send_count_new; + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int i, j, q; + + /* do local particles and prepare export list */ + { + int thread_id = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[thread_id].Exportflag[j] = -1; + + while(1) + { + if(Thread[thread_id].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= T->Ndt) + break; + + if((T->DTF[i] & 2) == 0) /* DT that is not flagged as tested ok */ + { + T->DTF[i] |= 2; /* if we find a particle, need to clear this flag again! */ + + point *DP = T->DP; + tetra *DT = T->DT; + + if(DT[i].t[0] < 0) /* deleted ? */ + continue; + + if(DT[i].p[0] == DPinfinity || DT[i].p[1] == DPinfinity || DT[i].p[2] == DPinfinity) + continue; + +#ifndef TWODIMS + if(DT[i].p[3] == DPinfinity) + continue; +#endif /* #ifndef TWODIMS */ + +#ifndef DOUBLE_STENCIL + for(j = 0, q = -1; j < (NUMDIMS + 1); j++) + { + if(DP[DT[i].p[j]].task == ThisTask) + if(DP[DT[i].p[j]].index >= 0 && DP[DT[i].p[j]].index < NumGas) + { + if(TimeBinSynchronized[P[DP[DT[i].p[j]].index].TimeBinHydro]) + { + q = DT[i].p[j]; + break; + } + } + } + + if(j == (NUMDIMS + 1)) /* this triangle does not have a local point. 
No need to test it */ + continue; + + if(q == -1) + terminate("q==-1"); +#else /* #ifndef DOUBLE_STENCIL */ + /* here comes the check for a double stencil */ + for(j = 0, q = -1; j < (NUMDIMS + 1); j++) + { + if(DP[DT[i].p[j]].flag_primary_triangle && DT[i].p[j] >= 0) + { + q = DT[i].p[j]; + break; + } + } + + if(j == + (NUMDIMS + + 1)) /* this triangle does not have a point which is not at least neighbor to a primary point. No need to test it */ + continue; + + if(q == -1) + terminate("q==-1"); +#endif /* #ifndef DOUBLE_STENCIL #else */ + voronoi_ghost_search_evaluate(T, i, MODE_LOCAL_PARTICLES, q, thread_id); + } + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, count = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = count++; + + if(i >= Nimport) + break; + + voronoi_ghost_search_evaluate(T, i, MODE_IMPORTED_PARTICLES, 0, threadid); + } + } +} + +/*! \brief Main routine to perform ghost search. + * + * \param[in, out] TT Pointer to tessellation. + * + * \return Number of additional points. 
+ */ +int voronoi_ghost_search(tessellation *TT) +{ + T = TT; + int j, ndone, ndone_flag; + + NadditionalPoints = 0; + + /* allocate buffers to arrange communication */ + + send_count_new = (int *)mymalloc_movable(&send_count_new, "send_count_new", NTask * sizeof(int)); + + MaxN_DP_Buffer = T->Indi.AllocFacN_DP_Buffer; + DP_Buffer = (point *)mymalloc_movable(&DP_Buffer, "DP_Buffer", MaxN_DP_Buffer * sizeof(point)); + +#ifdef DOUBLE_STENCIL + { + point *DP = T->DP; + tetra *DT = T->DT; + int i; + + for(i = 0; i < T->Ndp; i++) + DP[i].flag_primary_triangle = 0; + + for(i = 0; i < T->Ndt; i++) + { + for(j = 0; j < (NUMDIMS + 1); j++) + { + if(DP[DT[i].p[j]].task == ThisTask) + if(DP[DT[i].p[j]].index >= 0 && DP[DT[i].p[j]].index < NumGas) + if(TimeBinSynchronized[P[DP[DT[i].p[j]].index].TimeBinHydro]) + break; + } + + if(j != (NUMDIMS + 1)) /* this triangle does have a local point, so mark all its points */ + { + for(j = 0; j < (NUMDIMS + 1); j++) + DP[DT[i].p[j]].flag_primary_triangle = 1; + } + } + } +#endif /* #ifdef DOUBLE_STENCIL */ + + generic_set_MaxNexport(); + + NextParticle = 0; + + do + { + for(j = 0; j < NTask; j++) + send_count_new[j] = 0; + + N_DP_Buffer = 0; + + /* allocate buffers to arrange communication */ + generic_alloc_partlist_nodelist_ngblist_threadbufs(); + + kernel_local(); + + /* do all necessary bookkeeping and the data exchange */ + generic_exchange(kernel_imported); + + generic_free_partlist_nodelist_ngblist_threadbufs(); + + voronoi_pick_up_additional_DP_points(); + + if(NextParticle >= T->Ndt) + ndone_flag = 1; + else + ndone_flag = 0; + + MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + } + while(ndone < NTask); + + myfree(DP_Buffer); + myfree(send_count_new); + +#ifdef EXTENDED_GHOST_SEARCH + myfree(DataNodeListSpecial); +#endif /* #ifdef EXTENDED_GHOST_SEARCH */ + + return NadditionalPoints; +} + +/*! \brief Gets additional Delaunay points. 
+ *
+ *  \return void
+ */
+static void voronoi_pick_up_additional_DP_points(void)
+{
+  int nimport; /* total number of points this task receives in this pass */
+
+  /* The data blocks stored in DP_Buffer are not ordered according to processor rank, but rather in a permuted way; the offsets computed below assume the order recvTask = ThisTask ^ ngrp for ngrp = 0, 1, ...
+   * We need to take this into account when calculating the offsets into the send buffer.
+   */
+
+  for(int ngrp = 0, ncnt = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+      if(recvTask < NTask)
+        Send_count[ncnt++] = send_count_new[recvTask]; /* Send_count temporarily holds the counts in buffer order, not rank order */
+    }
+
+  Recv_offset[0] = 0; /* Recv_offset serves as scratch space here: prefix sum over the buffer-ordered counts */
+  for(int j = 1; j < NTask; j++)
+    Recv_offset[j] = Recv_offset[j - 1] + Send_count[j - 1];
+
+  for(int ngrp = 0, ncnt = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+      if(recvTask < NTask)
+        Send_offset[recvTask] = Recv_offset[ncnt++]; /* start of the block for recvTask inside DP_Buffer */
+    }
+
+  memcpy(Send_count, send_count_new, NTask * sizeof(int)); /* from here on Send_count is indexed by rank again */
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); /* Recv_count[j] = number of points task j sends to this task */
+
+  Recv_offset[0] = 0; /* now the actual receive offsets, in rank order */
+  nimport = Recv_count[0];
+
+  for(int j = 1; j < NTask; j++)
+    {
+      nimport += Recv_count[j];
+      Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+    }
+
+  while(nimport + T->Ndp > T->MaxNdp) /* grow T->DP until the imported points fit behind the existing T->Ndp points */
+    {
+      T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+      T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+      printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+      T->DP -= 5; /* T->DP points 5 elements past the start of its allocation; step back to the base pointer for the realloc */
+      T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point)); /* presumably the 5 leading slots hold special points addressed with negative indices -- confirm */
+      T->DP += 5;
+
+      if(nimport + T->Ndp > T->MaxNdp && NumGas == 0) /* aborts only when NumGas == 0; otherwise the while condition decides whether to grow again */
+        terminate("nimport + Ndp > MaxNdp");
+    }
+
+  /* get the delaunay points */
+  for(int ngrp = 1; ngrp < (1 << PTask); ngrp++) /* ngrp = 0 would pair this task with itself, so the exchange starts at 1 */
+    {
+      int recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the particles: send our block for recvTask and append its block behind the current end of T->DP */
+              MPI_Sendrecv(&DP_Buffer[Send_offset[recvTask]], Send_count[recvTask] * sizeof(point), MPI_BYTE, recvTask, TAG_DENS_B,
+                           &T->DP[T->Ndp + Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(point), MPI_BYTE, recvTask,
+                           TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  T->Ndp += nimport; /* the received points are now part of the point list */
+  NadditionalPoints += nimport;
+
+  if(N_DP_Buffer > Largest_N_DP_Buffer) /* keep the high-water mark of the buffer fill level */
+    Largest_N_DP_Buffer = N_DP_Buffer;
+}
+
+/*! \brief Evaluate function for voronoi_ghost_search.
+ *
+ *  Called both in local-particle mode and in imported-particle mode.
+ *
+ *  \param[in, out] T Pointer to tessellation; T->DTF[target] may be modified in local mode.
+ *  \param[in] target Index in DTC and DTF arrays (local mode) or in DataGet/DataResult (imported mode).
+ *  \param[in] mode Mode of call (local/imported).
+ *  \param[in] q Index in DP array; only read in local mode.
+ *  \param[in] thread_id Thread_id, needed for ngb_treefind_ghost_search.
+ *
+ *  \return 0
+ */
+int voronoi_ghost_search_evaluate(tessellation *T, int target, int mode, int q, int thread_id)
+{
+  int origin, numnodes, *firstnode;
+  int numngb;
+  double h, dx, dy, dz, maxdist;
+  MyDouble pos[3], refpos[3]; /* pos: search center, refpos: reference point */
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      pos[0] = T->DTC[target].cx; /* search center comes from the DTC entry of the target */
+      pos[1] = T->DTC[target].cy;
+      pos[2] = T->DTC[target].cz;
+      refpos[0] = T->DP[q].x; /* reference point is Delaunay point q */
+      refpos[1] = T->DP[q].y;
+      refpos[2] = T->DP[q].z;
+#ifndef DOUBLE_STENCIL
+      maxdist = SphP[T->DP[q].index].Hsml;
+#else /* #ifndef DOUBLE_STENCIL */
+      maxdist = T->DP[q].Hsml;
+#endif /* #ifndef DOUBLE_STENCIL #else */
+      origin = ThisTask;
+
+      numnodes = 1; /* local mode: the tree search performs a single walk and does not read firstnode */
+      firstnode = NULL;
+    }
+  else
+    {
+      /* note: we do not use a pointer here to DataGet[target].Pos, because DataGet may be moved in a realloc operation */
+      pos[0] = DataGet[target].Pos[0];
+      pos[1] = DataGet[target].Pos[1];
+      pos[2] = DataGet[target].Pos[2];
+      refpos[0] = DataGet[target].RefPos[0];
+      refpos[1] = DataGet[target].RefPos[1];
+      refpos[2] = DataGet[target].RefPos[2];
+      maxdist = DataGet[target].MaxDist;
+      origin = DataGet[target].Origin;
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  dx = refpos[0] - pos[0];
+  dy = refpos[1] - pos[1];
+  dz = refpos[2] - pos[2];
+
+  h = 1.0001 * sqrt(dx * dx + dy * dy + dz * dz); /* search radius: distance from the center to the reference point, enlarged by a factor 1.0001 */
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    if(maxdist < 2 * h) /* clears the bit with value 2 in DTF; count_undecided_tetras() counts entries without that bit */
+      T->DTF[target] -=
+          (T->DTF[target] &
+           2); /* since we restrict the search radius, we are not guaranteed to search the full circumcircle of the triangle */
+
+  numngb = ngb_treefind_ghost_search(T, pos, refpos, h, maxdist, target, origin, mode, thread_id, numnodes, firstnode);
+
+  out.Count = numngb;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out; /* imported mode: store the result in DataResult[target] */
+
+  return 0;
+}
+
+#ifdef EXTENDED_GHOST_SEARCH /* this allows for mirrored images in a full 3x3 grid in terms of the principal domain; NOTE(review): the signature below takes 12 parameters while the call in voronoi_ghost_search_evaluate() passes 11 -- confirm this variant still builds */
+/*! \brief Tree-search algorithm for ghost cells in EXTENDED_GHOST_SEARCH mode.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] searchcenter[3] Position of the search center.
+ *  \param[in] refpos[3] Reference position.
+ *  \param[in] hsml Search radius.
+ *  \param[in] maxdist Maximum distance.
+ *  \param[in] target Index in DTF array.
+ *  \param[in] origin Original task.
+ *  \param[in] startnode Startnode.
+ *  \param[in] bitflags Bitflags for ghost search.
+ *  \param[in] mode Mode.
+ *  \param[in, out] nexport Number of exported particles.
+ *  \param[out] nsend_local Array with number of particles to be sent.
+ *
+ *  \return Number of points found.
+ */ +int ngb_treefind_ghost_search(tessellation *T, MyDouble searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat maxdist, int target, + int origin, int *startnode, int bitflags, int mode, int *nexport, int *nsend_local) +{ + int i, numngb, no, p, task, nexport_save, ndp_save, nadditionalpoints_save; + int image_flag; + struct NgbNODE *current; + MyDouble dx, dy, dz, hsml2, maxdist2; + int listp; + double dx_ref, dy_ref, dz_ref, mindistance, thisdistance; + double min_x = 0, min_y = 0, min_z = 0; + int min_p = 0, min_imageflag = 0; + MyFloat search_min[3], search_max[3], newcenter[3], newrefpos[3]; + MyFloat refsearch_min[3], refsearch_max[3]; + + nadditionalpoints_save = NadditionalPoints; + ndp_save = T->Ndp; + nexport_save = *nexport; + + numngb = 0; + mindistance = 1.0e70; + + int repx, repy, repz = 0; + int repx_A, repy_A, repz_A; + int repx_B, repy_B, repz_B; + int xbits; + int ybits; + int zbits; + int count; + + if(mode == 0) + { + repx_A = -1; + repx_B = 1; + repy_A = -1; + repy_B = 1; + repz_A = -1; + repz_B = 1; + xbits = ybits = zbits = 0; + } + else + { + zbits = (bitflags / 9); + ybits = (bitflags - zbits * 9) / 3; + xbits = bitflags - zbits * 9 - ybits * 3; + + if(xbits == 1) + repx_A = repx_B = -1; + else if(xbits == 2) + repx_A = repx_B = 1; + else + repx_A = repx_B = 0; + + if(ybits == 1) + repy_A = repy_B = -1; + else if(ybits == 2) + repy_A = repy_B = 1; + else + repy_A = repy_B = 0; + + if(zbits == 1) + repz_A = repz_B = -1; + else if(zbits == 2) + repz_A = repz_B = 1; + else + repz_A = repz_B = 0; + } + + hsml2 = hsml * hsml; + maxdist2 = maxdist * maxdist; + + for(repx = repx_A; repx <= repx_B; repx++) + for(repy = repy_A; repy <= repy_B; repy++) +#if !defined(TWODIMS) + for(repz = repz_A; repz <= repz_B; repz++) +#endif /* #if !defined(TWODIMS) */ + { + image_flag = 0; /* for each coordinate there are three possibilities. + We encodee them to basis three, i.e. 
x*3^0 + y*3^1 + z*3^2
+                           */
+          if(repx == 0) /* x digit 0: original copy, coordinates unchanged */
+            {
+              newcenter[0] = searchcenter[0];
+              newrefpos[0] = refpos[0];
+            }
+          else if(repx == -1)
+            {
+#ifndef REFLECTIVE_X
+              newcenter[0] = searchcenter[0] - boxSize_X; /* periodic: shift by one box length */
+              newrefpos[0] = refpos[0] - boxSize_X;
+#else /* #ifndef REFLECTIVE_X */
+              newcenter[0] = -searchcenter[0]; /* reflective: mirror at x = 0 */
+              newrefpos[0] = -refpos[0];
+#endif /* #ifndef REFLECTIVE_X #else */
+              image_flag += 1; /* x digit 1 (weight 3^0) */
+            }
+          else /* repx == 1 */
+            {
+#ifndef REFLECTIVE_X
+              newcenter[0] = searchcenter[0] + boxSize_X;
+              newrefpos[0] = refpos[0] + boxSize_X;
+#else /* #ifndef REFLECTIVE_X */
+              newcenter[0] = -searchcenter[0] + 2 * boxSize_X; /* reflective: mirror at x = boxSize_X */
+              newrefpos[0] = -refpos[0] + 2 * boxSize_X;
+#endif /* #ifndef REFLECTIVE_X #else */
+              image_flag += 2; /* x digit 2 (weight 3^0) */
+            }
+
+          if(repy == 0) /* same scheme for y, with weight 3^1 */
+            {
+              newcenter[1] = searchcenter[1];
+              newrefpos[1] = refpos[1];
+            }
+          else if(repy == -1)
+            {
+#ifndef REFLECTIVE_Y
+              newcenter[1] = searchcenter[1] - boxSize_Y;
+              newrefpos[1] = refpos[1] - boxSize_Y;
+#else /* #ifndef REFLECTIVE_Y */
+              newcenter[1] = -searchcenter[1];
+              newrefpos[1] = -refpos[1];
+#endif /* #ifndef REFLECTIVE_Y #else */
+              image_flag += 1 * 3;
+            }
+          else /* repy == 1 */
+            {
+#ifndef REFLECTIVE_Y
+              newcenter[1] = searchcenter[1] + boxSize_Y;
+              newrefpos[1] = refpos[1] + boxSize_Y;
+#else /* #ifndef REFLECTIVE_Y */
+              newcenter[1] = -searchcenter[1] + 2 * boxSize_Y;
+              newrefpos[1] = -refpos[1] + 2 * boxSize_Y;
+#endif /* #ifndef REFLECTIVE_Y #else */
+              image_flag += 2 * 3;
+            }
+
+          if(repz == 0) /* same scheme for z, with weight 3^2; with TWODIMS only this branch is compiled */
+            {
+              newcenter[2] = searchcenter[2];
+              newrefpos[2] = refpos[2];
+            }
+#if !defined(TWODIMS)
+          else if(repz == -1)
+            {
+#ifndef REFLECTIVE_Z
+              newcenter[2] = searchcenter[2] - boxSize_Z;
+              newrefpos[2] = refpos[2] - boxSize_Z;
+#else /* #ifndef REFLECTIVE_Z */
+              newcenter[2] = -searchcenter[2];
+              newrefpos[2] = -refpos[2];
+#endif /* #ifndef REFLECTIVE_Z #else */
+              image_flag += 1 * 9;
+            }
+          else /* repz == 1 */
+            {
+#ifndef REFLECTIVE_Z
+              newcenter[2] = searchcenter[2] + boxSize_Z; /* the z image must shift the z component (index 2), as in the repz == -1 branch; index 1 would mix in the y coordinate */
+              newrefpos[2] = refpos[2] +
boxSize_Z; +#else /* #ifndef REFLECTIVE_Z */ + newcenter[2] = -searchcenter[2] + 2 * boxSize_Z; + newrefpos[2] = -refpos[2] + 2 * boxSize_Z; +#endif /* #ifndef REFLECTIVE_Z #else */ + image_flag += 2 * 9; + } +#endif /* #if !defined(TWODIMS) */ + + for(i = 0; i < 3; i++) + { + search_min[i] = newcenter[i] - hsml; + search_max[i] = newcenter[i] + hsml; + refsearch_min[i] = newrefpos[i] - maxdist; + refsearch_max[i] = newrefpos[i] + maxdist; + } + + if(mode == 1) + if(bitflags != image_flag) + { + printf("bitflags=%d image_flag=%d xbits=%d ybits=%d zbits=%d \n", bitflags, image_flag, xbits, ybits, zbits); + terminate("problem"); + } + + no = *startnode; + count = 0; + + while(no >= 0) + { + count++; + if(no < Ngb_MaxPart) /* single particle */ + { + p = no; + no = Ngb_Nextnode[no]; + + if(P[p].Type > 0) + continue; + + if(P[p].Mass == 0 && P[p].ID == 0) + continue; /* skip cells that have been swallowed or dissolved */ + + dx = P[p].Pos[0] - newcenter[0]; + dy = P[p].Pos[1] - newcenter[1]; + dz = P[p].Pos[2] - newcenter[2]; + + if(dx * dx + dy * dy + dz * dz > hsml2) + continue; + + dx_ref = P[p].Pos[0] - newrefpos[0]; + dy_ref = P[p].Pos[1] - newrefpos[1]; + dz_ref = P[p].Pos[2] - newrefpos[2]; + + if((thisdistance = dx_ref * dx_ref + dy_ref * dy_ref + dz_ref * dz_ref) > maxdist2) + continue; + + /* now we need to check whether this particle has already been sent to + the requesting cpu for this particular image shift */ + + if(thisdistance >= mindistance) + continue; + + if(Ngb_Marker[p] != Ngb_MarkerValue) + { + Ngb_Marker[p] = Ngb_MarkerValue; + List_P[p].firstexport = -1; + List_P[p].currentexport = -1; + } + + if(List_P[p].firstexport >= 0) + { + if(ListExports[List_P[p].currentexport].origin != origin) + { + listp = List_P[p].firstexport; + while(listp >= 0) + { + if(ListExports[listp].origin == origin) + { + List_P[p].currentexport = listp; + break; + } + + listp = ListExports[listp].nextexport; + } + + if(listp >= 0) + if((ListExports[listp].image_bits & (1 
<< image_flag))) /* already in list */ + continue; + } + else + { + if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag))) /* already in list */ + continue; + } + } + + /* here we have found a new closest particle that has not been inserted yet */ + + numngb = 1; + mindistance = thisdistance; + min_p = p; + min_imageflag = image_flag; + + /* determine the point coordinates in min_x, min_y, min_z */ + if(repx == 0) + min_x = P[p].Pos[0]; + else if(repx == -1) + { +#ifndef REFLECTIVE_X + min_x = P[p].Pos[0] + boxSize_X; +#else /* #ifndef REFLECTIVE_X */ + min_x = -P[p].Pos[0]; +#endif /* #ifndef REFLECTIVE_X #else */ + } + else if(repx == 1) + { +#ifndef REFLECTIVE_X + min_x = P[p].Pos[0] - boxSize_X; +#else /* #ifndef REFLECTIVE_X */ + min_x = -P[p].Pos[0] + 2 * boxSize_X; +#endif /* #ifndef REFLECTIVE_X #else */ + } + + if(repy == 0) + min_y = P[p].Pos[1]; + else if(repy == -1) + { +#ifndef REFLECTIVE_Y + min_y = P[p].Pos[1] + boxSize_Y; +#else /* #ifndef REFLECTIVE_Y */ + min_y = -P[p].Pos[1]; +#endif /* #ifndef REFLECTIVE_Y #else */ + } + else if(repy == 1) + { +#ifndef REFLECTIVE_Y + min_y = P[p].Pos[1] - boxSize_Y; +#else /* #ifndef REFLECTIVE_Y */ + min_y = -P[p].Pos[1] + 2 * boxSize_Y; +#endif /* #ifndef REFLECTIVE_Y #else */ + } + + if(repz == 0) + min_z = P[p].Pos[2]; +#if !defined(TWODIMS) + else if(repz == -1) + { +#ifndef REFLECTIVE_Z + min_z = P[p].Pos[2] + boxSize_Z; +#else /* #ifndef REFLECTIVE_Z */ + min_z = -P[p].Pos[2]; +#endif /* #ifndef REFLECTIVE_Z #else */ + } + else if(repz == 1) + { +#ifndef REFLECTIVE_Z + min_z = P[p].Pos[2] - boxSize_Z; +#else /* #ifndef REFLECTIVE_Z */ + min_z = -P[p].Pos[2] + 2 * boxSize_Z; +#endif /* #ifndef REFLECTIVE_Z #else */ + } +#endif /* #if !defined(TWODIMS) */ + } + else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */ + { + if(mode == 1) + { + if(no < Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the + branch */ + { + break; + } + } + 
+ current = &Ngb_Nodes[no]; + no = current->u.d.sibling; /* in case the node can be discarded */ + + if(search_min[0] > current->u.d.range_max[0]) + continue; + if(search_max[0] < current->u.d.range_min[0]) + continue; + if(refsearch_min[0] > current->u.d.range_max[0]) + continue; + if(refsearch_max[0] < current->u.d.range_min[0]) + continue; + + if(search_min[1] > current->u.d.range_max[1]) + continue; + if(search_max[1] < current->u.d.range_min[1]) + continue; + if(refsearch_min[1] > current->u.d.range_max[1]) + continue; + if(refsearch_max[1] < current->u.d.range_min[1]) + continue; + + if(search_min[2] > current->u.d.range_max[2]) + continue; + if(search_max[2] < current->u.d.range_min[2]) + continue; + if(refsearch_min[2] > current->u.d.range_max[2]) + continue; + if(refsearch_max[2] < current->u.d.range_min[2]) + continue; + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else /* pseudo particle */ + { + if(mode == 1) + terminate("mode == 1"); + + if(target >= 0) /* if no target is given, export will not occur */ + { + if(Exportflag[task = DomainTask[no - (Ngb_MaxPart + Ngb_MaxNodes)]] != target) + { + Exportflag[task] = target; + Exportnodecount[task] = NODELISTLENGTH; + } + + if(Exportnodecount[task] == NODELISTLENGTH) + { + if(*nexport >= All.BunchSize) + { + T->Ndp = ndp_save; + NadditionalPoints = nadditionalpoints_save; + *nexport = nexport_save; + if(nexport_save == 0) + terminate( + "nexport_save == 0"); /* in this case, the buffer is too small to process even a single particle */ + for(task = 0; task < NTask; task++) + nsend_local[task] = 0; + for(no = 0; no < nexport_save; no++) + nsend_local[DataIndexTable[no].Task]++; + return -1; + } + Exportnodecount[task] = 0; + Exportindex[task] = *nexport; + DataIndexTable[*nexport].Task = task; + DataIndexTable[*nexport].Index = target; + DataIndexTable[*nexport].IndexGet = *nexport; + *nexport = *nexport + 1; + nsend_local[task]++; + } + + 
DataNodeListSpecial[Exportindex[task]].BitFlagList[Exportnodecount[task]] = image_flag; + DataNodeListSpecial[Exportindex[task]].NodeList[Exportnodecount[task]++] = + Ngb_DomainNodeIndex[no - (Ngb_MaxPart + Ngb_MaxNodes)]; + + if(Exportnodecount[task] < NODELISTLENGTH) + DataNodeListSpecial[Exportindex[task]].NodeList[Exportnodecount[task]] = -1; + } + + no = Ngb_Nextnode[no - Ngb_MaxNodes]; + continue; + } + } + } + + *startnode = -1; + + if(numngb) + { + p = min_p; + + image_flag = min_imageflag; + + if(Ngb_Marker[p] != Ngb_MarkerValue) + { + Ngb_Marker[p] = Ngb_MarkerValue; + List_P[p].firstexport = -1; + List_P[p].currentexport = -1; + } + + if(List_P[p].firstexport >= 0) + { + if(ListExports[List_P[p].currentexport].origin != origin) + { + listp = List_P[p].firstexport; + while(listp >= 0) + { + if(ListExports[listp].origin == origin) + { + List_P[p].currentexport = listp; + break; + } + + if(ListExports[listp].nextexport < 0) + { + if(Ninlist >= MaxNinlist) + { + T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = T->Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist, + T->Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + List_P[p].currentexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 0; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = origin; + ListExports[List_P[p].currentexport].index = p; + ListExports[listp].nextexport = List_P[p].currentexport; + break; + } + listp = ListExports[listp].nextexport; + } + } + } + else + { + /* here we have a local particle that hasn't been made part of the mesh */ + + if(Ninlist >= MaxNinlist) + { + T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = 
T->Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist, + T->Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + List_InMesh[NumGasInMesh++] = p; + + List_P[p].currentexport = List_P[p].firstexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 0; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = origin; + ListExports[List_P[p].currentexport].index = p; + } + + if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag))) + terminate("this should not happen"); + + ListExports[List_P[p].currentexport].image_bits |= (1 << image_flag); + + /* add the particle to the ones that need to be exported */ + + if(origin == ThisTask) + { + if(mode == 1) + terminate("mode==1: how can this be?"); + + if(T->Ndp >= T->MaxNdp) + { + T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR; + T->MaxNdp = T->Indi.AllocFacNdp; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp); +#endif /* #ifdef VERBOSE */ + T->DP -= 5; + T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + if(T->Ndp >= T->MaxNdp) + terminate("Ndp >= MaxNdp"); + } + + SphP[p].ActiveArea = 0; + + point *dp = &T->DP[T->Ndp]; + dp->x = min_x; + dp->y = min_y; + dp->z = min_z; + dp->task = ThisTask; + dp->ID = P[p].ID; + if(image_flag) + dp->index = p + NumGas; /* this is a replicated/mirrored local point */ + else + dp->index = p; /* this is actually a local point that wasn't made part of the mesh yet */ + dp->originalindex = p; + dp->timebin = P[p].TimeBinHydro; + dp->image_flags = (1 << image_flag); + +#ifdef DOUBLE_STENCIL + dp->Hsml = SphP[p].Hsml; + 
dp->first_connection = -1; + dp->last_connection = -1; +#endif /* #ifdef DOUBLE_STENCIL */ + T->Ndp++; + NadditionalPoints++; + } + else + { + if(mode == 0) + terminate("mode == 0: how can this be?"); + + if(N_DP_Buffer >= MaxN_DP_Buffer) + { + T->Indi.AllocFacN_DP_Buffer *= ALLOC_INCREASE_FACTOR; + MaxN_DP_Buffer = T->Indi.AllocFacN_DP_Buffer; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxN_DP_Buffer=%d Indi.AllocFacN_DP_Buffer=%g\n", ThisTask, MaxN_DP_Buffer, + T->Indi.AllocFacN_DP_Buffer); +#endif /* #ifdef VERBOSE */ + DP_Buffer = (point *)myrealloc_movable(DP_Buffer, MaxN_DP_Buffer * sizeof(point)); + + if(N_DP_Buffer >= MaxN_DP_Buffer) + terminate("(N_DP_Buffer >= MaxN_DP_Buffer"); + } + + SphP[p].ActiveArea = 0; + + DP_Buffer[N_DP_Buffer].x = min_x; + DP_Buffer[N_DP_Buffer].y = min_y; + DP_Buffer[N_DP_Buffer].z = min_z; + DP_Buffer[N_DP_Buffer].ID = P[p].ID; + DP_Buffer[N_DP_Buffer].task = ThisTask; + DP_Buffer[N_DP_Buffer].index = p; + DP_Buffer[N_DP_Buffer].originalindex = p; + DP_Buffer[N_DP_Buffer].timebin = P[p].TimeBinHydro; + DP_Buffer[N_DP_Buffer].image_flags = (1 << image_flag); +#ifdef DOUBLE_STENCIL + DP_Buffer[N_DP_Buffer].Hsml = SphP[p].Hsml; + DP_Buffer[N_DP_Buffer].first_connection = -1; + DP_Buffer[N_DP_Buffer].last_connection = -1; +#endif /* #ifdef DOUBLE_STENCIL */ + send_count_new[origin]++; + N_DP_Buffer++; + } + } + + return numngb; +} + +#else /* #ifdef EXTENDED_GHOST_SEARCH */ + +/*! \brief Tree-search algorithm for ghost cells without EXTENDED_GHOST_SEARCH. + * + * \param[in] T Pointer to tessellation. + * \param[in] searchcenter[3] Postion of the search center. + * \param[in] refpos[3] Reference position. + * \param[in] hsml Search radius. + * \param[in] maxdist Maximum distance. + * \param[in] target Index in DTF array. + * \param[in] origin Original task. + * \param[in] mode Mode (local/imported). + * \param[in] thread_id ID of this thread. + * \param[in] numnodes Number of nodes. 
+ * \param[in] firstnode Index of first node. + * + * \return Number of points found. + */ +int ngb_treefind_ghost_search(tessellation *T, MyDouble searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat maxdist, int target, + int origin, int mode, int thread_id, int numnodes, int *firstnode) +{ + int i, k, numngb, no, p; + int image_flag = 0; + struct NgbNODE *current; + MyDouble x, y, z, dx, dy, dz; + int listp; + double dx_ref, dy_ref, dz_ref, mindistance, thisdistance, maxdistSquared, hsmlSquared; + double min_x = 0, min_y = 0, min_z = 0; + int min_p = 0, min_imageflag = 0; + double offx, offy, offz; + MyFloat search_min[3], search_max[3], search_max_Lsub[3], search_min_Ladd[3]; + MyFloat refsearch_min[3], refsearch_max[3], refsearch_max_Lsub[3], refsearch_min_Ladd[3]; + + for(i = 0; i < 3; i++) + { + search_min[i] = searchcenter[i] - hsml; + search_max[i] = searchcenter[i] + hsml; + refsearch_min[i] = refpos[i] - maxdist; + refsearch_max[i] = refpos[i] + maxdist; + } + +#if !defined(REFLECTIVE_X) + search_max_Lsub[0] = search_max[0] - boxSize_X; + search_min_Ladd[0] = search_min[0] + boxSize_X; + refsearch_max_Lsub[0] = refsearch_max[0] - boxSize_X; + refsearch_min_Ladd[0] = refsearch_min[0] + boxSize_X; +#else /* #if !defined(REFLECTIVE_X) */ + search_max_Lsub[0] = 2 * boxSize_X - search_max[0]; + search_min_Ladd[0] = -search_min[0]; + refsearch_max_Lsub[0] = 2 * boxSize_X - refsearch_max[0]; + refsearch_min_Ladd[0] = -refsearch_min[0]; +#endif /* #if !defined(REFLECTIVE_X) #else */ + +#if !defined(REFLECTIVE_Y) + search_max_Lsub[1] = search_max[1] - boxSize_Y; + search_min_Ladd[1] = search_min[1] + boxSize_Y; + refsearch_max_Lsub[1] = refsearch_max[1] - boxSize_Y; + refsearch_min_Ladd[1] = refsearch_min[1] + boxSize_Y; +#else /* #if !defined(REFLECTIVE_Y) */ + search_max_Lsub[1] = 2 * boxSize_Y - search_max[1]; + search_min_Ladd[1] = -search_min[1]; + refsearch_max_Lsub[1] = 2 * boxSize_Y - refsearch_max[1]; + refsearch_min_Ladd[1] = -refsearch_min[1]; 
+#endif /* #if !defined(REFLECTIVE_Y) #else */ + +#if !defined(REFLECTIVE_Z) + search_max_Lsub[2] = search_max[2] - boxSize_Z; + search_min_Ladd[2] = search_min[2] + boxSize_Z; + refsearch_max_Lsub[2] = refsearch_max[2] - boxSize_Z; + refsearch_min_Ladd[2] = refsearch_min[2] + boxSize_Z; +#else /* #if !defined(REFLECTIVE_Z) */ + search_max_Lsub[2] = 2 * boxSize_Z - search_max[2]; + search_min_Ladd[2] = -search_min[2]; + refsearch_max_Lsub[2] = 2 * boxSize_Z - refsearch_max[2]; + refsearch_min_Ladd[2] = -refsearch_min[2]; +#endif /* #if !defined(REFLECTIVE_Z) #else */ + + numngb = 0; + mindistance = 1.0e70; + int count; + + count = 0; + + maxdistSquared = maxdist * maxdist; + hsmlSquared = hsml * hsml; + + numngb = 0; + + for(k = 0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = Ngb_MaxPart; /* root node */ + +#ifdef EXTENDED_GHOST_SEARCH + bitflags = 0; +#endif /* #ifdef EXTENDED_GHOST_SEARCH */ + } + else + { + no = firstnode[k]; + +#ifdef EXTENDED_GHOST_SEARCH + bitflags = first_bitflag[k]; +#endif /* #ifdef EXTENDED_GHOST_SEARCH */ + no = Ngb_Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + count++; + if(no < Ngb_MaxPart) /* single particle */ + { + p = no; + no = Ngb_Nextnode[no]; + + if(P[p].Type > 0) + continue; + + if(P[p].Mass == 0 && P[p].ID == 0) + continue; /* skip cells that have been swallowed or eliminated */ + + if(P[p].Ti_Current != All.Ti_Current) + { + drift_particle(p, All.Ti_Current); + } + + offx = offy = offz = 0; + + image_flag = 0; /* for each coordinates there are three possibilities. We + encode them to basis three, i.e. 
x*3^0 + y*3^1 + z*3^2 */ + +#if !defined(REFLECTIVE_X) + if(P[p].Pos[0] - refpos[0] < -boxHalf_X) + { + offx = boxSize_X; + image_flag += 1; + } + else if(P[p].Pos[0] - refpos[0] > boxHalf_X) + { + offx = -boxSize_X; + image_flag += 2; + } +#endif /* #if !defined(REFLECTIVE_X) */ + +#if !defined(REFLECTIVE_Y) + if(P[p].Pos[1] - refpos[1] < -boxHalf_Y) + { + offy = boxSize_Y; + image_flag += 1 * 3; + } + else if(P[p].Pos[1] - refpos[1] > boxHalf_Y) + { + offy = -boxSize_Y; + image_flag += 2 * 3; + } +#endif /* #if !defined(REFLECTIVE_Y) */ + +#if !defined(REFLECTIVE_Z) && !defined(TWODIMS) + if(P[p].Pos[2] - refpos[2] < -boxHalf_Z) + { + offz = boxSize_Z; + image_flag += 1 * 9; + } + else if(P[p].Pos[2] - refpos[2] > boxHalf_Z) + { + offz = -boxSize_Z; + image_flag += 2 * 9; + } +#endif /* #if !defined(REFLECTIVE_Z) && !defined(TWODIMS) */ + + int image_flag_periodic_bnds = image_flag; + +#if defined(REFLECTIVE_X) + int repx; + for(repx = -1; repx <= 1; repx++, offx = 0) +#endif /* #if defined(REFLECTIVE_X) */ + { +#if defined(REFLECTIVE_Y) + int repy; + for(repy = -1; repy <= 1; repy++, offy = 0) +#endif /* #if defined(REFLECTIVE_Y) */ + { +#if defined(REFLECTIVE_Z) && !defined(TWODIMS) + int repz; + for(repz = -1; repz <= 1; repz++, offz = 0) +#endif /* #if defined(REFLECTIVE_Z) && !defined(TWODIMS) */ + { + image_flag = image_flag_periodic_bnds; + + x = P[p].Pos[0]; + y = P[p].Pos[1]; + z = P[p].Pos[2]; + +#if defined(REFLECTIVE_X) + if(repx == 1) + { + offx = 2 * boxSize_X; + image_flag += 2; + } + else if(repx == -1) + { + image_flag += 1; + } + if(repx != 0) + x = -x; +#endif /* #if defined(REFLECTIVE_X) */ + +#if defined(REFLECTIVE_Y) + if(repy == 1) + { + offy = 2 * boxSize_Y; + image_flag += 2 * 3; + } + else if(repy == -1) + { + image_flag += 1 * 3; + } + if(repy != 0) + y = -y; +#endif /* #if defined(REFLECTIVE_Y) */ + +#if defined(REFLECTIVE_Z) && !defined(TWODIMS) + if(repz == 1) + { + offz = 2 * boxSize_Z; + image_flag += 2 * 9; + } + else if(repz == 
-1) + { + image_flag += 1 * 9; + } + if(repz != 0) + z = -z; +#endif /* #if defined(REFLECTIVE_Z) && !defined(TWODIMS) */ + + x += offx; + y += offy; + z += offz; + + dx_ref = x - refpos[0]; + dy_ref = y - refpos[1]; + dz_ref = z - refpos[2]; + + if((thisdistance = dx_ref * dx_ref + dy_ref * dy_ref + dz_ref * dz_ref) > maxdistSquared) + continue; + + dx = x - searchcenter[0]; + dy = y - searchcenter[1]; + dz = z - searchcenter[2]; + + if(dx * dx + dy * dy + dz * dz > hsmlSquared) + continue; + + /* now we need to check whether this particle has already been sent to + the requesting cpu for this particular image shift */ + + if(thisdistance >= mindistance) + continue; + + if(Ngb_Marker[p] != Ngb_MarkerValue) + { + Ngb_Marker[p] = Ngb_MarkerValue; + List_P[p].firstexport = -1; + List_P[p].currentexport = -1; + } + + if(List_P[p].firstexport >= 0) + { + if(ListExports[List_P[p].currentexport].origin != origin) + { + listp = List_P[p].firstexport; + while(listp >= 0) + { + if(ListExports[listp].origin == origin) + { + List_P[p].currentexport = listp; + break; + } + + listp = ListExports[listp].nextexport; + } + + if(listp >= 0) + if((ListExports[listp].image_bits & (1 << image_flag))) /* already in list */ + continue; + } + else + { + if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag))) /* already in list */ + continue; + } + } + + /* here we have found a new closest particle that has not been inserted yet */ + + numngb = 1; + mindistance = thisdistance; + min_p = p; + min_imageflag = image_flag; + min_x = x; + min_y = y; + min_z = z; + + maxdistSquared = thisdistance; + } + } + } + } + else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */ + { + if(mode == MODE_IMPORTED_PARTICLES) + { + if(no < + Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + break; + } + + current = &Ngb_Nodes[no]; + no = current->u.d.sibling; /* in case the node can be discarded */ + + 
if(current->Ti_Current != All.Ti_Current) + { + drift_node(current, All.Ti_Current); + } + +#if !defined(REFLECTIVE_X) + if(search_min[0] > current->u.d.range_max[0] && search_max_Lsub[0] < current->u.d.range_min[0]) + continue; + if(search_min_Ladd[0] > current->u.d.range_max[0] && search_max[0] < current->u.d.range_min[0]) + continue; +#else /* #if !defined(REFLECTIVE_X) */ + if(search_min[0] > current->u.d.range_max[0] && search_max_Lsub[0] > current->u.d.range_max[0]) + continue; + if(search_min_Ladd[0] < current->u.d.range_min[0] && search_max[0] < current->u.d.range_min[0]) + continue; +#endif /* #if !defined(REFLECTIVE_X) #else */ + +#if !defined(REFLECTIVE_Y) + if(search_min[1] > current->u.d.range_max[1] && search_max_Lsub[1] < current->u.d.range_min[1]) + continue; + if(search_min_Ladd[1] > current->u.d.range_max[1] && search_max[1] < current->u.d.range_min[1]) + continue; +#else /* #if !defined(REFLECTIVE_Y) */ + if(search_min[1] > current->u.d.range_max[1] && search_max_Lsub[1] > current->u.d.range_max[1]) + continue; + if(search_min_Ladd[1] < current->u.d.range_min[1] && search_max[1] < current->u.d.range_min[1]) + continue; +#endif /* #if !defined(REFLECTIVE_Y) #else */ + +#if !defined(REFLECTIVE_Z) + if(search_min[2] > current->u.d.range_max[2] && search_max_Lsub[2] < current->u.d.range_min[2]) + continue; + if(search_min_Ladd[2] > current->u.d.range_max[2] && search_max[2] < current->u.d.range_min[2]) + continue; +#else /* #if !defined(REFLECTIVE_Z) */ + if(search_min[2] > current->u.d.range_max[2] && search_max_Lsub[2] > current->u.d.range_max[2]) + continue; + if(search_min_Ladd[2] < current->u.d.range_min[2] && search_max[2] < current->u.d.range_min[2]) + continue; +#endif /* #if !defined(REFLECTIVE_Z) #else */ + + /* now deal with the search region of the reference point */ + +#if !defined(REFLECTIVE_X) + if(refsearch_min[0] > current->u.d.range_max[0] && refsearch_max_Lsub[0] < current->u.d.range_min[0]) + continue; + if(refsearch_min_Ladd[0] > 
current->u.d.range_max[0] && refsearch_max[0] < current->u.d.range_min[0]) + continue; +#else /* #if !defined(REFLECTIVE_X) */ + if(refsearch_min[0] > current->u.d.range_max[0] && refsearch_max_Lsub[0] > current->u.d.range_max[0]) + continue; + if(refsearch_min_Ladd[0] < current->u.d.range_min[0] && refsearch_max[0] < current->u.d.range_min[0]) + continue; +#endif /* #if !defined(REFLECTIVE_X) #else */ + +#if !defined(REFLECTIVE_Y) + if(refsearch_min[1] > current->u.d.range_max[1] && refsearch_max_Lsub[1] < current->u.d.range_min[1]) + continue; + if(refsearch_min_Ladd[1] > current->u.d.range_max[1] && refsearch_max[1] < current->u.d.range_min[1]) + continue; +#else /* #if !defined(REFLECTIVE_Y) */ + if(refsearch_min[1] > current->u.d.range_max[1] && refsearch_max_Lsub[1] > current->u.d.range_max[1]) + continue; + if(refsearch_min_Ladd[1] < current->u.d.range_min[1] && refsearch_max[1] < current->u.d.range_min[1]) + continue; +#endif /* #if !defined(REFLECTIVE_Y) #else */ + +#if !defined(REFLECTIVE_Z) + if(refsearch_min[2] > current->u.d.range_max[2] && refsearch_max_Lsub[2] < current->u.d.range_min[2]) + continue; + if(refsearch_min_Ladd[2] > current->u.d.range_max[2] && refsearch_max[2] < current->u.d.range_min[2]) + continue; +#else /* #if !defined(REFLECTIVE_Z) */ + if(refsearch_min[2] > current->u.d.range_max[2] && refsearch_max_Lsub[2] > current->u.d.range_max[2]) + continue; + if(refsearch_min_Ladd[2] < current->u.d.range_min[2] && refsearch_max[2] < current->u.d.range_min[2]) + continue; +#endif /* #if !defined(REFLECTIVE_Z) #else */ + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else /* pseudo particle */ + { + if(mode == 1) + terminate("mode == 1"); + + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == MODE_IMPORTED_PARTICLES should not occur here"); + + if(target >= 0) /* if no target is given, export will not occur */ + ngb_treefind_export_node_threads(no, target, thread_id, image_flag); + + no = Ngb_Nextnode[no - 
Ngb_MaxNodes]; + continue; + } + } + } + + if(numngb) + { + p = min_p; + + image_flag = min_imageflag; + + if(Ngb_Marker[p] != Ngb_MarkerValue) + { + Ngb_Marker[p] = Ngb_MarkerValue; + List_P[p].firstexport = -1; + List_P[p].currentexport = -1; + } + + if(List_P[p].firstexport >= 0) + { + if(ListExports[List_P[p].currentexport].origin != origin) + { + listp = List_P[p].firstexport; + while(listp >= 0) + { + if(ListExports[listp].origin == origin) + { + List_P[p].currentexport = listp; + break; + } + + if(ListExports[listp].nextexport < 0) + { + if(Ninlist >= MaxNinlist) + { + T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = T->Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist, + T->Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + List_P[p].currentexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 0; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = origin; + ListExports[List_P[p].currentexport].index = p; + ListExports[listp].nextexport = List_P[p].currentexport; + break; + } + listp = ListExports[listp].nextexport; + } + } + } + else + { + /* here we have a local particle that hasn't been made part of the mesh */ + + if(Ninlist >= MaxNinlist) + { + T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = T->Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist, + T->Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + 
List_InMesh[NumGasInMesh++] = p; + + List_P[p].currentexport = List_P[p].firstexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 0; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = origin; + ListExports[List_P[p].currentexport].index = p; + } + + if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag))) + terminate("this should not happen"); + + ListExports[List_P[p].currentexport].image_bits |= (1 << image_flag); + + /* add the particle to the ones that need to be exported */ + + if(P[p].Ti_Current != All.Ti_Current) + terminate("surprise! we don't expect this here anymore"); + + if(origin == ThisTask) + { + if(mode == 1) + terminate("mode==1: how can this be?"); + + if(T->Ndp >= T->MaxNdp) + { + T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR; + T->MaxNdp = T->Indi.AllocFacNdp; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp); +#endif /* #ifdef VERBOSE */ + T->DP -= 5; + T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + if(T->Ndp >= T->MaxNdp) + terminate("Ndp >= MaxNdp"); + } + + SphP[p].ActiveArea = 0; + + point *dp = &T->DP[T->Ndp]; + dp->x = min_x; + dp->y = min_y; + dp->z = min_z; + dp->task = ThisTask; + dp->ID = P[p].ID; + if(image_flag) + dp->index = p + NumGas; /* this is a replicated/mirrored local point */ + else + dp->index = p; /* this is actually a local point that wasn't made part of the mesh yet */ + dp->originalindex = p; + dp->timebin = P[p].TimeBinHydro; + dp->image_flags = (1 << image_flag); +#ifdef DOUBLE_STENCIL + dp->Hsml = SphP[p].Hsml; + dp->first_connection = -1; + dp->last_connection = -1; +#endif /* #ifdef DOUBLE_STENCIL */ + T->Ndp++; + NadditionalPoints++; + } + else + { + if(mode == 0) + terminate("mode == 0: how can this be?"); + + if(N_DP_Buffer >= MaxN_DP_Buffer) + { + T->Indi.AllocFacN_DP_Buffer *= 
ALLOC_INCREASE_FACTOR; + MaxN_DP_Buffer = T->Indi.AllocFacN_DP_Buffer; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxN_DP_Buffer=%d Indi.AllocFacN_DP_Buffer=%g\n", ThisTask, MaxN_DP_Buffer, + T->Indi.AllocFacN_DP_Buffer); +#endif /* #ifdef VERBOSE */ + DP_Buffer = (point *)myrealloc_movable(DP_Buffer, MaxN_DP_Buffer * sizeof(point)); + + if(N_DP_Buffer >= MaxN_DP_Buffer) + terminate("(N_DP_Buffer >= MaxN_DP_Buffer"); + } + + SphP[p].ActiveArea = 0; + + DP_Buffer[N_DP_Buffer].x = min_x; + DP_Buffer[N_DP_Buffer].y = min_y; + DP_Buffer[N_DP_Buffer].z = min_z; + DP_Buffer[N_DP_Buffer].ID = P[p].ID; + DP_Buffer[N_DP_Buffer].task = ThisTask; + DP_Buffer[N_DP_Buffer].index = p; + DP_Buffer[N_DP_Buffer].originalindex = p; + DP_Buffer[N_DP_Buffer].timebin = P[p].TimeBinHydro; + DP_Buffer[N_DP_Buffer].image_flags = (1 << image_flag); +#ifdef DOUBLE_STENCIL + DP_Buffer[N_DP_Buffer].Hsml = SphP[p].Hsml; + DP_Buffer[N_DP_Buffer].first_connection = -1; + DP_Buffer[N_DP_Buffer].last_connection = -1; +#endif /* #ifdef DOUBLE_STENCIL */ + send_count_new[origin]++; + N_DP_Buffer++; + } + } + + return numngb; +} + +#endif /* #ifdef EXTENDED_GHOST_SEARCH #else */ + +/*! \brief Counts up undecided tetrahedra. + * + * \param[in] T Pointer to tessellation. + * + * \return (Local) number of undecided tetrahedra. + */ +int count_undecided_tetras(tessellation *T) +{ + int i, count; + + for(i = 0, count = 0; i < T->Ndt; i++) + if((T->DTF[i] & 2) == 0) + count++; + + return count; +} + +#endif /* #if !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c new file mode 100644 index 0000000000..4323ab0a6d --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c @@ -0,0 +1,944 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/mesh/voronoi/voronoi_gradients_lsf.c + * \date 05/2018 + * \brief Least squares fit gradient calculation. + * \details Described in Pakmor et al (2016). + * contains functions: + * static void inline add_row(double X[NUMDIMS][NUMDIMS], + * double y[NUMDIMS], int source_row, double fac, + * int target_row) + * static void solve_matrix_problem(double X[NUMDIMS][NUMDIMS], + * double y[NUMDIMS], double grad[NUMDIMS]) + * void calculate_gradients(void) + * void compute_divergences() + * void correct_for_reflective_boundaries(double *ValueOther, + * double Value, int type, unsigned int *image_flags) + * void limit_gradients(void) + * void limit_vel_gradient(double *d, MySingle * grad_vx, + * MySingle * grad_vy, MySingle * grad_vz, double csnd) + * void limit_gradient(double *d, double phi, double min_phi, + * double max_phi, MySingle * dphi) + * double boundaryX(double dx) + * double boundaryY(double dy) + * double boundaryZ(double dz) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 23.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../../main/allvars.h" +#include "../../main/proto.h" + 
+#if !defined(ONEDIMS) + +static double *minvalues, *maxvalues; + +static void limit_gradients(); +static void correct_for_reflective_boundaries(double *ValueOther, double Value, int type, unsigned int *image_flags); + +static double boundaryX(double dx); +static double boundaryY(double dy); +static double boundaryZ(double dz); + +#if defined(OUTPUT_DIVVEL) || defined(MHD) +static void compute_divergences(); +#endif /* #if defined(OUTPUT_DIVVEL) || defined(MHD) */ + +/*! \brief Adds row to another one in matrix equation. + * + * Auxiliary routine to solve_matrix_problem. + * + * \param[in, out] X Matrix. + * \param[in, out] y Vector. + * \param[in] source_row Index of row that should be added. + * \param[in] fac Factor by which row is multiplied before adding. + * \param[in] target_row Index of row to which to add source row. + * + * \return void + */ +static void inline add_row(double X[NUMDIMS][NUMDIMS], double y[NUMDIMS], int source_row, double fac, int target_row) +{ + y[target_row] += fac * y[source_row]; + + for(int i = 0; i < NUMDIMS; i++) + { + X[target_row][i] += fac * X[source_row][i]; + } +} + +/*! \brief Solve a matrix problem X*grad = y. + * + * Note that we know here that X is symmetric, and that we can pivot on the + * diagonal elements. + * + * \param[in, out] x Matrix. + * \param[in, out] y Vector. + * \param[out] grad Gradient. 
+ * + */ +static void solve_matrix_problem(double X[NUMDIMS][NUMDIMS], double y[NUMDIMS], double grad[NUMDIMS]) +{ +#if NUMDIMS == 2 + int perm[NUMDIMS]; + + if(fabs(X[0][0]) > fabs(X[1][1])) + { + perm[0] = 0; + perm[1] = 1; + } + else + { + perm[0] = 1; + perm[1] = 0; + } + + add_row(X, y, perm[0], -X[perm[1]][perm[0]] / X[perm[0]][perm[0]], perm[1]); + + grad[perm[1]] = y[perm[1]] / X[perm[1]][perm[1]]; + grad[perm[0]] = (y[perm[0]] - X[perm[0]][perm[1]] * grad[perm[1]]) / X[perm[0]][perm[0]]; + +#else /* #if NUMDIMS==2 */ + + int perm[NUMDIMS]; + + if(fabs(X[2][2]) > fabs(X[1][1]) && fabs(X[2][2]) > fabs(X[0][0])) + { + perm[0] = 2; + perm[1] = 0; + perm[2] = 1; + } + else if(fabs(X[1][1]) > fabs(X[0][0])) + { + perm[0] = 1; + perm[1] = 0; + perm[2] = 2; + } + else + { + perm[0] = 0; + perm[1] = 1; + perm[2] = 2; + } + + add_row(X, y, perm[0], -X[perm[1]][perm[0]] / X[perm[0]][perm[0]], perm[1]); + add_row(X, y, perm[0], -X[perm[2]][perm[0]] / X[perm[0]][perm[0]], perm[2]); + + if(fabs(X[perm[1]][perm[1]]) < fabs(X[perm[2]][perm[2]])) + { + int p = perm[1]; + perm[1] = perm[2]; + perm[2] = p; + } + + add_row(X, y, perm[1], -X[perm[2]][perm[1]] / X[perm[1]][perm[1]], perm[2]); + + grad[perm[2]] = y[perm[2]] / X[perm[2]][perm[2]]; + grad[perm[1]] = (y[perm[1]] - X[perm[1]][perm[2]] * grad[perm[2]]) / X[perm[1]][perm[1]]; + grad[perm[0]] = (y[perm[0]] - X[perm[0]][perm[1]] * grad[perm[1]] - X[perm[0]][perm[2]] * grad[perm[2]]) / X[perm[0]][perm[0]]; + +#endif /* #if NUMDIMS==2 #else */ +} + +/*! \brief Loop through all active cells and calculate gradients. 
/*! \brief Loop through all active cells and calculate gradients.
 *
 *  For every active cell a weighted least squares system X*grad = y is
 *  accumulated over the cell's mesh connections (one system per entry of
 *  grad_elements) and solved with solve_matrix_problem(). The smallest and
 *  largest neighbour value of every quantity is recorded in the file-static
 *  minvalues/maxvalues arrays, which limit_gradients() consumes afterwards.
 *
 *  \return void
 */
void calculate_gradients(void)
{
  TIMER_START(CPU_GRADIENTS);

  mpi_printf("VORONOI: Calculating Gradients...\n");

  /* per-cell, per-quantity extrema of the neighbour values; freed at the end of this function */
  minvalues = mymalloc("gradmin", NumGas * N_Grad * sizeof(double));
  maxvalues = mymalloc("gradmax", NumGas * N_Grad * sizeof(double));

  struct matrix_vec_data
  {
    double X[NUMDIMS][NUMDIMS]; /* input matrix */
    double y[NUMDIMS];          /* input vector */
    double grad[NUMDIMS];       /* output */
  } * mdata;

  /* one least squares system per gradient quantity, reused for every cell */
  mdata = mymalloc("mdata", N_Grad * sizeof(struct matrix_vec_data));

  /* value of every gradient quantity in the cell currently processed */
  double *Value = mymalloc("Value", N_Grad * sizeof(double));

  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      int i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      for(int k = 0; k < N_Grad; k++)
        {
          minvalues[i * N_Grad + k] = +MAX_REAL_NUMBER;
          maxvalues[i * N_Grad + k] = -MAX_REAL_NUMBER;

          /* velocity components are read from P[] and divided by All.cf_atime, everything else comes from SphP[] */
          if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
             (grad_elements[k].type == GRADIENT_TYPE_VELZ))
            {
              Value[k] = *(MyFloat *)(((char *)(&P[i])) + grad_elements[k].offset) / All.cf_atime;
            }
          else
            Value[k] = *(MyFloat *)(((char *)(&SphP[i])) + grad_elements[k].offset);
        }

      MyDouble *Center = SphP[i].Center;

      /* reset matrix and vector to 0 */
      memset(mdata, 0, N_Grad * sizeof(struct matrix_vec_data));

#ifdef REFLECTIVE_X
      int OutFlowX = 0;
#endif /* #ifdef REFLECTIVE_X */
#ifdef REFLECTIVE_Y
      int OutFlowY = 0;
#endif /* #ifdef REFLECTIVE_Y */
#ifdef REFLECTIVE_Z
      int OutFlowZ = 0;
#endif /* #ifdef REFLECTIVE_Z */

      /* walk the linked list of connections of cell i */
      int q = SphP[i].first_connection;

      while(q >= 0)
        {
          int dp       = DC[q].dp_index;
          int vf       = DC[q].vf_index;
          int particle = Mesh.DP[dp].index;

          if(particle < 0)
            {
              /* cell has been removed */
              /* NOTE(review): this path skips the last_connection check below - confirm the list is -1 terminated */
              q = DC[q].next;
              continue;
            }

          /* faces with a negligible area relative to the cell surface do not contribute */
          if(Mesh.VF[vf].area > 1e-10 * SphP[i].SurfaceArea)
            {
              MyDouble *CenterOther, Mirror[3];

              /* local points with index >= NumGas are mapped back onto the cell they were created from */
              if(particle >= NumGas && Mesh.DP[dp].task == ThisTask)
                particle -= NumGas;

#ifdef REFLECTIVE_X
              if((Mesh.DP[dp].image_flags & REFL_X_FLAGS) && (Mesh.DP[dp].image_flags & OUTFLOW_X))
                OutFlowX = 1;
#endif /* #ifdef REFLECTIVE_X */
#ifdef REFLECTIVE_Y
              if((Mesh.DP[dp].image_flags & REFL_Y_FLAGS) && (Mesh.DP[dp].image_flags & OUTFLOW_Y))
                OutFlowY = 1;
#endif /* #ifdef REFLECTIVE_Y */
#ifdef REFLECTIVE_Z
              if((Mesh.DP[dp].image_flags & REFL_Z_FLAGS) && (Mesh.DP[dp].image_flags & OUTFLOW_Z))
                OutFlowZ = 1;
#endif /* #ifdef REFLECTIVE_Z */

              if(Mesh.DP[dp].task == ThisTask)
                {
#ifndef VORONOI_STATIC_MESH
                  if(P[particle].Ti_Current != All.Ti_Current)
                    terminate("surprise! we don't expect this here anymore");
#endif /* #ifndef VORONOI_STATIC_MESH */

                  if(P[particle].ID == P[i].ID)
                    {
                      /* mirrored cell, we have to mirror the Center */

                      /* calculate normal vector of the interface */
                      double nx = Mesh.DP[dp].x - P[i].Pos[0];
                      double ny = Mesh.DP[dp].y - P[i].Pos[1];
                      double nz = Mesh.DP[dp].z - P[i].Pos[2];

                      /* perpendicular on the surface */
                      double nn = sqrt(nx * nx + ny * ny + nz * nz);
                      nx /= nn;
                      ny /= nn;
                      nz /= nn;
                      double fx = (Center[0] - Mesh.VF[vf].cx);
                      double fy = (Center[1] - Mesh.VF[vf].cy);
                      double fz = (Center[2] - Mesh.VF[vf].cz);
                      double ff = (fx * nx + fy * ny + fz * nz);

                      /* (px,py,pz) is Center moved along the normal by the distance ff to the face centre */
                      double px = Center[0] - ff * nx;
                      double py = Center[1] - ff * ny;
                      double pz = Center[2] - ff * nz;

                      Mirror[0]   = 2. * px - Center[0];
                      Mirror[1]   = 2. * py - Center[1];
                      Mirror[2]   = 2. * pz - Center[2];
                      CenterOther = Mirror;
                    }
                  else
                    CenterOther = SphP[particle].Center;
                }
              else
                CenterOther = PrimExch[particle].Center; /* point of another task: use the exchanged copy */

              /* unit vector and distance from this cell's centre to the neighbour's centre */
              double norm[3];
              norm[0] = boundaryX(CenterOther[0] - Center[0]);
              norm[1] = boundaryY(CenterOther[1] - Center[1]);
              norm[2] = boundaryZ(CenterOther[2] - Center[2]);

              double dist    = sqrt(norm[0] * norm[0] + norm[1] * norm[1] + norm[2] * norm[2]);
              double distinv = 1.0 / dist;
              norm[0] *= distinv;
              norm[1] *= distinv;
              norm[2] *= distinv;

              /* every neighbour is weighted by the area of the shared face */
              double weight = Mesh.VF[vf].area;

              for(int k = 0; k < N_Grad; k++)
                {
                  double ValueOther;

                  if(Mesh.DP[dp].task == ThisTask)
                    {
                      if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
                         (grad_elements[k].type == GRADIENT_TYPE_VELZ))
                        {
                          ValueOther = *(MyFloat *)(((char *)(&P[particle])) + grad_elements[k].offset);
                        }
                      else
                        ValueOther = *(MyFloat *)(((char *)(&SphP[particle])) + grad_elements[k].offset);
                    }
                  else
                    {
                      ValueOther = *(MyFloat *)(((char *)(&PrimExch[particle])) + grad_elements[k].offset_exch);
                    }

                  if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
                     (grad_elements[k].type == GRADIENT_TYPE_VELZ))
                    {
                      ValueOther /= All.cf_atime;

#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
                      correct_for_reflective_boundaries(&ValueOther, Value[k], grad_elements[k].type, &Mesh.DP[dp].image_flags);
#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
                      /* separation-proportional term scaled by All.cf_atime * All.cf_Hrate
                       * (presumably the Hubble-flow contribution - TODO confirm) */
                      if(grad_elements[k].type == GRADIENT_TYPE_VELX)
                        ValueOther += norm[0] * dist * All.cf_atime * All.cf_Hrate;
                      else if(grad_elements[k].type == GRADIENT_TYPE_VELY)
                        ValueOther += norm[1] * dist * All.cf_atime * All.cf_Hrate;
                      else if(grad_elements[k].type == GRADIENT_TYPE_VELZ)
                        ValueOther += norm[2] * dist * All.cf_atime * All.cf_Hrate;
                    }

                  /* weighted finite difference along the connecting direction */
                  double fac = weight * (ValueOther - Value[k]) / dist;

                  for(int ia = 0; ia < NUMDIMS; ia++)
                    {
                      mdata[k].y[ia] += fac * norm[ia];

                      for(int ib = 0; ib < NUMDIMS; ib++)
                        mdata[k].X[ia][ib] += weight * norm[ia] * norm[ib];
                    }

                  /* track the neighbour extrema for the slope limiter */
                  if(ValueOther < minvalues[i * N_Grad + k])
                    minvalues[i * N_Grad + k] = ValueOther;

                  if(ValueOther > maxvalues[i * N_Grad + k])
                    maxvalues[i * N_Grad + k] = ValueOther;
                }
            }

          if(q == SphP[i].last_connection)
            break;

          q = DC[q].next;
        }

      for(int k = 0; k < N_Grad; k++)
        {
          solve_matrix_problem(mdata[k].X, mdata[k].y, mdata[k].grad);

          /* store the result in the gradient structure of the cell; unused dimensions are zeroed */
          MySingle *data = (MySingle *)(((char *)(&(SphP[i].Grad))) + grad_elements[k].offset_grad);
          for(int j = 0; j < NUMDIMS; j++)
            data[j] = mdata[k].grad[j];
          for(int j = NUMDIMS; j < 3; j++)
            data[j] = 0.;

          /* a cell touching an outflow boundary gets no gradient component along that axis */
#ifdef REFLECTIVE_X
          if(OutFlowX)
            data[0] = 0;
#endif /* #ifdef REFLECTIVE_X */
#ifdef REFLECTIVE_Y
          if(OutFlowY)
            data[1] = 0;
#endif /* #ifdef REFLECTIVE_Y */
#ifdef REFLECTIVE_Z
          if(OutFlowZ)
            data[2] = 0;
#endif /* #ifdef REFLECTIVE_Z */
        }
    }

  myfree(Value);
  myfree(mdata);

#ifdef MHD
  /* curl of B from the (not yet limited) magnetic field gradients */
  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      int i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      SphP[i].CurlB[0] = SphP[i].Grad.dB[2][1] - SphP[i].Grad.dB[1][2];
      SphP[i].CurlB[1] = SphP[i].Grad.dB[0][2] - SphP[i].Grad.dB[2][0];
      SphP[i].CurlB[2] = SphP[i].Grad.dB[1][0] - SphP[i].Grad.dB[0][1];
    }
#endif /* #ifdef MHD */

  limit_gradients();

#ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED
  /* compute magnitude of curl */
  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      int i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;
      double curlx = SphP[i].Grad.dvel[2][1] - SphP[i].Grad.dvel[1][2];
      double curly = SphP[i].Grad.dvel[0][2] - SphP[i].Grad.dvel[2][0];
      double curlz = SphP[i].Grad.dvel[1][0] - SphP[i].Grad.dvel[0][1];

      SphP[i].CurlVel = sqrt(curlx * curlx + curly * curly + curlz * curlz);
    }
#endif /* #ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED */

  /* the extrema are only needed by limit_gradients(); note the reverse order of allocation */
  myfree(maxvalues);
  myfree(minvalues);

#if defined(OUTPUT_DIVVEL) || defined(MHD)
  compute_divergences();
#endif /* #if defined(OUTPUT_DIVVEL) || defined(MHD */

  TIMER_STOP(CPU_GRADIENTS);
}

#if defined(OUTPUT_DIVVEL) || defined(MHD)
/*! \brief Computes divergences applying Gauss' law.
 *
 *  Loops through all active cells, sums the area-weighted normal component
 *  of the face-averaged velocity (OUTPUT_DIVVEL) and/or magnetic field (MHD)
 *  over all interfaces of the cell and divides the sum by the cell volume.
 *
 *  \return void
 */
void compute_divergences()
{
  mpi_printf("VORONOI: Computing divergences... \n");

  exchange_primitive_variables_and_gradients();

  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      int i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

#if defined(OUTPUT_DIVVEL)
      SphP[i].DivVel = 0;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
      SphP[i].DivB = 0;
#endif /* #ifdef MHD */

      MyDouble *CenterOther, Mirror[3];
#if defined(OUTPUT_DIVVEL)
      MyFloat *VelOther;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
      MyFloat *BOther, B[3];
      struct grad_data *GradOther;
#endif /* #ifdef MHD */

      /* walk the linked list of connections of cell i */
      int q = SphP[i].first_connection;
      while(q >= 0)
        {
          int dp       = DC[q].dp_index;
          int vf       = DC[q].vf_index;
          int particle = Mesh.DP[dp].index;

          if(particle < 0)
            {
              /* cell has been removed */
              q = DC[q].next;
              continue;
            }

          /* faces with a negligible area relative to the cell surface do not contribute */
          if(Mesh.VF[vf].area > 1e-10 * SphP[i].SurfaceArea)
            {
#ifdef MHD
              /* extrapolate this cell's B from its centre to the face centre */
              double dx = boundaryX(Mesh.VF[vf].cx - SphP[i].Center[0]);
              double dy = boundaryY(Mesh.VF[vf].cy - SphP[i].Center[1]);
              double dz = boundaryZ(Mesh.VF[vf].cz - SphP[i].Center[2]);

              for(int j = 0; j < 3; j++)
                B[j] = SphP[i].B[j] + SphP[i].Grad.dB[j][0] * dx + SphP[i].Grad.dB[j][1] * dy + SphP[i].Grad.dB[j][2] * dz;
#endif /* #ifdef MHD */

              /* local points with index >= NumGas are mapped back onto the cell they were created from */
              if(particle >= NumGas && Mesh.DP[dp].task == ThisTask)
                particle -= NumGas;

              if(Mesh.DP[dp].task == ThisTask)
                {
                  if(P[particle].ID == P[i].ID)
                    {
                      /* mirrored cell, we have to mirror the Center */
                      /* calculate normal vector of the interface */
                      double nx = Mesh.DP[dp].x - P[i].Pos[0];
                      double ny = Mesh.DP[dp].y - P[i].Pos[1];
                      double nz = Mesh.DP[dp].z - P[i].Pos[2];
                      /* perpendicular on the surface */
                      double nn = sqrt(nx * nx + ny * ny + nz * nz);
                      nx /= nn;
                      ny /= nn;
                      nz /= nn;
                      double fx   = (SphP[i].Center[0] - Mesh.VF[vf].cx);
                      double fy   = (SphP[i].Center[1] - Mesh.VF[vf].cy);
                      double fz   = (SphP[i].Center[2] - Mesh.VF[vf].cz);
                      double ff   = (fx * nx + fy * ny + fz * nz);
                      double px   = SphP[i].Center[0] - ff * nx;
                      double py   = SphP[i].Center[1] - ff * ny;
                      double pz   = SphP[i].Center[2] - ff * nz;
                      Mirror[0]   = 2. * px - SphP[i].Center[0];
                      Mirror[1]   = 2. * py - SphP[i].Center[1];
                      Mirror[2]   = 2. * pz - SphP[i].Center[2];
                      CenterOther = Mirror;
                    }
                  else
                    CenterOther = SphP[particle].Center;

#if defined(OUTPUT_DIVVEL)
                  VelOther = P[particle].Vel;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
                  GradOther = &SphP[particle].Grad;
                  BOther    = SphP[particle].B;
#endif /* #ifdef MHD */
                }
              else
                {
                  /* point of another task: use the exchanged copies */
                  CenterOther = PrimExch[particle].Center;
#if defined(OUTPUT_DIVVEL)
                  VelOther = PrimExch[particle].VelGas;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
                  GradOther = &GradExch[particle];
                  BOther    = PrimExch[particle].B;
#endif /* #ifdef MHD */
                }

#ifdef MHD
              /* extrapolate the neighbour's B to the face centre and average both estimates */
              dx = boundaryX(Mesh.VF[vf].cx - CenterOther[0]);
              dy = boundaryY(Mesh.VF[vf].cy - CenterOther[1]);
              dz = boundaryZ(Mesh.VF[vf].cz - CenterOther[2]);

              for(int j = 0; j < 3; j++)
                B[j] = 0.5 * (B[j] + BOther[j] + GradOther->dB[j][0] * dx + GradOther->dB[j][1] * dy + GradOther->dB[j][2] * dz);
#endif /* #ifdef MHD */

              /* unit vector from this cell's centre to the neighbour's centre */
              double norm[3];
              norm[0] = boundaryX(CenterOther[0] - SphP[i].Center[0]);
              norm[1] = boundaryY(CenterOther[1] - SphP[i].Center[1]);
              norm[2] = boundaryZ(CenterOther[2] - SphP[i].Center[2]);

              double dist = sqrt(norm[0] * norm[0] + norm[1] * norm[1] + norm[2] * norm[2]);
              norm[0] /= dist;
              norm[1] /= dist;
              norm[2] /= dist;

#if defined(OUTPUT_DIVVEL)
              /* face velocity taken as the plain average of the two cell velocities */
              double Vel[3];
              for(int j = 0; j < 3; j++)
                Vel[j] = 0.5 * (P[i].Vel[j] + VelOther[j]);
              double nVel = Vel[0] * norm[0] + Vel[1] * norm[1] + Vel[2] * norm[2];
              SphP[i].DivVel += Mesh.VF[vf].area * nVel;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
              double nB = B[0] * norm[0] + B[1] * norm[1] + B[2] * norm[2];
              SphP[i].DivB += Mesh.VF[vf].area * nB;
#endif /* #ifdef MHD */
            }

          if(q == SphP[i].last_connection)
            break;

          q = DC[q].next;
        }

      /* surface sum divided by the cell volume */
#if defined(OUTPUT_DIVVEL)
      SphP[i].DivVel /= SphP[i].Volume;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
      SphP[i].DivB /= SphP[i].Volume;
#endif /* #ifdef MHD */
    }
}
#endif /* #if defined(OUTPUT_DIVVEL) || defined(MHD) */

/*! \brief Correct values for gradient calculation for reflective boundary
 *         conditions.
 *
 *  Only the velocity component matching a reflective axis is touched: across
 *  a reflective, non-outflow interface its sign is flipped; across an
 *  outflow interface it is replaced by the value of this cell.
 *
 *  \param[in, out] ValueOther Value of other cell.
 *  \param[in] Value Value of this cell.
 *  \param[in] type Type of gradient (x,y,z direction).
 *  \param[in] image_flags Flag that signals boundary interface.
 *
 *  \return void
 */
void correct_for_reflective_boundaries(double *ValueOther, double Value, int type, unsigned int *image_flags)
{
#if defined(REFLECTIVE_X)
  if(type == GRADIENT_TYPE_VELX)
    {
      if((*image_flags & REFL_X_FLAGS) && !(*image_flags & OUTFLOW_X))
        *ValueOther *= -1;
      if((*image_flags & REFL_X_FLAGS) && (*image_flags & OUTFLOW_X))
        *ValueOther = Value;
    }
#endif /* #if defined(REFLECTIVE_X) */

#if defined(REFLECTIVE_Y)
  if(type == GRADIENT_TYPE_VELY)
    {
      if((*image_flags & REFL_Y_FLAGS) && !(*image_flags & OUTFLOW_Y))
        *ValueOther *= -1;
      if((*image_flags & REFL_Y_FLAGS) && (*image_flags & OUTFLOW_Y))
        *ValueOther = Value;
    }
#endif /* #if defined(REFLECTIVE_Y) */

#if defined(REFLECTIVE_Z)
  if(type == GRADIENT_TYPE_VELZ)
    {
      if((*image_flags & REFL_Z_FLAGS) && !(*image_flags & OUTFLOW_Z))
        *ValueOther *= -1;
      if((*image_flags & REFL_Z_FLAGS) && (*image_flags & OUTFLOW_Z))
        *ValueOther = Value;
    }
#endif /* #if defined(REFLECTIVE_Z) */
}
\brief Loops through mesh and limits associated gradients. + * + * \return void + */ +void limit_gradients(void) +{ + mpi_printf("VORONOI: Limiting gradients...\n"); + + point *DP = Mesh.DP; + face *VF = Mesh.VF; + + for(int i = 0; i < Mesh.Nvf; i++) + { + if(DP[VF[i].p1].index < 0 || DP[VF[i].p2].index < 0) + continue; + for(int j = 0; j < 2; j++) + { + point *p; + if(j == 0) + { + p = &DP[VF[i].p1]; + } + else + { + p = &DP[VF[i].p2]; + } + + if(p->task == ThisTask && p->index >= 0 && p->index < NumGas) + { + int q = p->index; + if(TimeBinSynchronized[P[q].TimeBinHydro]) + { + double d[3]; + d[0] = VF[i].cx - SphP[q].Center[0]; + d[1] = VF[i].cy - SphP[q].Center[1]; + d[2] = VF[i].cz - SphP[q].Center[2]; +#if !defined(REFLECTIVE_X) + double xtmp; + d[0] = NEAREST_X(d[0]); +#endif /* #if !defined(REFLECTIVE_X) */ +#if !defined(REFLECTIVE_Y) + double ytmp; + d[1] = NEAREST_Y(d[1]); +#endif /* #if !defined(REFLECTIVE_Y) */ +#if !defined(REFLECTIVE_Z) + double ztmp; + d[2] = NEAREST_Z(d[2]); +#endif /* #if !defined(REFLECTIVE_Z) */ + double value; + MySingle *data; + if(VF[i].area > 1.0e-10 * SphP[q].SurfaceArea) + { + for(int k = 0; k < N_Grad; k++) + { + if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) || + (grad_elements[k].type == GRADIENT_TYPE_VELZ)) + { + value = *(MyFloat *)(((char *)(&P[q])) + grad_elements[k].offset); + value /= All.cf_atime; + } + else + value = *(MyFloat *)(((char *)(&SphP[q])) + grad_elements[k].offset); + + data = (MySingle *)(((char *)(&(SphP[q].Grad))) + grad_elements[k].offset_grad); + + if(grad_elements[k].type != GRADIENT_TYPE_RTF) + limit_gradient(d, value, minvalues[q * N_Grad + k], maxvalues[q * N_Grad + k], data); + } + } + } + } + } + } + +#ifndef DISABLE_VELOCITY_CSND_SLOPE_LIMITING + for(int i = 0; i < Mesh.Nvf; i++) + { + if(DP[VF[i].p1].index < 0 || DP[VF[i].p2].index < 0) + continue; + for(int j = 0; j < 2; j++) + { + point *p; + + if(j == 0) + { + p = &DP[VF[i].p1]; + } + 
else + { + p = &DP[VF[i].p2]; + } + + if(p->task == ThisTask && p->index >= 0 && p->index < NumGas) + { + int q = p->index; + if(TimeBinSynchronized[P[q].TimeBinHydro]) + { + double d[3]; + d[0] = VF[i].cx - SphP[q].Center[0]; + d[1] = VF[i].cy - SphP[q].Center[1]; + d[2] = VF[i].cz - SphP[q].Center[2]; +#if !defined(REFLECTIVE_X) + double xtmp; + d[0] = NEAREST_X(d[0]); +#endif +#if !defined(REFLECTIVE_Y) + double ytmp; + d[1] = NEAREST_Y(d[1]); +#endif +#if !defined(REFLECTIVE_Z) + double ztmp; + d[2] = NEAREST_Z(d[2]); +#endif + double value; + MySingle *data; + + if(VF[i].area > 1.0e-10 * SphP[q].SurfaceArea) + { + /* let's now limit the overall size of the velocity gradient */ + MySingle *grad_vx = (MySingle *)(((char *)(&(SphP[q].Grad))) + GVelx->offset_grad); + MySingle *grad_vy = (MySingle *)(((char *)(&(SphP[q].Grad))) + GVely->offset_grad); + MySingle *grad_vz = (MySingle *)(((char *)(&(SphP[q].Grad))) + GVelz->offset_grad); + limit_vel_gradient(d, grad_vx, grad_vy, grad_vz, get_sound_speed(q)); + } + } + } + } + } +#endif /* #ifndef DISABLE_VELOCITY_CSND_SLOPE_LIMITING */ +} + +/*! \brief Limits velocity gradient. + * + * Limit velocity change to the sound speed. + * + * \param[in] d Direction vector. + * \param[in, out] grad_vx X-velocity gradient. + * \param[in, out] grad_vy Y-velocity gradient. + * \param[in, out] grad_vz Z-velocity gradient. + * \param[in] csnd sound speed. 
+ * + * \return void + */ +void limit_vel_gradient(double *d, MySingle *grad_vx, MySingle *grad_vy, MySingle *grad_vz, double csnd) +{ +#define VEL_GRADIENT_LIMIT_FAC 1.0 + if(All.ComovingIntegrationOn) + { + grad_vx[0] -= All.cf_atime * All.cf_Hrate; + grad_vy[1] -= All.cf_atime * All.cf_Hrate; + grad_vz[2] -= All.cf_atime * All.cf_Hrate; + } + + double dvx = fabs(grad_vx[0] * d[0] + grad_vx[1] * d[1] + grad_vx[2] * d[2]); + double dvy = fabs(grad_vy[0] * d[0] + grad_vy[1] * d[1] + grad_vy[2] * d[2]); + double dvz = fabs(grad_vz[0] * d[0] + grad_vz[1] * d[1] + grad_vz[2] * d[2]); + if(dvx > VEL_GRADIENT_LIMIT_FAC * csnd) + { + double fac = VEL_GRADIENT_LIMIT_FAC * csnd / dvx; + for(int i = 0; i < 3; i++) + { + grad_vx[i] *= fac; + } + } + + if(dvy > VEL_GRADIENT_LIMIT_FAC * csnd) + { + double fac = VEL_GRADIENT_LIMIT_FAC * csnd / dvy; + for(int i = 0; i < 3; i++) + { + grad_vy[i] *= fac; + } + } + if(dvz > VEL_GRADIENT_LIMIT_FAC * csnd) + { + double fac = VEL_GRADIENT_LIMIT_FAC * csnd / dvz; + for(int i = 0; i < 3; i++) + { + grad_vz[i] *= fac; + } + } + + if(All.ComovingIntegrationOn) + { + grad_vx[0] += All.cf_atime * All.cf_Hrate; + grad_vy[1] += All.cf_atime * All.cf_Hrate; + grad_vz[2] += All.cf_atime * All.cf_Hrate; + } +} + +/*! \brief Limits gradients. + * + * Slope limiter. + * + * \param[in] d Direction vector. + * \param[in] phi Value. + * \param[in] min_phi Lower bound for value+gradient*dx. + * \param[in] max_phi Upper bound for value+gradient*dx. + * \param[in, out] dphi Gradient. 
+ * + * \return void + */ +void limit_gradient(double *d, double phi, double min_phi, double max_phi, MySingle *dphi) +{ + double dp = dphi[0] * d[0] + dphi[1] * d[1] + dphi[2] * d[2]; + + if(dp > 0) + { + if(phi + dp > max_phi) + { + double fac; + + if(max_phi > phi) + fac = (max_phi - phi) / dp; + else + fac = 0; + if(fac < 0 || fac > 1) + terminate("fac=%g\ndp=%g max_phi=%g phi=%g", fac, dp, max_phi, phi); + dphi[0] *= fac; + dphi[1] *= fac; + dphi[2] *= fac; + } + } + else if(dp < 0) + { + if(phi + dp < min_phi) + { + double fac; + + if(min_phi < phi) + fac = (min_phi - phi) / dp; + else + fac = 0; + if(fac < 0 || fac > 1) + terminate("fac=%g\ndp=%g max_phi=%g phi=%g", fac, dp, max_phi, phi); + dphi[0] *= fac; + dphi[1] *= fac; + dphi[2] *= fac; + } + } +} + +/*! \brief Distance in x direction. + * + * Taking into account periodicity of simulation box, if given. + * + * \param[in] dx Distance in x direction, not taking into account periodic + * boundaries. + * + * \return Distance in x direction. + */ +double boundaryX(double dx) +{ +#if !defined(REFLECTIVE_X) + if(dx < -boxHalf_X) + dx += boxSize_X; + if(dx > boxHalf_X) + dx -= boxSize_X; +#endif /* #if !defined(REFLECTIVE_X) */ + return dx; +} + +/*! \brief Distance in y direction. + * + * Taking into account periodicity of simulation box, if given. + * + * \param[in] dy Distance in y direction, not taking into account periodic + * boundaries. + * + * \return Distance in y direction. + */ +double boundaryY(double dy) +{ +#if !defined(REFLECTIVE_Y) + if(dy < -boxHalf_Y) + dy += boxSize_Y; + if(dy > boxHalf_Y) + dy -= boxSize_Y; +#endif /* #if !defined(REFLECTIVE_Y) */ + return dy; +} + +/*! \brief Distance in z direction. + * + * Taking into account periodicity of simulation box, if given. + * + * \param[in] dz Distance in z direction, not taking into account periodic + * boundaries. + * + * \return Distance in z direction. 
+ */ +double boundaryZ(double dz) +{ +#if !defined(REFLECTIVE_Z) + if(dz < -boxHalf_Z) + dz += boxSize_Z; + if(dz > boxHalf_Z) + dz -= boxSize_Z; +#endif /* #if !defined(REFLECTIVE_Z) */ + return dz; +} + +#endif /* #if !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c new file mode 100644 index 0000000000..d3e770a0da --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c @@ -0,0 +1,204 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_gradients.c + * \date 05/2018 + * \brief Algorithms to calculate the gradients in 1d simulations. 
+ * \details contains functions: + * double getValue(int i, int k) + * void calculate_gradients(void) + * void compute_divvel() + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 23.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#if defined(ONEDIMS) + +#ifdef OUTPUT_DIVVEL +static void compute_divvel(); +#endif /* #ifdef OUTPUT_DIVVEL */ + +/*! \brief Gets a value of a quantity. + * + * \param[i] Index of cell in P and SphP array. + * \param[i] Index in grad_elements array (determines which quantity). + * + * \return value + */ +double getValue(int i, int k) +{ + if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) || + (grad_elements[k].type == GRADIENT_TYPE_VELZ)) + return *(MyFloat *)(((char *)(&P[i])) + grad_elements[k].offset); + else + return *(MyFloat *)(((char *)(&SphP[i])) + grad_elements[k].offset); +} + +/*! \brief Calculates gradients in a 1d simulation. 
/*! \brief Calculates gradients in a 1d simulation.
 *
 *  For every active cell and every gradient quantity a finite difference
 *  between the left and the right neighbour is computed and then limited so
 *  that the value extrapolated to either neighbour position stays between
 *  the two neighbour values. Only the x-component of the gradient is set;
 *  the y- and z-components are zero.
 *
 *  \return void
 */
void calculate_gradients(void)
{
  CPU_Step[CPU_MISC] += measure_time();

  printf("Calculating 1D gradients...\n");

  int idx, i, k;
  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      for(k = 0; k < N_Grad; k++)
        {
          double Value = getValue(i, k);
          double Pos   = P[i].Pos[0];

#if defined(ONEDIMS_SPHERICAL) || defined(REFLECTIVE_X)
          /* first and last cell get a zero gradient for these boundary types */
          if(i == 0 || i == NumGas - 1)
            {
              MySingle *data = (MySingle *)(((char *)(&(SphP[i].Grad))) + grad_elements[k].offset_grad);
              memset(data, 0, 3 * sizeof(MySingle));
              continue;
            }
#endif /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) */
          /* if we get here, we have periodic boundary conditions or are not at the boundaries */
          double ValueL, ValueR;

          /* neighbour values wrap around at both ends of the cell array */
          if(i == 0)
            ValueL = getValue(NumGas - 1, k);
          else
            ValueL = getValue(i - 1, k);

          if(i == NumGas - 1)
            ValueR = getValue(0, k);
          else
            ValueR = getValue(i + 1, k);

          /* NOTE(review): the positions are read at i - 1 and i + 1 without wrapping, i.e. Mesh.DP[-1] and
           * Mesh.DP[NumGas] at the two ends - presumably ghost points exist there; confirm against the 1d mesh setup */
          double PosL = Mesh.DP[i - 1].x;
          double PosR = Mesh.DP[i + 1].x;

          double grad = (ValueL - ValueR) / (PosL - PosR);

          MySingle *data = (MySingle *)(((char *)(&(SphP[i].Grad))) + grad_elements[k].offset_grad);
          data[0]        = grad;
          data[1]        = 0;
          data[2]        = 0;

          double ValueMin = dmin(ValueL, ValueR);
          double ValueMax = dmax(ValueL, ValueR);

          /* four successive limiter checks (left/right neighbour position against lower/upper bound);
           * each one may replace grad, and later checks see the already reduced value */
          if(Value + grad * (PosL - Pos) < ValueMin)
            {
              if(ValueMin < Value)
                grad = (ValueMin - Value) / (PosL - Pos);
              else
                grad = 0.;
            }

          if(Value + grad * (PosL - Pos) > ValueMax)
            {
              if(ValueMax > Value)
                grad = (ValueMax - Value) / (PosL - Pos);
              else
                grad = 0.;
            }

          if(Value + grad * (PosR - Pos) < ValueMin)
            {
              if(ValueMin < Value)
                grad = (ValueMin - Value) / (PosR - Pos);
              else
                grad = 0.;
            }

          if(Value + grad * (PosR - Pos) > ValueMax)
            {
              if(ValueMax > Value)
                grad = (ValueMax - Value) / (PosR - Pos);
              else
                grad = 0.;
            }

          /* store the limited gradient (overwrites the unlimited value written above) */
          data[0] = grad;
        }
    }

#ifdef OUTPUT_DIVVEL
  compute_divvel();
#endif /* #ifdef OUTPUT_DIVVEL */

  CPU_Step[CPU_GRADIENTS] += measure_time();
}

#ifdef OUTPUT_DIVVEL
/*! \brief Calculates velocity divergence in 1d simulation.
 *
 *  Using Gauss' theorem: the x-velocities of the right and left neighbour
 *  are weighted with the areas of faces i and i - 1, and half of their
 *  difference is divided by the cell volume. At the ends of the cell array
 *  the neighbour velocity is the cell's own velocity for ONEDIMS_SPHERICAL
 *  or REFLECTIVE_X, and that of the cell at the opposite end otherwise.
 *
 *  \return void
 */
void compute_divvel()
{
  face *VF = Mesh.VF;
  double VelxL, VelxR;

  int idx, i;
  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      if(i == 0)
        {
#if defined(ONEDIMS_SPHERICAL) || defined(REFLECTIVE_X)
          VelxL = P[i].Vel[0];
#else  /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) */
          VelxL = P[NumGas - 1].Vel[0];
#endif /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) #else */
        }
      else
        VelxL = P[i - 1].Vel[0];

      if(i == NumGas - 1)
        {
#if defined(ONEDIMS_SPHERICAL) || defined(REFLECTIVE_X)
          VelxR = P[i].Vel[0];
#else  /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) */
          VelxR = P[0].Vel[0];
#endif /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) #else */
        }
      else
        VelxR = P[i + 1].Vel[0];

      /* NOTE(review): for i == 0 this reads VF[-1] - confirm that the 1d face array provides that element */
      SphP[i].DivVel = 0.5 * (VF[i].area * VelxR - VF[i - 1].area * VelxL) / SphP[i].Volume;
    }
}
#endif /* #ifdef OUTPUT_DIVVEL */
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_refinement.c + * \date 05/2018 + * \brief Contains routines for refinement. + * \details contains functions: + * static void refine_add_ngb(int i, int j) + * int do_refinements(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 23.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#ifdef REFINEMENT_SPLIT_CELLS + +static int *ref_SphP_dp_index; +static int *first_ngb, *last_ngb, first_free_ngb; + +/*! \brief Linked list for neighbor data. + * index is the Delaunay point stored in this entry, next_ngb the position of the next entry of the same list in ngb[] (-1 ends the list). + */ +static struct ngb_data +{ + int index; + int next_ngb; +} * ngb; + +/*! \brief Appends Delaunay point j to the linked neighbor list of Delaunay point i. + * Does nothing if either index is negative; terminates if either index is >= Mesh.Ndp. Uses and advances first_free_ngb. + * \param[in] i Index of the Delaunay point whose neighbor list is extended. + * \param[in] j Index of the Delaunay point that is stored as its neighbor. + * + * \return void + */ +static void refine_add_ngb(int i, int j) +{ + if(i >= 0 && j >= 0) + { + if(i >= Mesh.Ndp || j >= Mesh.Ndp) + { + terminate("i>= Ndp || j>= Ndp"); + } + + if(first_ngb[i] >= 0) + { + ngb[last_ngb[i]].next_ngb = first_free_ngb; + last_ngb[i] = first_free_ngb; + } + else + { + first_ngb[i] = last_ngb[i] = first_free_ngb; + } + + ngb[first_free_ngb].index = j; + ngb[first_free_ngb].next_ngb = -1; + first_free_ngb++; + } +} + +/*! \brief Loops through active cells and refine cells if needed.
+ * + * Splits the cell in random direction; moves mesh-generating point by + * 0.025 cell radius and inserts a second mesh-generating point opposite to + * split the cell into two. + * + * \return Number of cells that were refined. + */ +int do_refinements(void) +{ + char buf[1000]; + int idx, i, j, k, count, countall; + double rad, fac; + MyIDType newid = 0; + + TIMER_START(CPU_REFINE); + + ref_SphP_dp_index = mymalloc_movable(&ref_SphP_dp_index, "ref_SphP_dp_index", NumGas * sizeof(int)); + + int NActiveParticles = TimeBinsHydro.NActiveParticles; /* save this since refinement is going to change it */ + for(idx = 0, count = 0; idx < NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(should_this_cell_be_split(i)) + { + ref_SphP_dp_index[i] = -1; /* flag the cell; its Delaunay point index is filled in further below */ + count++; + } + } + + MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + mpi_printf("REFINE: want to refine %d cells\n", countall); + + if(countall) + { + domain_resize_storage(count, count, 2); + + if(NumPart + count >= All.MaxPart) + { + sprintf(buf, "On Task=%d with NumPart=%d we try to produce %d cells. Sorry, no space left...(All.MaxPart=%d)\n", ThisTask, + NumPart, count, All.MaxPart); + terminate(buf); + } + + if(NumGas + count >= All.MaxPartSph) + { + sprintf(buf, "On Task=%d with NumGas=%d we try to produce %d cells.
Sorry, no space left...(All.MaxPartSph=%d)\n", ThisTask, + NumGas, count, All.MaxPartSph); + terminate(buf); + } + + if(All.MaxID == 0) /* MaxID not calculated yet */ + calculate_maxid(); + + int *list = mymalloc("list", NTask * sizeof(int)); + + MPI_Allgather(&count, 1, MPI_INT, list, 1, MPI_INT, MPI_COMM_WORLD); + + newid = All.MaxID + 1; + + for(i = 0; i < ThisTask; i++) + newid += list[i]; /* start after the IDs taken by the new cells of all lower-ranked tasks */ + + All.MaxID += countall; + + myfree(list); + + Ngb_MarkerValue++; + int nchanged = 0; + int *nodelist = (int *)mymalloc("nodelist", NTopleaves * sizeof(int)); + + /* create explicit list of neighbors */ + + first_ngb = mymalloc("first_ngb", Mesh.Ndp * sizeof(int)); + ngb = mymalloc("ngbs", 2 * Mesh.Nvf * sizeof(struct ngb_data)); + last_ngb = mymalloc("last_ngb", Mesh.Ndp * sizeof(int)); + + for(i = 0; i < Mesh.Ndp; i++) + { + first_ngb[i] = last_ngb[i] = -1; + + if(Mesh.DP[i].task == ThisTask) + { + int li = Mesh.DP[i].index; + if(li >= 0 && li < NumGas) + if(ref_SphP_dp_index[li] < 0) + ref_SphP_dp_index[li] = i; /* only guaranteed to be set for active cells */ + } + } + + for(i = 0, first_free_ngb = 0; i < Mesh.Nvf; i++) + { + refine_add_ngb(Mesh.VF[i].p1, Mesh.VF[i].p2); + refine_add_ngb(Mesh.VF[i].p2, Mesh.VF[i].p1); + } + + myfree(last_ngb); + + int NActiveParticles = TimeBinsHydro.NActiveParticles; + for(idx = 0, count = 0; idx < NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(should_this_cell_be_split(i)) + { + int addToGravList = TimeBinSynchronized[P[i].TimeBinGrav]; + if(NumPart > NumGas) + { + move_collisionless_particle(NumPart + count, NumGas + count); + if(TimeBinSynchronized[P[NumPart + count].TimeBinGrav] && P[i].Mass > 0) + addToGravList = 0; + + /* there is already an entry in the list of active particles for + gravity that points to the index that we will use for our new cell */ + } + + /* now split the gas cell */ + + j = NumGas + count; + + P[j] = P[i]; + SphP[j] = SphP[i]; + + P[j].ID = newid++;
+ + rad = get_cell_radius(i); + + double dir[3]; +#ifdef TWODIMS + double phi = 2 * M_PI * get_random_number(); + + dir[0] = cos(phi); + dir[1] = sin(phi); + dir[2] = 0; +#else /* #ifdef TWODIMS */ + double theta = acos(2 * get_random_number() - 1); + double phi = 2 * M_PI * get_random_number(); + + dir[0] = sin(theta) * cos(phi); + dir[1] = sin(theta) * sin(phi); + dir[2] = cos(theta); +#endif /* #ifdef TWODIMS */ + fac = 0.025 * rad; /* offset of the new mesh-generating point from P[i].Pos; P[i].Pos itself is not changed in this function */ + + P[j].Pos[0] = P[i].Pos[0] + fac * dir[0]; + P[j].Pos[1] = P[i].Pos[1] + fac * dir[1]; + P[j].Pos[2] = P[i].Pos[2] + fac * dir[2]; + + SphP[j].SepVector[0] = SphP[i].SepVector[0] = dir[0]; + SphP[j].SepVector[1] = SphP[i].SepVector[1] = dir[1]; + SphP[j].SepVector[2] = SphP[i].SepVector[2] = dir[2]; + + /**** create the voronoi cell of i as an auxiliary mesh */ + + int jj = ref_SphP_dp_index[i]; /* this is the delaunay point of this cell */ + if(jj < 0) + terminate("jj < 0"); + + initialize_and_create_first_tetra(&DeRefMesh); + + DeRefMesh.DTC = mymalloc_movable(&DeRefMesh.DTC, "DeRefDTC", DeRefMesh.MaxNdt * sizeof(tetra_center)); + DeRefMesh.DTF = mymalloc_movable(&DeRefMesh.DTF, "DeRefDTF", DeRefMesh.MaxNdt * sizeof(char)); + for(k = 0; k < DeRefMesh.Ndt; k++) + DeRefMesh.DTF[k] = 0; + + int tlast = 0; + + k = first_ngb[jj]; + while(k >= 0) + { + int q = ngb[k].index; + + if(DeRefMesh.Ndp + 2 >= DeRefMesh.MaxNdp) + { + DeRefMesh.Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR; + DeRefMesh.MaxNdp = DeRefMesh.Indi.AllocFacNdp; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, DeRefMesh.MaxNdp, + DeRefMesh.Indi.AllocFacNdp); +#endif /* #ifdef VERBOSE */ + DeRefMesh.DP -= 5; + DeRefMesh.DP = myrealloc_movable(DeRefMesh.DP, (DeRefMesh.MaxNdp + 5) * sizeof(point)); + DeRefMesh.DP += 5; + } + + DeRefMesh.DP[DeRefMesh.Ndp] = Mesh.DP[q]; + + double r = + sqrt(pow(DeRefMesh.DP[DeRefMesh.Ndp].x - P[i].Pos[0], 2) + pow(DeRefMesh.DP[DeRefMesh.Ndp].y - P[i].Pos[1], 2) + +
pow(DeRefMesh.DP[DeRefMesh.Ndp].z - P[i].Pos[2], 2)); + + if(r < 2 * fac) + terminate("We are trying to split a heavily distorted cell... We better stop. Check your refinement criterion."); + +#ifndef OPTIMIZE_MEMORY_USAGE + set_integers_for_point(&DeRefMesh, DeRefMesh.Ndp); +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ + tlast = insert_point(&DeRefMesh, DeRefMesh.Ndp, tlast); + + DeRefMesh.Ndp++; + k = ngb[k].next_ngb; + } + + /* now add also the point jj itself (the one that is to be split) */ + + DeRefMesh.DP[DeRefMesh.Ndp] = Mesh.DP[jj]; +#ifndef OPTIMIZE_MEMORY_USAGE + set_integers_for_point(&DeRefMesh, DeRefMesh.Ndp); +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ + tlast = insert_point(&DeRefMesh, DeRefMesh.Ndp, tlast); + DeRefMesh.Ndp++; + + /* and finally, add the newly generated point */ + + DeRefMesh.DP[DeRefMesh.Ndp].x = P[j].Pos[0]; + DeRefMesh.DP[DeRefMesh.Ndp].y = P[j].Pos[1]; + DeRefMesh.DP[DeRefMesh.Ndp].z = P[j].Pos[2]; + DeRefMesh.DP[DeRefMesh.Ndp].ID = P[j].ID; +#ifndef OPTIMIZE_MEMORY_USAGE + set_integers_for_point(&DeRefMesh, DeRefMesh.Ndp); +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ + tlast = insert_point(&DeRefMesh, DeRefMesh.Ndp, tlast); + DeRefMesh.Ndp++; + + /* compute circumcircles */ + compute_circumcircles(&DeRefMesh); + + double *Volume = mymalloc("Volume", DeRefMesh.Ndp * sizeof(double)); + + derefine_refine_compute_volumes(Volume); + + double voli = Volume[DeRefMesh.Ndp - 2]; /* point jj (the cell being split) was inserted second to last */ + double volj = Volume[DeRefMesh.Ndp - 1]; /* the newly generated point was inserted last */ + + myfree(Volume); + + myfree(DeRefMesh.DTF); + myfree(DeRefMesh.DTC); + DeRefMesh.DTC = NULL; + + myfree(DeRefMesh.DT); + myfree(DeRefMesh.DP - 5); + myfree(DeRefMesh.VF); + + /* now split the conserved variables according to the volume ratio of the split */ + + double faci = voli / (voli + volj); + double facj = volj / (voli + volj); + + P[i].Mass *= faci; + P[j].Mass *= facj; + SphP[i].OldMass *= faci; + SphP[j].OldMass *= facj; + + SphP[i].Energy *= faci; + SphP[j].Energy *= facj; + +#ifdef MHD + for(k = 0; k < 3; k++)
+ { + SphP[i].B[k] = SphP[i].BConserved[k] / (voli + volj); + SphP[j].B[k] = + SphP[i].B[k] + SphP[i].Grad.dB[k][0] * (P[j].Pos[0] - P[i].Pos[0]) + + SphP[i].Grad.dB[k][1] * (P[j].Pos[1] - P[i].Pos[1]) + + SphP[i].Grad.dB[k][2] * (P[j].Pos[2] - P[i].Pos[2]); /* extrapolate B to the position of the new cell */ + + /* update conserved variables */ + SphP[i].BConserved[k] = SphP[i].B[k] * voli; + SphP[j].BConserved[k] = SphP[j].B[k] * volj; + } +#endif /* #ifdef MHD */ + + for(k = 0; k < 3; k++) + { + SphP[i].Momentum[k] *= faci; + SphP[j].Momentum[k] *= facj; + } + +#ifdef USE_SFR + SphP[i].Sfr *= faci; + SphP[j].Sfr *= facj; +#endif /* #ifdef USE_SFR */ + +#ifdef MAXSCALARS + for(int s = 0; s < N_Scalar; + s++) /* Note, the changes in MATERIALS, HIGHRESGASMASS, etc., are treated as part of the Scalars */ + { + *(MyFloat *)(((char *)(&SphP[i])) + scalar_elements[s].offset_mass) *= faci; + *(MyFloat *)(((char *)(&SphP[j])) + scalar_elements[s].offset_mass) *= facj; + } +#endif /* #ifdef MAXSCALARS */ + +#ifdef REFINEMENT_HIGH_RES_GAS + /* the change in the SphP[].HighResMass is treated as part of the Scalars loop above */ + SphP[i].AllowRefinement += 2; /* increment the refinement "generation" of both cells */ + SphP[j].AllowRefinement += 2; +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + + /* add the new particle into the neighbour tree */ + int no = Ngb_Nextnode[i]; + Ngb_Nextnode[i] = j; + Ngb_Nextnode[j] = no; + Ngb_Father[j] = Ngb_Father[i]; + + ngb_update_rangebounds(j, &nchanged, nodelist); + + /* now add the new particle into the link-lists for the time integration */ + + timebin_add_particle(&TimeBinsHydro, j, i, P[i].TimeBinHydro, 1); + timebin_add_particle(&TimeBinsGravity, j, i, P[i].TimeBinGrav, addToGravList); + + SphP[j].first_connection = -1; + SphP[j].last_connection = -1; + + count++; + } + } + + NumPart += count; + NumGas += count; + All.TotNumPart += countall; + All.TotNumGas += countall; + + myfree(ngb); + myfree(first_ngb); + +
ngb_finish_rangebounds_update(nchanged, nodelist); + + myfree(nodelist); + } + + myfree(ref_SphP_dp_index); + + TIMER_STOP(CPU_REFINE); + + return countall; +} + +#endif /* REFINEMENT_SPLIT_CELLS */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c new file mode 100644 index 0000000000..94ce562398 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c @@ -0,0 +1,501 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * .
+ * + * \file src/mesh/voronoi/voronoi_utils.c + * \date 05/2018 + * \brief Utilities for 3d Voronoi mesh + * \details contains functions: + * double cross_section_plane_cell(int sphp_index, int dp_index, double *center, double *n) + * void intersections_plane_cell(int sphp_index, int dp_index, double *center, double *n, double *polygon, unsigned int + * *nof_polygon_elements) void intersection_plane_grid(double *center, double *n, const char *filename) static double + * polygon_area(double *polygon, unsigned int nof_elements) static int qs_partition(double *A, int p, int r, double *B) static void + * qs_sort(double *A, int p, int r, double *B) static double calc_phi(double x, double y) static void rotate_z(double *vec, const + * double alpha) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 23.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#if !defined(TWODIMS) && !defined(ONEDIMS) + +// helper functions for cross_section_plane_cell and intersections_plane_cell: +static int qs_partition(double *A, int p, int r, double *B); +static void qs_sort(double *A, int p, int r, double *B); +static double calc_phi(double x, double y); +static void rotate_z(double *vec, const double alpha); +static void rotate_y(double *vec, const double alpha); + +#ifdef TETRA_INDEX_IN_FACE +static double polygon_area(double *polygon, unsigned int nof_elements); +static const unsigned int max_poly_elements = 128; + +/*! \brief Calculates the cross section between a plane and a Voronoi cell(3D). + * + * \param[in] sphp_index The hydro index of the cell. + * \param[in] dp_index The delaunay point index of the cell. + * \param[in] center A point in the plane. + * \param[in] n A vector starting at center and normal to the plane. + * + * \return The cross section between the plane and the cell. 
+ */ +double cross_section_plane_cell(int sphp_index, int dp_index, double *center, double *n) +{ + double polygon[max_poly_elements]; + unsigned int nof_elements = 0; + + intersections_plane_cell(sphp_index, dp_index, center, n, polygon, &nof_elements); + + // polygon has to contain at least 3 points + if(nof_elements < 6) + { + return 0; + } + else + { + return polygon_area(polygon, nof_elements); + } +} + +/*! \brief Calculates the intersections between a plane and a cell. + * + * \param[in] sphp_index The hydro index of the cell. + * \param[in] dp_index The Delaunay point index of the cell. + * \param[in] center A point in the plane. + * \param[in] n A vector starting at center and normal to the plane. + * \param[out] polygon Store the intersections (closed polygon, x/y pairs) in this array; needs room for max_poly_elements doubles. + * \param[out] nof_polygon_elements The number of stored elements in the + * polygon array; left untouched if fewer than 3 intersections are found. + * + * \return void + */ +void intersections_plane_cell(int sphp_index, int dp_index, double *center, double *n, double *polygon, + unsigned int *nof_polygon_elements) +{ + // memory for the line segments + unsigned int line_segments_max = 2000; + double *ls = (double *)mymalloc("line_segments", line_segments_max * sizeof(double)); + + // get the line segments + unsigned int nof_elements = 0; + get_line_segments(sphp_index, dp_index, ls, &nof_elements, line_segments_max); + assert(nof_elements % 6 == 0); // 6 doubles represent one line segment + + // start the calculation + unsigned int i; + double phi; + + if(n[0] == 0 && n[1] == 0) + { + phi = 0; + } + else + { + phi = calc_phi(n[0], n[1]); + } + + double r = sqrt(n[0] * n[0] + n[1] * n[1] + n[2] * n[2]); + assert(r > 0); + double theta = acos(n[2] / r); + + double lambda; // z1 + lambda * (z2 - z1) = 0 + + unsigned int max_phi_elms = max_poly_elements / 2; + double phi_values[max_phi_elms]; // phi coordinates of the points of the polygon + unsigned int p = 0; // number of points of the polygon + + // balance point of the polygon + double bal_p_x
= 0; + double bal_p_y = 0; + + for(i = 0; i < nof_elements; i += 6) + { + // transform line segment to the center frame + ls[i] -= center[0]; // x1 + ls[i + 1] -= center[1]; // y1 + ls[i + 2] -= center[2]; // z1 + ls[i + 3] -= center[0]; // x2 + ls[i + 4] -= center[1]; // y2 + ls[i + 5] -= center[2]; // z2 + + // rotate line segment such that the cross secting plane is in the x-y plane / the normal vector of the plane is on the z-axis + rotate_z(&ls[i], -phi); + rotate_y(&ls[i], -theta); + + rotate_z(&ls[i + 3], -phi); + rotate_y(&ls[i + 3], -theta); + + if(ls[i + 2] == ls[i + 5]) // same z-coords + { + if(ls[i + 2] != 0) // no intersection + { + lambda = -1; + } + else + { + lambda = 0; // take first point as intersection + } + } + else + { + lambda = ls[i + 2] / (ls[i + 2] - ls[i + 5]); + } + + if(lambda >= 0 && lambda <= 1) // line segment intersects plane + { + if(p + 1 >= max_phi_elms) /* keep one slot free: closing the polygon below appends one more point to polygon[] and phi_values[] */ + { + terminate("termination in voronoi_utils.c: intersections_plane_cell: not enough memory!\n"); + } + + polygon[2 * p] = ls[i] + lambda * (ls[i + 3] - ls[i]); // x coordinate of the intersection + polygon[2 * p + 1] = ls[i + 1] + lambda * (ls[i + 4] - ls[i + 1]); // y coordinate of the intersection + + bal_p_x += polygon[2 * p]; + bal_p_y += polygon[2 * p + 1]; + + p++; + } + } + + // free memory + myfree(ls); + + // polygon has to contain at least 3 points + if(p < 3) + { + return; + } + + // switch frame to balance point of the polygon + bal_p_x /= p; + bal_p_y /= p; + + for(i = 0; i < p; i++) + { + polygon[2 * i] -= bal_p_x; + polygon[2 * i + 1] -= bal_p_y; + + // calculate the phi values + phi_values[i] = calc_phi(polygon[2 * i], polygon[2 * i + 1]); + } + + // sort polygon + qs_sort(phi_values, 0, p - 1, polygon); + + // close polygon + polygon[2 * p] = polygon[0]; + polygon[2 * p + 1] = polygon[1]; + phi_values[p] = phi_values[0]; + p++; + + // transform back + for(i = 0; i < p; i++) + { + polygon[2 * i] += bal_p_x; + polygon[2 * i + 1] += bal_p_y; + } + +
*nof_polygon_elements = 2 * p; +} + +/*! \brief Write out the intersections between a plane and the grid + * (for plotting). + * + * Binary output: + * int: Number of elements in the first array. + * int: Number of elements in the second array. + * int[]: Array, which stores the number of intersections for each intersected + * cell. + * The j-th entry gives the number of elements in the intersections + * array which correspond to the j-th intersected cell. + * double[]: intersections array, all intersections are stored in the + * order x1,y1,x2,y2,x3,y3,... + * + * The intersections are given in a coordinate system where n is the z-axis + * and which has its origin at center. + * + * \param[in] center A point in the plane. + * \param[in] n A vector starting at center and normal to the plane. + * \param[in] filename Name of the binary output file (created or overwritten). + * Terminates if run on more than one task, if the polygon buffer is exhausted or if the output file cannot be opened. + * \return void + */ +void intersection_plane_grid(double *center, double *n, const char *filename) +{ + if(NTask != 1) + { + terminate("termination in voronoi_utils.c: intersection_plane_grid: not yet parallelized!\n"); + } + + double phi; + + if(n[0] == 0 && n[1] == 0) + { + phi = 0; + } + else + { + phi = calc_phi(n[0], n[1]); + } + + double r = sqrt(n[0] * n[0] + n[1] * n[1] + n[2] * n[2]); + assert(r > 0); + double theta = acos(n[2] / r); + + double xaxis[3] = {1, 0, 0}; + double yaxis[3] = {0, 1, 0}; + double zaxis[3] = {0, 0, 1}; + + rotate_y(xaxis, theta); + rotate_z(xaxis, phi); + + rotate_y(yaxis, theta); + rotate_z(yaxis, phi); + + rotate_y(zaxis, theta); + rotate_z(zaxis, phi); + + printf("normal vector: (%f, %f, %f)\n", n[0], n[1], n[2]); + printf("Coordinate system of output data: \n"); + printf("center: (%f, %f, %f)\n", center[0], center[1], center[2]); + printf("x-axis: (%f, %f, %f)\n", xaxis[0], xaxis[1], xaxis[2]); + printf("y-axis: (%f, %f, %f)\n", yaxis[0], yaxis[1], yaxis[2]); + printf("z-axis: (%f, %f, %f)\n", zaxis[0], zaxis[1], zaxis[2]); + + const int cells_max_elms = NumGas; + int *nof_intersections = (int
*)mymalloc("number of intersections", cells_max_elms * sizeof(int)); + unsigned int l = 0; + + const int polygons_max_elms = NumGas * 5; + double *polygons = (double *)mymalloc("polygons", (polygons_max_elms + max_poly_elements) * sizeof(double)); /* sized in doubles, plus room for one more polygon: a single call below writes at most max_poly_elements doubles */ + unsigned int j = 0; + + unsigned int nof_polygon_elements = 0; + + unsigned int k = 0; + + for(k = 0; k < NumGas; k++) + { + nof_polygon_elements = 0; + intersections_plane_cell(k, k, center, n, &polygons[j], &nof_polygon_elements); + + if(nof_polygon_elements != 0) + { + nof_intersections[l] = (int)nof_polygon_elements; + l++; + + j += nof_polygon_elements; + + if(j > (unsigned int)polygons_max_elms) /* beyond this point the next cell could overrun the slack reserved above */ + { + terminate("termination in voronoi_utils.c: intersection_plane_grid: not enough memory for the polygons!\n"); + } + } + } + + // binary output + FILE *pFile; + + if(!(pFile = fopen(filename, "wb"))) /* do not hand a NULL stream to fwrite/fclose */ + terminate("termination in voronoi_utils.c: intersection_plane_grid: could not open output file!\n"); + fwrite(&l, sizeof(int), 1, pFile); // number of intersected cells + fwrite(&j, sizeof(int), 1, pFile); // number of elements in polygons array + fwrite(nof_intersections, sizeof(int), l, pFile); + fwrite(polygons, sizeof(double), j, pFile); + + fclose(pFile); + + myfree(polygons); + myfree(nof_intersections); +} + +/*! \brief Calculate the area of a 2D polygon. + * + * Formula (shoelace formula, see Wikipedia): A = 0.5 * sum_i=0^{n-1}(x_i * y_{i+1} - x_{i+1} * y_i). + * + * \param[in] polygon Array of points of the polygon: x1, y1, x2, y2, ..., + * has to be sorted counterclockwise and closed + * (x_n == x_0 && y_n == y_0). + * \param[in] nof_elements Number of elements (doubles, not points) in the array; must be at least 8. + * + * \return Area of polygon. + */ +static double polygon_area(double *polygon, unsigned int nof_elements) +{ + assert(nof_elements >= 8); + + double result = 0; + + unsigned int k; + + for(k = 0; k < nof_elements - 2; k += 2) + { + result += polygon[k] * polygon[k + 3] - polygon[k + 2] * polygon[k + 1]; + } + + result *= 0.5; + + assert(result >= 0); + + return result; +} + +#endif /* #ifdef TETRA_INDEX_IN_FACE */ + +/*! \brief Quicksort partitioning function, helper for qs_sort.
+ * + * \param[in, out] A array to be sorted, usually angle phi. + * \param[in] p Lower index for quicksort. + * \param[in] r Upper index for quicksort. + * \param[in, out] B Array that also changes ordering the same way as A. + * + * \return Index for partitioning. + */ +static int qs_partition(double *A, int p, int r, double *B) +{ + double x = A[r]; /* pivot: last element of the range */ + double tmp; + double tmp2; + int i = p - 1; + int j; + + for(j = p; j < r; j++) + { + if(A[j] <= x) + { + // switch phi values ( i <-> j ) + i++; + tmp = A[i]; + A[i] = A[j]; + A[j] = tmp; + + // switch coordinates ( 2i, 2i+1 <-> 2j, 2j+1) + tmp = B[2 * i]; + tmp2 = B[2 * i + 1]; + B[2 * i] = B[2 * j]; + B[2 * i + 1] = B[2 * j + 1]; + B[2 * j] = tmp; + B[2 * j + 1] = tmp2; + } + } + + // switch phi values + tmp = A[i + 1]; + A[i + 1] = A[r]; + A[r] = tmp; + + // switch coordinates + tmp = B[(i + 1) * 2]; + tmp2 = B[(i + 1) * 2 + 1]; + + B[(i + 1) * 2] = B[2 * r]; + B[(i + 1) * 2 + 1] = B[2 * r + 1]; + + B[2 * r] = tmp; + B[2 * r + 1] = tmp2; + + return i + 1; +} + +/*! \brief Quick-sorts the points of the polygon with respect to phi. + * + * \param[in, out] A array of keys to be sorted in ascending order, usually angle phi. + * \param[in] p lower index for quicksort (inclusive). + * \param[in] r upper index for quicksort (inclusive). + * \param[in, out] B array of pairs that is reordered the same way as A: B[2k] and B[2k+1] move together with A[k]; + * usually polygon. + * + * \return void + */ +static void qs_sort(double *A, int p, int r, double *B) +{ + int q; + + if(p < r) + { + q = qs_partition(A, p, r, B); + qs_sort(A, p, q - 1, B); + qs_sort(A, q + 1, r, B); + } +} + +/*! \brief Calculates the phi coordinate of a point. + * + * Calculates polar angle in a 2d coordinate system from Cartesian coordinate + * system. + * + * \param[in] x X coordinate. + * \param[in] y Y coordinate. + * + * \return Phi (polar angle).
+ */ +static double calc_phi(double x, double y) +{ + // the origin has no defined polar angle: report it on stderr and return 0 instead of calling atan2 + if((x == 0) && (y == 0)) + { + fprintf(stderr, "ERROR in calc_phi: both arguments are zero\n"); + return 0; + } + + double p = atan2(y, x); // in [-pi,pi] + + if(p < 0) + { + return p + 2 * M_PI; + } + + return p; +} + +/*! \brief Rotate a vector around the z axis. + * + * \param[in, out] vec Array to 3 dimensional vector to be rotated; only vec[0] and vec[1] are modified. + * \param[in] alpha Rotation angle in radians; positive values turn the x axis towards the y axis. + * + * \return void + */ +static void rotate_z(double *vec, const double alpha) +{ + double vx_tmp = vec[0]; + vec[0] = cos(alpha) * vec[0] - sin(alpha) * vec[1]; + vec[1] = sin(alpha) * vx_tmp + cos(alpha) * vec[1]; +} + +/*! \brief Rotate a vector around the y axis. + * + * \param[in, out] vec Array to 3 dimensional vector to be rotated; only vec[0] and vec[2] are modified. + * \param[in] alpha Rotation angle in radians; positive values turn the z axis towards the x axis. + * + * \return void + */ +static void rotate_y(double *vec, const double alpha) +{ + double vx_tmp = vec[0]; + + vec[0] = cos(alpha) * vec[0] + sin(alpha) * vec[2]; + vec[2] = -sin(alpha) * vx_tmp + cos(alpha) * vec[2]; +} + +#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c b/src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c new file mode 100644 index 0000000000..3fd92c29e6 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c @@ -0,0 +1,321 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/checksummed_sendrecv.c + * \date 05/2018 + * \brief MPI send-receive communication with checksum to verify + * communication. + * \details contains functions: + * int MPI_Check_Sendrecv(void *sendbuf, int sendcount, + * MPI_Datatype sendtype, int dest, int sendtag, + * void *recvbufreal, int recvcount, MPI_Datatype recvtype, + * int source, int recvtag, MPI_Comm comm, + * MPI_Status * status) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef MPISENDRECV_CHECKSUM + +#undef MPI_Sendrecv + +/*! \brief MPI_Sendrecv with built-in check if message arrived properly. + * + * \param[in] sendbuf Initial address of send buffer. + * \param[in] sendcount Number of elements in send buffer. + * \param[in] sendtype Type of elements in send buffer. + * \param[in] dest Rank of destination. + * \param[in] sendtag Send tag. + * \param[out] recvbufreal Initial adress of receive buffer. + * \param[in] recvcount Number of elements in receive buffer. + * \param[in] recvtype Type of elements in receive buffer . + * \param[in] source Rank of source. + * \param[in] recvtag Receive tag. + * \param[in] comm Communicator + * \param[out] status Status object; this refers to receive operation. 
+ * + * \return 0 + */ +int MPI_Check_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbufreal, int recvcount, + MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status) +{ + int checksumtag = 1000, errtag = 2000; + int i, iter = 0, err_flag, err_flag_imported, size_sendtype, size_recvtype; + long long sendCheckSum, recvCheckSum, importedCheckSum; + unsigned char *p, *buf, *recvbuf; + char msg[500]; + + if(dest != source) + terminate("destination task different from source task"); + + MPI_Type_size(sendtype, &size_sendtype); + MPI_Type_size(recvtype, &size_recvtype); + + if(dest == ThisTask) + { + memcpy(recvbufreal, sendbuf, recvcount * size_recvtype); + return 0; + } + + if(!(buf = mymalloc(recvcount * size_recvtype + 1024))) + terminate("not enough memory to allocate the buffer buf"); + + for(i = 0, p = buf; i < recvcount * size_recvtype + 1024; i++) + *p++ = 255; + + recvbuf = buf + 512; + + MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status); + + for(i = 0, p = buf; i < 512; i++, p++) + { + if(*p != 255) + { + sprintf(msg, "MPI-ERROR: Task=%d/%s: Recv occured before recv buffer. message-size=%d from %d, i=%d c=%d\n", ThisTask, + getenv("HOST"), recvcount, dest, i, *p); + terminate(msg); + } + } + + for(i = 0, p = recvbuf + recvcount * size_recvtype; i < 512; i++, p++) + { + if(*p != 255) + { + sprintf(msg, "MPI-ERROR: Task=%d/%s: Recv occured after recv buffer. 
message-size=%d from %d, i=%d c=%d\n", ThisTask, + getenv("HOST"), recvcount, dest, i, *p); + terminate(msg); + } + } + + for(i = 0, p = sendbuf, sendCheckSum = 0; i < sendcount * size_sendtype; i++, p++) + sendCheckSum += *p; + + importedCheckSum = 0; + + if(dest > ThisTask) + { + if(sendcount > 0) + MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); + if(recvcount > 0) + MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); + } + else + { + if(recvcount > 0) + MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); + if(sendcount > 0) + MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); + } + + checksumtag++; + + for(i = 0, p = recvbuf, recvCheckSum = 0; i < recvcount * size_recvtype; i++, p++) + recvCheckSum += *p; + + err_flag = err_flag_imported = 0; + + if(recvCheckSum != importedCheckSum) + { + printf( + "MPI-ERROR: Receive error on task=%d/%s from task=%d, message size=%d, sendcount=%d checksums= %d %d %d %d. 
Try to fix " + "it...\n", + ThisTask, getenv("HOST"), source, recvcount, sendcount, (int)(recvCheckSum >> 32), (int)recvCheckSum, + (int)(importedCheckSum >> 32), (int)importedCheckSum); + myflush(stdout); + + err_flag = 1; + } + + if(dest > ThisTask) + { + MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); + MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); + } + else + { + MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); + MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); + } + errtag++; + + if(err_flag > 0 || err_flag_imported > 0) + { + printf("Task=%d is on %s, wants to send %d and has checksum=%d %d of send data\n", ThisTask, getenv("HOST"), sendcount, + (int)(sendCheckSum >> 32), (int)sendCheckSum); + myflush(stdout); + + do + { + sendtag++; + recvtag++; + + for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) + *p = 0; + + if((iter & 1) == 0) + { + if(dest > ThisTask) + { + if(sendcount > 0) + MPI_Ssend(sendbuf, sendcount, sendtype, dest, sendtag, MPI_COMM_WORLD); + if(recvcount > 0) + MPI_Recv(recvbuf, recvcount, recvtype, dest, recvtag, MPI_COMM_WORLD, status); + } + else + { + if(recvcount > 0) + MPI_Recv(recvbuf, recvcount, recvtype, dest, recvtag, MPI_COMM_WORLD, status); + if(sendcount > 0) + MPI_Ssend(sendbuf, sendcount, sendtype, dest, sendtag, MPI_COMM_WORLD); + } + } + else + { + if(iter > 5) + { + printf("we're trying to send each byte now on task=%d (iter=%d)\n", ThisTask, iter); + myflush(stdout); + if(dest > ThisTask) + { + for(i = 0, p = sendbuf; i < sendcount * size_sendtype; i++, p++) + MPI_Ssend(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD); + for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) + MPI_Recv(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD, status); + } + else + { + for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) + MPI_Recv(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD, status); + for(i = 0, p = sendbuf; i < sendcount 
* size_sendtype; i++, p++) + MPI_Ssend(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD); + } + } + else + { + MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, + status); + } + } + + importedCheckSum = 0; + + for(i = 0, p = sendbuf, sendCheckSum = 0; i < sendcount * size_sendtype; i++, p++) + sendCheckSum += *p; + + printf("Task=%d gas send_checksum=%d %d\n", ThisTask, (int)(sendCheckSum >> 32), (int)sendCheckSum); + myflush(stdout); + + if(dest > ThisTask) + { + if(sendcount > 0) + MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); + if(recvcount > 0) + MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); + } + else + { + if(recvcount > 0) + MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); + if(sendcount > 0) + MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); + } + + for(i = 0, p = recvbuf, recvCheckSum = 0; i < recvcount; i++, p++) + recvCheckSum += *p; + + err_flag = err_flag_imported = 0; + + if(recvCheckSum != importedCheckSum) + { + printf( + "MPI-ERROR: Again (iter=%d) a receive error on task=%d/%s from task=%d, message size=%d, checksums= %d %d %d %d. 
" + "Try to fix it...\n", + iter, ThisTask, getenv("HOST"), source, recvcount, (int)(recvCheckSum >> 32), (int)recvCheckSum, + (int)(importedCheckSum >> 32), (int)importedCheckSum); + myflush(stdout); + err_flag = 1; + } + + if(dest > ThisTask) + { + MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); + MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); + } + else + { + MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); + MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); + } + + if(err_flag == 0 && err_flag_imported == 0) + break; + + errtag++; + checksumtag++; + iter++; + } + while(iter < 10); + + if(iter >= 10) + { + char buf[1000]; + int length; + FILE *fd; + + sprintf(buf, "send_data_%d.dat", ThisTask); + fd = fopen(buf, "w"); + length = sendcount * size_sendtype; + fwrite(&length, 1, sizeof(int), fd); + fwrite(sendbuf, sendcount, size_sendtype, fd); + fclose(fd); + + sprintf(buf, "recv_data_%d.dat", ThisTask); + fd = fopen(buf, "w"); + length = recvcount * size_recvtype; + fwrite(&length, 1, sizeof(int), fd); + fwrite(recvbuf, recvcount, size_recvtype, fd); + fclose(fd); + + sprintf(msg, "MPI-ERROR: Even 10 trials proved to be insufficient on task=%d/%s. Stopping\n", ThisTask, getenv("HOST")); + terminate(msg); + } + } + + memcpy(recvbufreal, recvbuf, recvcount * size_recvtype); + + myfree(buf); + + return 0; +} + +#endif /* #ifdef MPISENDRECV_CHECKSUM */ diff --git a/src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c b/src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c new file mode 100644 index 0000000000..e421807148 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c @@ -0,0 +1,94 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/hypercube_allgatherv.c + * \date 05/2018 + * \brief Home-made MPI_Allgatherv routine. + * \details contains functions: + * int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, + * MPI_Datatype sendtype, void *recvbuf, int *recvcount, + * int *displs, MPI_Datatype recvtype, MPI_Comm comm) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef MPI_HYPERCUBE_ALLGATHERV + +#define TAG 100 +/*! \brief Allgatherv routine based on MPI_Sendrecv calls. + * + * \param[in] sendbuf Starting address of send buffer. + * \param[in] sendcount Number of elements in send buffer. + * \param[in] sendtype Data type of send buffer elements. + * \param[out] recvbuf Address of receive buffer. + * \param[in] recvcount Integer array (of length group size) containing the + * number of elements that are to be received from each process. + * \param[in] displs Integer array (of length group size). 
Entry i specifies + * the displacement (relative to recvbuf ) at which to place the + * incoming data from process. + * \param[in] recvtype Data type of receive buffer elements. + * \param[in] comm Communicator. + * + * \return 0 + */ +int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, + MPI_Datatype recvtype, MPI_Comm comm) +{ + int ntask, thistask, ptask, ngrp, size_sendtype, size_recvtype; + MPI_Status status; + + MPI_Comm_rank(comm, &thistask); + MPI_Comm_size(comm, &ntask); + + MPI_Type_size(sendtype, &size_sendtype); + MPI_Type_size(recvtype, &size_recvtype); + + for(ptask = 0; ntask > (1 << ptask); ptask++) + ; + + for(ngrp = 1; ngrp < (1 << ptask); ngrp++) + { + int recvtask = thistask ^ ngrp; + + if(recvtask < ntask) + MPI_Sendrecv(sendbuf, sendcount, sendtype, recvtask, TAG, recvbuf + displs[recvtask] * size_recvtype, recvcount[recvtask], + recvtype, recvtask, TAG, comm, &status); + } + + if(sendbuf != recvbuf + displs[thistask] * size_recvtype) + memcpy(recvbuf + displs[thistask] * size_recvtype, sendbuf, sendcount * size_sendtype); + + return 0; +} + +#endif /* #ifdef MPI_HYPERCUBE_ALLGATHERV */ diff --git a/src/amuse/community/arepo/src/mpi_utils/mpi_util.c b/src/amuse/community/arepo/src/mpi_utils/mpi_util.c new file mode 100644 index 0000000000..e5098c7396 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/mpi_util.c @@ -0,0 +1,375 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/mpi_util.c + * \date 05/2018 + * \brief Custom made auxiliary MPI functions. + * \details contains functions: + * void mpi_exchange_buffers(void *send_buf, int *send_count, + * int *send_offset, void *recv_buf, int *recv_count, + * int *recv_offset, int item_size, int commtag, + * int include_self) + * int mpi_calculate_offsets(int *send_count, int *send_offset, + * int *recv_count, int *recv_offset, int send_identical) + * int mesh_search_compare_task(const void *a, const void *b) + * int intpointer_compare(const void *a, const void *b) + * void *sort_based_on_mesh_search(mesh_search_data * search, + * void *data, int n_items, int item_size) + * void *sort_based_on_field(void *data, int field_offset, + * int n_items, int item_size) + * void mpi_distribute_items_from_search(mesh_search_data * + * search, void *data, int *n_items, int *max_n, int + * item_size, int commtag, int task_offset, int cell_offset) + * void mpi_distribute_items_to_tasks(void *data, + * int task_offset, int *n_items, int *max_n, int item_size, + * int commtag) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +static char *SaveData2; + +/*! \brief Implements the common idiom of exchanging buffers with every other + * MPI task. + * + * All arrays should be allocated with NTask size. + * + * \param[in] send_buf Pointer to data to be sent. + * \param[in] send_count Number of elements to be sent. 
+ * \param[in] send_offset Array with offsets to communicate to specific task. + * \param[out] recv_buf Pointert to dataspace for incoming data. + * \param[in] recv_count Number of elements to be received. + * \param[in] recv_offset Array with offsets in receive buffer from specific + * task. + * \param[in] item_size Size of one element. + * \param[in] commtag Receive tag. + * \param[in] include_self Communication with own task included? + * + * \return void + */ +void mpi_exchange_buffers(void *send_buf, int *send_count, int *send_offset, void *recv_buf, int *recv_count, int *recv_offset, + int item_size, int commtag, int include_self) +{ + int ngrp; + // this loop goes from 0 in some cases, but that doesn't make sense + // because then recvTask==ThisTask and nothing is done. + for(ngrp = include_self ? 0 : 1; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(send_count[recvTask] > 0 || recv_count[recvTask] > 0) + { + /* exchange data */ + MPI_Sendrecv((char *)send_buf + (size_t)send_offset[recvTask] * item_size, (size_t)send_count[recvTask] * item_size, + MPI_BYTE, recvTask, commtag, (char *)recv_buf + (size_t)recv_offset[recvTask] * item_size, + (size_t)recv_count[recvTask] * item_size, MPI_BYTE, recvTask, commtag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } +} + +/*! \brief Calculates offsets for MPI communication. + * + * Calculates the recv_count, send_offset, and recv_offset arrays + * based on the send_count. Returns nimport, the total number of + * particles to be received. If an identical set of copies are to be + * sent to all tasks, set send_identical=1 and the send_offset will + * be zero for all tasks. + * + * All arrays should be allocated with NTask size. + * + * \param[in] send_count Number of element to be sent. + * \param[out] send_offset Offset in send-buffer. + * \param[out] recv_count Number of elements in receive. + * \param[out] recv_offset Offest for receive buffer. 
+ * \param[in] send_identical Include self-communication? + * + */ +int mpi_calculate_offsets(int *send_count, int *send_offset, int *recv_count, int *recv_offset, int send_identical) +{ + // Exchange the send/receive counts + MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + int nimport = 0; + recv_offset[0] = 0; + send_offset[0] = 0; + int j; + for(j = 0; j < NTask; j++) + { + nimport += recv_count[j]; + + if(j > 0) + { + send_offset[j] = send_offset[j - 1] + (send_identical ? 0 : send_count[j - 1]); + recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1]; + } + } + return nimport; +} + +/*! \brief Comparison function used to sort the mesh_search data by task. + * + * \param[in] a First object. + * \param[in] b Second object. + * + * \return (-1,0,1), -1 if a < b. + */ +int mesh_search_compare_task(const void *a, const void *b) +{ + if((*(mesh_search_data **)a)->Task < (*(mesh_search_data **)b)->Task) + return -1; + + if((*(mesh_search_data **)a)->Task > (*(mesh_search_data **)b)->Task) + return +1; + + return 0; +} + +/*! \brief Comparison function used to sort an array of int pointers into order + * of the pointer targets. + * + * \param[in] a First object. + * \param[in] b Second object. + * + * \return (-1,0,1), -1 if a < b. + */ +int intpointer_compare(const void *a, const void *b) +{ + if((**(int **)a) < (**(int **)b)) + return -1; + + if((**(int **)a) > (**(int **)b)) + return +1; + + return 0; +} + +/*! \brief Sort an opaque array according to the order implied by sorting the + * search array by task. Returns a sorted copy of the data array, + * that needs to be myfreed. + * + * We do this by sorting an array of pointers to the elements in + * search, and then using this array to reorder the data + * array. Unfortunately this means making a copy of the data, but + * this just replaces the copy after the mpi_exchange_buffers + * anyway. + * + * \param[in] search Array with sorting criterion. 
+ * \param[in] data Data to be sorted. + * \param[in] n_items Number of elements. + * \param[in] item_size Size of single element. + * + * \return Pointer to sorted data. + */ +void *sort_based_on_mesh_search(mesh_search_data *search, void *data, int n_items, int item_size) +{ + int i; + char *data2; + mesh_search_data **perm; + + data2 = mymalloc_movable(&SaveData2, "data2", (size_t)n_items * item_size); + + SaveData2 = data2; + + perm = mymalloc("perm", n_items * sizeof(*perm)); + + for(i = 0; i < n_items; ++i) + perm[i] = &search[i]; + + mysort(perm, n_items, sizeof(*perm), mesh_search_compare_task); + + // reorder data into data2 + for(i = 0; i < n_items; ++i) + { + size_t orig_pos = perm[i] - search; + memcpy(data2 + item_size * (size_t)i, (char *)data + item_size * orig_pos, item_size); + } + + myfree(perm); + + return (void *)data2; +} + +/*! \brief Sort an opaque array into increasing order of an int field, given + * by the specified offset. (This would typically be field indicating + * the task.) Returns a sorted copy of the data array, that needs to + * be myfreed. + * + * We do this by sorting an array of pointers to the task field, and + * then using this array to deduce the reordering of the data + * array. Unfortunately this means making a copy of the data, but + * this just replaces the copy after the mpi_exchange_buffers + * anyway. + * + * \param[in] data Data to be sorted. + * \param[in] field_offset offset of the sort field. + * \param[in] n_items Number of elements. + * \param[in] item_size Size of individual item. + * + * \return Pointer to sorted array. 
+ */ +void *sort_based_on_field(void *data, int field_offset, int n_items, int item_size) +{ + int i; + char *data2; + int **perm; + + data2 = mymalloc_movable(&SaveData2, "data2", (size_t)n_items * item_size); + + SaveData2 = data2; + + perm = mymalloc("perm", n_items * sizeof(*perm)); + + for(i = 0; i < n_items; ++i) + perm[i] = (int *)((char *)data + (size_t)i * item_size + field_offset); + + mysort(perm, n_items, sizeof(*perm), intpointer_compare); + + // reorder data into data2 + for(i = 0; i < n_items; ++i) + { + size_t orig_pos = ((char *)perm[i] - ((char *)data + field_offset)) / item_size; + myassert(((char *)perm[i] - ((char *)data + field_offset)) % item_size == 0); + memcpy(data2 + item_size * (size_t)i, (char *)data + item_size * orig_pos, item_size); + } + + myfree(perm); + + return (void *)data2; +} + +/*! \brief This function takes a mesh_search structure and exchanges the + * members in an associated structure based on the index and task in + * the search data. n_items is updated to the new size of data. max_n + * is the allocated size of the data array. + * + * Additionally, if the task_offset and cell_offset are nonnegative, + * the Task and Index fields in the search results will be copied to + * those fields in the data array. + * + * \param[in] search Mesh search data. + * \param[in, out] data Data to be sorted. + * \param[in, out] n_items number of elements. + * \param[in, out] max_n Allocated size of data array. + * \param[in] item_size Size of individual element. + * \param[in] commtag Communication tag. + * \param[in] task_offset Offset of this task. + * \param[in] cell_offset offset of cell. 
+ * + * \return void + */ +void mpi_distribute_items_from_search(mesh_search_data *search, void *data, int *n_items, int *max_n, int item_size, int commtag, + int task_offset, int cell_offset) +{ + int i; + + for(i = 0; i < NTask; i++) + Send_count[i] = 0; + + for(i = 0; i < *n_items; i++) + { + int task = search[i].Task; + myassert(task >= 0 && task < NTask); + Send_count[task]++; + + // copy task/index into data array, if applicable + if(task_offset >= 0) + *(int *)((char *)data + (size_t)i * item_size + task_offset) = task; + if(cell_offset >= 0) + *(int *)((char *)data + (size_t)i * item_size + cell_offset) = search[i].u.Index; + } + + void *data2 = sort_based_on_mesh_search(search, data, *n_items, item_size); + + int nimport = mpi_calculate_offsets(Send_count, Send_offset, Recv_count, Recv_offset, 0); + + if(*max_n < nimport) + { + data = myrealloc_movable(data, (size_t)nimport * item_size); + *max_n = nimport; + } + + data2 = SaveData2; + + mpi_exchange_buffers(data2, Send_count, Send_offset, data, Recv_count, Recv_offset, item_size, commtag, 1); + + myfree_movable(data2); + + *n_items = nimport; +} + +/*! \brief This function distributes the members in an opaque structure to + * the tasks based on a task field given by a specified offset into + * the opaque struct. The task field must have int type. n_items is + * updated to the new size of data. max_n is the allocated size of + * the data array, and is updated if a realloc is necessary. + * + * \param[in out] data Data array + * \param[in] task_offset Offset of task. + * \param[in, out] n_items Number of elements in array. + * \param[in, out] max_n Allocated size of the data array. + * \param[in] item_size Size of single element. + * \param[in] commtag Communication tag. 
+ * + * \return void + */ +void mpi_distribute_items_to_tasks(void *data, int task_offset, int *n_items, int *max_n, int item_size, int commtag) +{ + int i; + + for(i = 0; i < NTask; i++) + Send_count[i] = 0; + + for(i = 0; i < *n_items; i++) + { + int task = *(int *)((char *)data + (size_t)i * item_size + task_offset); + myassert(task >= 0 && task < NTask); + Send_count[task]++; + } + + void *data2 = sort_based_on_field(data, task_offset, *n_items, item_size); + + int nimport = mpi_calculate_offsets(Send_count, Send_offset, Recv_count, Recv_offset, 0); + + if(*max_n < nimport) + { + data = myrealloc_movable(data, (size_t)nimport * item_size); + *max_n = nimport; + } + + data2 = SaveData2; + + mpi_exchange_buffers(data2, Send_count, Send_offset, data, Recv_count, Recv_offset, item_size, commtag, 1); + + myfree_movable(data2); + + *n_items = nimport; +} diff --git a/src/amuse/community/arepo/src/mpi_utils/myIBarrier.c b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.c new file mode 100644 index 0000000000..7133759b85 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.c @@ -0,0 +1,175 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/myIBarrier.c + * \date 05/2018 + * \brief Home-made MPI_Ibarrier routine. + * \details Non-blocking version of MPI_Barrier; Once reaching this point, + * a process notifies this to other tasks. + * contains functions: + * void myIBarrier(MPI_Comm comm, struct sMyIBarrier *barrier) + * void myIBarrierTest(struct sMyIBarrier *barrier, int *flag, + * MPI_Status * unused) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifdef MYIBARRIER + +#include + +#include "myIBarrier.h" + +/*! \brief Non-blocking MPI barrier; Notifies other tasks once it is called. + * + * \param[in] comm MPI communicator. + * \param[in, out] Object containing information about the barrier. + * + * \return void + */ +void myIBarrier(MPI_Comm comm, struct sMyIBarrier *barrier) +{ + barrier->comm = comm; + MPI_Comm_rank(comm, &barrier->rank); + MPI_Comm_size(comm, &barrier->nTasks); + + barrier->nLevels = fls(barrier->rank - 1); + barrier->LevelDone = mymalloc("myIBarrier", barrier->nLevels); + memset(barrier->LevelDone, 0, barrier->nLevels); + + /* find messages we would expect from nonexisting tasks */ + for(level = 0; level < barrier->nLevels; level++) + if((barrier->rank & (1 << level) == 0) && (barrier->rank + (1 << level) >= barrier->nTasks)) + barrier->LevelDone[level] = 1; + + /* find out if we have to send or wait */ + int level = 0; + while(level < barrier->nLevels) + { + if(barrier->rank & (1 << level)) + { + /* we need to send our result */ + int target = barrier->rank - (1 << level); + int level = barrier->nLevels; + MPI_Isend(&level, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm); + break; + } + else + { + /* check if there is something to recieve in which case we have to wait, otherwise go down one level */ + 
if(barrier->rank + (1 << level) < barrier->nTasks) + { + barrier->levelDone[level] = 1; + break; + } + else + level++; + } + } +} + +/*! \brief Test function for myIBarrier. + * + * \param[in] barrier Object containing information about the barrier. + * \param[out] flag Was test successful? + * \param[in] unused Unused MPI_Status. + * + * \return void + */ +void myIBarrierTest(struct sMyIBarrier *barrier, int *flag, MPI_Status *unused) +{ + flag = 0; + + int rflag; + MPI_Status status; + + MPI_Iprobe(MPI_ANY_SOURCE, MPI_TAG_IBARRIER, barrier->comm, &rflag, &status); + + if(rflag) + { + int source = status.MPI_SOURCE; + + int level; + MPI_Recv(&level, 1, MPI_INT, source, MPI_TAG_IBARRIER, barrier->comm, MPI_STATUS_IGNORE); + + if(source > barrier->rank) + { + /* we got another result, so lets check if we can send out further */ + while((level < barrier->nLevels) && barrier->LevelDone[level]) + level++; + + if(level == barrier->nLevels) + { + if(barrier->rank != 0) + terminate("fail"); + /* ok, the barrier resolved, tell everyone */ + + for(level = 0; level < barrier->nLevels; level++) + { + if(barrier->rank & (1 << level) == 0) + { + int target = barrier->rank + (1 << level); + if(target < barrier->nTasks) + MPI_Isend(&level, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm); + } + else + break; + } + + flag = 1; + } + else + { + if(barrier->rank & (1 << level)) + { + /* we need to send our result */ + int target = barrier->rank - (1 << level); + int level = barrier->nLevels; + MPI_Isend(&level, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm); + } + else + { + barrier->LevelDone[level] = 1; + } + } + } + else + { + for(; level < barrier->nLevels; level++) + { + if(barrier->rank & (1 << level) == 0) + { + int target = barrier->rank + (1 << level); + if(target < barrier->nTasks) + MPI_Isend(&level, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm); + } + else + break; + } + + flag = 1; + } + } +} + +#endif /* #ifdef MYIBARRIER */ diff --git 
a/src/amuse/community/arepo/src/mpi_utils/myIBarrier.h b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.h new file mode 100644 index 0000000000..461f8626c9 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.h @@ -0,0 +1,51 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/myIBarrier.h + * \date 05/2018 + * \brief Header for myIBarrier functions. 
+ * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 27.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef MYIBARRIER_H +#define MYIBARRIER_H + +#ifdef MYIBARRIER +#define MPI_TAG_IBARRIER 0x666 + +struct sMyIBarrier +{ + MPI_Comm comm; + int rank; + int nTasks; + int nLevels; + char *LevelDone; +}; + +void myIBarrier(MPI_Comm comm, struct sMyIBarrier *barrier); +void myIBarrierTest(struct sMyIBarrier *barrier, int *flag, MPI_Status *unused); +#endif /* #ifdef MYIBARRIER */ + +#endif /* #ifndef MYIBARRIER_H */ diff --git a/src/amuse/community/arepo/src/mpi_utils/myalltoall.c b/src/amuse/community/arepo/src/mpi_utils/myalltoall.c new file mode 100644 index 0000000000..dcbb889c91 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/myalltoall.c @@ -0,0 +1,122 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/myalltoall.c + * \date 05/2018 + * \brief Specialized all-to-all MPI communication functions. 
+ * \details contains functions: + * void myMPI_Alltoallv(void *sendb, size_t * sendcounts, + * size_t * sdispls, void *recvb, size_t * recvcounts, + * size_t * rdispls, int len, int big_flag, MPI_Comm comm) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief A wrapper around MPI_Alltoallv that can deal with data in + * individual sends that are very big. + * + * \param[in] sendb Starting address of send buffer. + * \param[in] sendcounts Integer array equal to the group size specifying the + * number of elements to send to each processor. + * \param[in] sdispls Integer array (of length group size). Entry j specifies + * the displacement (relative to sendbuf) from which to take the + * outgoing data destined for process j. + * \param[out] recvb Starting address of receive buffer. + * \param[in] recvcounts Integer array equal to the group size specifying the + * maximum number of elements that can be received from each + * processor. + * \param[in] rdispls Integer array (of length group size). Entry i specifies + * the displacement (relative to recvbuf at which to place the + * incoming data from process i. + * \param[in] len Size of single element in send array. + * \param[in] big_flag Flag if cummunication of large data. If not, the normal + * MPI_Alltoallv function is used. + * \param[in] comm MPI communicator. 
+ * + * \return void + */ +void myMPI_Alltoallv(void *sendb, size_t *sendcounts, size_t *sdispls, void *recvb, size_t *recvcounts, size_t *rdispls, int len, + int big_flag, MPI_Comm comm) +{ + char *sendbuf = (char *)sendb; + char *recvbuf = (char *)recvb; + + if(big_flag == 0) + { + int ntask; + MPI_Comm_size(comm, &ntask); + + int *scount = (int *)mymalloc("scount", ntask * sizeof(int)); + int *rcount = (int *)mymalloc("rcount", ntask * sizeof(int)); + int *soff = (int *)mymalloc("soff", ntask * sizeof(int)); + int *roff = (int *)mymalloc("roff", ntask * sizeof(int)); + + for(int i = 0; i < ntask; i++) + { + scount[i] = sendcounts[i] * len; + rcount[i] = recvcounts[i] * len; + soff[i] = sdispls[i] * len; + roff[i] = rdispls[i] * len; + } + + MPI_Alltoallv(sendbuf, scount, soff, MPI_BYTE, recvbuf, rcount, roff, MPI_BYTE, comm); + + myfree(roff); + myfree(soff); + myfree(rcount); + myfree(scount); + } + else + { + /* here we definitely have some large messages. We default to the + * pair-wise protocoll, which should be most robust anyway. + */ + + int ntask, thistask; + MPI_Comm_size(comm, &ntask); + MPI_Comm_rank(comm, &thistask); + + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int target = thistask ^ ngrp; + + if(target < ntask) + { + if(sendcounts[target] > 0 || recvcounts[target] > 0) + myMPI_Sendrecv(sendbuf + sdispls[target] * len, sendcounts[target] * len, MPI_BYTE, target, TAG_PDATA + ngrp, + recvbuf + rdispls[target] * len, recvcounts[target] * len, MPI_BYTE, target, TAG_PDATA + ngrp, comm, + MPI_STATUS_IGNORE); + } + } + } +} diff --git a/src/amuse/community/arepo/src/mpi_utils/pinning.c b/src/amuse/community/arepo/src/mpi_utils/pinning.c new file mode 100644 index 0000000000..f7a6dbb04e --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/pinning.c @@ -0,0 +1,292 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/pinning.c + * \date 05/2018 + * \brief Routines to pin MPI threads to cores. + * \details contains functions: + * void get_core_set(void) + * void detect_topology(void) + * void pin_to_core_set(void) + * void report_pinning(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 08.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef IMPOSE_PINNING +#include +#include + +#define MAX_CORES 4096 + +static int flag_pinning_error = 0; + +static hwloc_cpuset_t cpuset, cpuset_after_MPI_init; +static hwloc_topology_t topology; +static int topodepth; +static int sockets; +static int cores; +static int pus; +static int hyperthreads_per_core; + +/*! \brief Gets the current physical binding of local process. + * + * \return void + */ +void get_core_set(void) +{ + cpuset = hwloc_bitmap_alloc(); + hwloc_get_proc_cpubind(topology, getpid(), cpuset, 0); +} + +/*! \brief Determines the network topology Arepo is running on. 
+ * + * \return void + */ +void detect_topology(void) +{ + unsigned depth; + + /* Allocate and initialize topology object. */ + hwloc_topology_init(&topology); + + /* Perform the topology detection. */ + hwloc_topology_load(topology); + + /* Get some additional topology information + in case we need the topology depth later. */ + topodepth = hwloc_topology_get_depth(topology); + + depth = hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET); + + if(depth == HWLOC_TYPE_DEPTH_UNKNOWN) + sockets = -1; + else + sockets = hwloc_get_nbobjs_by_depth(topology, depth); + + depth = hwloc_get_type_depth(topology, HWLOC_OBJ_CORE); + + if(depth == HWLOC_TYPE_DEPTH_UNKNOWN) + cores = -1; + else + cores = hwloc_get_nbobjs_by_depth(topology, depth); + + depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); + + if(depth == HWLOC_TYPE_DEPTH_UNKNOWN) + pus = -1; + else + pus = hwloc_get_nbobjs_by_depth(topology, depth); +} + +/*! \brief Pins the MPI ranks to the available core set. + * + * \return void + */ +void pin_to_core_set(void) +{ + int i, num_threads, thread; + char buf[MAX_CORES + 1]; + char *p = getenv("OMP_NUM_THREADS"); + if(p) + num_threads = atoi(p); + else + num_threads = 1; + + mpi_printf("\n\n"); + mpi_printf("PINNING: We have %d sockets, %d physical cores and %d logical cores on the first MPI-task's node.\n", sockets, cores, + pus); + if(cores <= 0 || sockets <= 0 || pus <= 0) + { + mpi_printf("PINNING: The topology cannot be recognized. 
We refrain from any pinning attempt.\n"); + flag_pinning_error = 1; + return; + } + + hyperthreads_per_core = pus / cores; + + if(hyperthreads_per_core < 1) + terminate("Need at least one logical thread per physical core\n"); + + if(pus > cores) + mpi_printf("PINNING: Looks like %d hyperthreads per physical core are in principle possible.\n", hyperthreads_per_core); + + cpuset_after_MPI_init = hwloc_bitmap_alloc(); + hwloc_get_proc_cpubind(topology, getpid(), cpuset_after_MPI_init, 0); + + if(!hwloc_bitmap_isequal(cpuset, cpuset_after_MPI_init)) + mpi_printf("PINNING: Apparently, the MPI library set some pinning itself. We'll override this.\n"); + + int id, available_pus = 0; + + for(id = hwloc_bitmap_first(cpuset); id != -1; id = hwloc_bitmap_next(cpuset, id)) + available_pus++; + + mpi_printf("PINNING: Looks like %d logical cores are available\n", available_pus); + + if(available_pus == pus) + mpi_printf("PINNING: Looks like all available logical cores are at our disposal.\n"); + else + { + if(available_pus >= 1) + { + mpi_printf("PINNING: Looks like allready before start of the code, a tight binding was imposed.\n"); +#ifdef IMPOSE_PINNING_OVERRIDE_MODE + for(id = 0; id < pus; id++) + hwloc_bitmap_set(cpuset, id); + available_pus = pus; + mpi_printf("PINNING: We are overridung this and make all %d available to us.\n", available_pus); +#else /* #ifdef IMPOSE_PINNING_OVERRIDE_MODE */ + mpi_printf( + "PINNING: We refrain from any pinning attempt ourselves. 
(This can be changed by setting USE_PINNING_OVERRIDE_MODE.)\n"); + flag_pinning_error = 1; + return; +#endif /* #ifdef IMPOSE_PINNING_OVERRIDE_MODE #else */ + } + } + + for(i = 0; i < pus && i < MAX_CORES; i++) + if(hwloc_bitmap_isset(cpuset, i)) + buf[i] = '1'; + else + buf[i] = '-'; + buf[pus] = 0; + + mpi_printf("PINNING: Available logical cores on first node: %s\n", buf); + + int pus_per_task = available_pus / TasksInThisNode; + + mpi_printf("PINNING: %d logical cores are available per MPI Task.\n", pus_per_task); + + if(pus_per_task <= 0) + terminate("Need at least one logical core per MPI task for pinning to make sense. available_pus=%d TasksInThisNode=%d\n", + available_pus, TasksInThisNode); + + int depth, cid, cores_before, id_this, id_found, count; + hwloc_obj_t obj; + hwloc_cpuset_t cpuset_core; + + /* go through all logical cores in sequence of proximity */ + depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); + + for(cid = 0, cores_before = 0; cores_before < RankInThisNode * pus_per_task && cid < pus; cid++) + { + obj = hwloc_get_obj_by_depth(topology, depth, cid); + + cpuset_core = hwloc_bitmap_dup(obj->cpuset); + if(hwloc_bitmap_isincluded(cpuset_core, cpuset)) + { + cores_before++; + } + hwloc_bitmap_free(cpuset_core); + } + + int pus_per_thread, skip; + + if(pus_per_task > NUM_THREADS) + pus_per_thread = pus_per_task / NUM_THREADS; + else + pus_per_thread = 1; + + /* cid should now be the logical index of the first PU for this MPI task */ + for(thread = 0, id_this = id_found = cid, count = 0; thread < NUM_THREADS; thread++) + { + obj = hwloc_get_obj_by_depth(topology, depth, id_found); + cpuset_thread[thread] = hwloc_bitmap_dup(obj->cpuset); + + for(skip = 0; skip < pus_per_thread; skip++) + { + id_this++; + count++; + + id_found = -1; + if(count >= pus_per_task) + { + id_this = cid; + count = 0; + } + do + { + obj = hwloc_get_obj_by_depth(topology, depth, id_this); + cpuset_core = hwloc_bitmap_dup(obj->cpuset); + 
if(hwloc_bitmap_isincluded(cpuset_core, cpuset)) + { + id_found = id_this; + } + else + { + id_this++; + if(id_this >= pus) + terminate("id_this >= pus"); + } + hwloc_bitmap_free(cpuset_core); + } + while(id_found < 0); + } + } + + hwloc_set_proc_cpubind(topology, getpid(), cpuset_thread[0], HWLOC_CPUBIND_PROCESS); +} + +/*! \brief Prints pinning information for each task. + * + * \return void + */ +void report_pinning(void) +{ + int i; + char buf[MAX_CORES + 1]; + + if(flag_pinning_error) + return; + + hwloc_get_cpubind(topology, cpuset, 0); + + for(i = 0; i < pus && i < MAX_CORES; i++) + if(hwloc_bitmap_isset(cpuset, i)) + buf[i] = '1'; + else + buf[i] = '-'; + buf[pus] = 0; + + for(i = 0; i < NTask; i++) + { + if(ThisTask == i && ThisNode == 0) + printf("PINNING: Node=%4d: Task=%04d: %s\n", ThisNode, ThisTask, buf); + fflush(stdout); + MPI_Barrier(MPI_COMM_WORLD); + } +} +#endif /* #ifdef IMPOSE_PINNING */ diff --git a/src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c b/src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c new file mode 100644 index 0000000000..6614f4ed03 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c @@ -0,0 +1,116 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/sizelimited_sendrecv.c + * \date 05/2018 + * \brief MPI_Sendrecv operations split into chunks of maximum size. + * \details If the number of elements in the MPI_Sendrecv is larger than + * count_limit, the function will split up the communication into + * multiple chunks communicated by the usual MPI_Sendrecv routine. + * contains functions: + * int myMPI_Sendrecv(void *sendb, size_t sendcount, + * MPI_Datatype sendtype, int dest, int sendtag, void *recvb, + * size_t recvcount, MPI_Datatype recvtype, int source, + * int recvtag, MPI_Comm comm, MPI_Status * status) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Self-made sendrecv function with limiter to the number of elements + * that can be sent in one go. + * + * If the total message is longer, multiple MPI_Sendrecv calls are executed + * until the entire message has been communicated. + * + * \param[in] sendb Initial address of send buffer. + * \param[in] sendcount Number of elements in send buffer. + * \param[in] sendtype Type of elements in send buffer (handle). + * \param[in] dest Rank of destination. + * \param[in] sendtag Send tag. + * \param[out] recvb Initial address of receive buffer. + * \param[in] recvcount Number of elements in receive buffer. + * \param[in] recvtype Type of elements in receive buffer (handle). + * \param[in] source Rank of source. + * \param[in] recvtag Receive tag. + * \param[in] comm MPI communicator. + * \param[out] status Status, referring to receive operation. 
+ * + * \return 0 + */ +int myMPI_Sendrecv(void *sendb, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvb, size_t recvcount, + MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status) +{ + int iter = 0, size_sendtype, size_recvtype, send_now, recv_now; + char *sendbuf = (char *)sendb; + char *recvbuf = (char *)recvb; + + if(dest != source) + terminate("dest != source"); + + MPI_Type_size(sendtype, &size_sendtype); + MPI_Type_size(recvtype, &size_recvtype); + + if(dest == ThisTask) + { + memcpy(recvbuf, sendbuf, recvcount * size_recvtype); + return 0; + } + + size_t count_limit = MPI_MESSAGE_SIZELIMIT_IN_BYTES / size_sendtype; + + while(sendcount > 0 || recvcount > 0) + { + if(sendcount > count_limit) + { + send_now = count_limit; + iter++; + } + else + send_now = sendcount; + + if(recvcount > count_limit) + recv_now = count_limit; + else + recv_now = recvcount; + + MPI_Sendrecv(sendbuf, send_now, sendtype, dest, sendtag, recvbuf, recv_now, recvtype, source, recvtag, comm, status); + + sendcount -= send_now; + recvcount -= recv_now; + + sendbuf += send_now * size_sendtype; + recvbuf += recv_now * size_recvtype; + } + + return 0; +} diff --git a/src/amuse/community/arepo/src/ngbtree/ngbtree.c b/src/amuse/community/arepo/src/ngbtree/ngbtree.c new file mode 100644 index 0000000000..ea0ec2e8cb --- /dev/null +++ b/src/amuse/community/arepo/src/ngbtree/ngbtree.c @@ -0,0 +1,1394 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/ngbtree/ngbtree.c
+ * \date 05/2018
+ * \brief Construct neighbor tree.
+ * \details This file contains the neighbor tree construction. This is a
+ * tree structure that includes all gas cells, but no other
+ * particle types.
+ * contains functions:
+ * int ngb_treebuild(int npart)
+ * static inline unsigned long long ngb_double_to_int(double d)
+ * int ngb_treebuild_construct(int npart)
+ * int ngb_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z)
+ * void ngb_update_node_recursive(int no, int sib, int father, int *last, int mode)
+ * void ngb_record_topnode_siblings(int no, int sib)
+ * void ngb_exchange_topleafdata(void)
+ * void drift_node(struct NgbNODE *current, integertime time1)
+ * void ngb_update_velocities(void)
+ * void ngb_update_vbounds(int i, int *nchanged, int *nodelist)
+ * void ngb_finish_vounds_update(int nchanged, int *nodelist)
+ * void ngb_update_rangebounds(int i, int *nchanged, int *nodelist)
+ * void ngb_finish_rangebounds_update(int nchanged, int *nodelist)
+ * void ngb_treemodifylength(int delta_NgbMaxPart)
+ * void ngb_treeallocate(void)
+ * void ngb_treefree(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../gravity/forcetree.h"
+
+static void ngb_record_topnode_siblings(int no, int sib);
+static int ngb_treebuild_construct(int npart);
+static void ngb_update_node_recursive(int no, int sib, int father, int *last, int mode);
+static void ngb_exchange_topleafdata(void);
+static int ngb_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z);
+static void ngb_update_vbounds(int i, int *nchanged, int *nodelist);
+static void ngb_finish_vounds_update(int nchanged, int *nodelist);
+
+static int *Ngb_Node_Tmp_Sibling; /* scratch array, indexed by node number (pointer is shifted by -Ngb_MaxPart after allocation) */
+
+/*! \brief This function is a driver routine for constructing the neighbor
+ * oct-tree, which is done by calling a small number of other
+ * functions.
+ *
+ * Does not build a tree if All.TotNumGas == 0.
+ *
+ * Steps performed here:
+ * - ngb_treebuild_construct() is called repeatedly; whenever any task
+ * reports -1 (MPI_MIN reduction over all tasks), the tree storage is
+ * freed, All.NgbTreeAllocFactor is multiplied by 1.15, the storage is
+ * reallocated and construction is tried again.
+ * - ngb_update_node_recursive() is run in mode 0 for every local top-leaf.
+ * - ngb_exchange_topleafdata() is called, non-local top-leaves get a
+ * pseudo-particle index as nextnode, and the top-level nodes are
+ * updated with ngb_update_node_recursive() in mode 1.
+ * - Ngb_Node_Tmp_Sibling is freed and Ngb_Marker is cleared.
+ *
+ * \param[in] npart Number of particles in tree.
+ *
+ * \return Number of nodes in the tree.
+ */
+int ngb_treebuild(int npart)
+{
+  if(All.TotNumGas == 0)
+    return 0;
+
+  TIMER_START(CPU_NGBTREEBUILD);
+
+  mpi_printf("NGBTREE: Ngb-tree construction. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  double t0 = second();
+
+  int flag;
+  do
+    {
+      int flag_single = ngb_treebuild_construct(npart);
+
+      MPI_Allreduce(&flag_single, &flag, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); /* flag becomes -1 if any task returned -1 */
+      if(flag == -1)
+        {
+          myfree(Ngb_Node_Tmp_Sibling + Ngb_MaxPart); /* undo the -Ngb_MaxPart shift applied after allocation */
+          ngb_treefree();
+
+          All.NgbTreeAllocFactor *= 1.15;
+          mpi_printf("Increasing NgbTreeAllocFactor, new value=%g\n", All.NgbTreeAllocFactor);
+
+          ngb_treeallocate();
+        }
+    }
+  while(flag == -1);
+
+  int ntopleaves = DomainNLocalTopleave[ThisTask];
+  int *list = DomainListOfLocalTopleaves + DomainFirstLocTopleave[ThisTask];
+
+  for(int i = 0; i < ntopleaves; i++)
+    {
+      int last = -1;
+      int no = Ngb_DomainNodeIndex[list[i]];
+
+      if(no < Ngb_MaxPart || no >= Ngb_MaxPart + Ngb_MaxNodes)
+        terminate("i=%d no=%d task=%d \n", i, no, DomainTask[list[i]]);
+
+      ngb_update_node_recursive(no, Ngb_Node_Tmp_Sibling[no], no, &last, 0);
+
+      /* if there was no particle in the node, we need to initialize nextnode of the node */
+      if(no == last)
+        Ngb_Nodes[no].u.d.nextnode = -1;
+
+      Ngb_Nodes[no].u.d.sibling = last; /* we temporarily store this here and will later restore this sibling pointer,
+                                           which is anyway equal to Ngb_Node_Tmp_Sibling[index] */
+    }
+
+  ngb_exchange_topleafdata();
+
+  /* now put in "pseudo" particles as nextnode in non-local topleaves */
+  for(int i = 0; i < NTopleaves; i++)
+    {
+      if(DomainTask[i] != ThisTask)
+        {
+          int index = Ngb_DomainNodeIndex[i];
+          Ngb_Nodes[index].u.d.nextnode = Ngb_MaxPart + Ngb_MaxNodes + i;
+        }
+    }
+
+  /* now update the top-level tree nodes */
+  int last = -1;
+  ngb_update_node_recursive(Ngb_MaxPart, -1, -1, &last, 1);
+
+  /* terminate the nextnode chain at the last visited element, whatever kind of index it is */
+  if(last >= Ngb_MaxPart)
+    {
+      if(last >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo-particle */
+        Ngb_Nextnode[last - Ngb_MaxNodes] = -1;
+      else
+        Ngb_Nodes[last].u.d.nextnode = -1;
+    }
+  else
+    Ngb_Nextnode[last] = -1;
+
+  TIMER_STOPSTART(CPU_NGBTREEBUILD, CPU_LOGS);
+
+  double numnodes = Ngb_NumNodes, tot_numnodes;
+  MPI_Reduce(&numnodes, &tot_numnodes, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  double t1 = second();
+  mpi_printf("NGBTREE: Ngb-tree construction done. took %g sec =%g NTopnodes=%d NTopleaves=%d\n", timediff(t0, t1),
+             tot_numnodes / NTask, NTopnodes, NTopleaves);
+
+  myfree(Ngb_Node_Tmp_Sibling + Ngb_MaxPart);
+
+  Ngb_MarkerValue = 0;
+  memset(Ngb_Marker, 0, (Ngb_MaxPart + Ngb_NumNodes) * sizeof(int));
+
+  TIMER_STOP(CPU_LOGS);
+
+  return Ngb_NumNodes;
+}
+
+/*! \brief Converts double precision coordinate to unsigned long long int.
+ *
+ * The bit pattern of d is reinterpreted through a union and only its
+ * lowest 52 bits are kept (mask 0xFFFFFFFFFFFFF).
+ * NOTE(review): presumably these are the mantissa bits of an IEEE-754
+ * double and callers pass values in [1,2) -- confirm.
+ *
+ * \param[in] d Double precision coordinate that is to be converted.
+ *
+ * \return Unsigned long long int representation of d.
+ */
+static inline unsigned long long ngb_double_to_int(double d)
+{
+  union
+  {
+    double d;
+    unsigned long long ull;
+  } u;
+  u.d = d;
+  return (u.ull & 0xFFFFFFFFFFFFFllu);
+}
+
+/*! \brief Constructs the neighbor oct-tree.
+ *
+ * The index convention for accessing tree nodes is the following:
+ *
+ * 0...NumPart-1 reference single particles.
+ * Ngb_MaxPart.... Ngb_MaxPart+Numnodes-1 references tree nodes.
+ * Ngb_MaxPart + All.MaxNgb_Nodes....
reference "pseudo + * particles", i.e. the marker that indicates a top-node lying on + * another CPU. + * + * `Ngb_Nodes_base' points to the first tree node, + * `Ngb_Nodes' is shifted such that Ngb_Nodes[Ngb_MaxPart] gives the first + * tree node. + * + * \param[in] npart Number of particles involved. + * + * \return status: 0 (default) -1: too many nodes. + */ +int ngb_treebuild_construct(int npart) +{ + /* create an empty root node */ + Ngb_NextFreeNode = Ngb_MaxPart; /* index of first free node */ + + for(int i = 0; i < 8; i++) + Ngb_Nodes[Ngb_NextFreeNode].u.suns[i] = -1; + + Ngb_NumNodes = 1; + Ngb_NextFreeNode++; + + /* create a set of empty nodes corresponding to the top-level domain + * grid. We need to generate these nodes first to make sure that we have a + * complete top-level tree which allows the easy insertion of the + * pseudo-particles at the right place + */ + if(ngb_create_empty_nodes(Ngb_MaxPart, 0, 1, 0, 0, 0) < 0) + return -1; + + Ngb_FirstNonTopLevelNode = Ngb_NextFreeNode; + + Ngb_Node_Tmp_Sibling = (int *)mymalloc("Ngb_Node_Tmp_Sibling", (Ngb_MaxNodes + 1) * sizeof(int)); + Ngb_Node_Tmp_Sibling -= Ngb_MaxPart; + + ngb_record_topnode_siblings(Ngb_MaxPart, -1); + + unsigned long long *ngbTree_IntPos_list = + (unsigned long long *)mymalloc("ngbTree_IntPos_list", 3 * npart * sizeof(unsigned long long)); + + /* now we insert all particles */ + { + int out_of_space = 0; + + int threadid = get_thread_num(); + int start, end, size; + + int first_empty_slot = Ngb_NextFreeNode + threadid * TAKE_NSLOTS_IN_ONE_GO; + int count_empty_slot = TAKE_NSLOTS_IN_ONE_GO; + + if(threadid == 0) + Ngb_NextFreeNode += NUM_THREADS * TAKE_NSLOTS_IN_ONE_GO; + + size = (npart - 1) / NUM_THREADS + 1; + start = threadid * size; + end = (threadid + 1) * size - 1; + if(end >= npart) + end = npart - 1; + + for(int i = start; i <= end && out_of_space == 0; i++) + { + unsigned long long xxb = ngb_double_to_int(((P[i].Pos[0] - DomainCorner[0]) * DomainInverseLen) + 1.0); + 
unsigned long long yyb = ngb_double_to_int(((P[i].Pos[1] - DomainCorner[1]) * DomainInverseLen) + 1.0); + unsigned long long zzb = ngb_double_to_int(((P[i].Pos[2] - DomainCorner[2]) * DomainInverseLen) + 1.0); + unsigned long long mask = ((unsigned long long)1) << (52 - 1); + unsigned char shiftx = (52 - 1); + unsigned char shifty = (52 - 2); + unsigned char shiftz = (52 - 3); + unsigned char levels = 0; + + ngbTree_IntPos_list[3 * i + 0] = xxb; + ngbTree_IntPos_list[3 * i + 1] = yyb; + ngbTree_IntPos_list[3 * i + 2] = zzb; + + int no = 0; + while(TopNodes[no].Daughter >= 0) /* walk down top tree to find correct leaf */ + { + unsigned char subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + mask >>= 1; + levels++; + + no = TopNodes[no].Daughter + TopNodes[no].MortonToPeanoSubnode[subnode]; + } + + no = TopNodes[no].Leaf; + + if(DomainTask[no] != ThisTask) + terminate("STOP! ID=%lld of type=%d is inserted into task=%d, but should be on task=%d no=%d\n", (long long)P[i].ID, + P[i].Type, ThisTask, DomainTask[no], no); + + int th = Ngb_DomainNodeIndex[no]; + + signed long long centermask = (0xFFF0000000000000llu) >> levels; + + int parent = -1; /* note: will not be used below before it is changed */ + unsigned char subnode = 0; + + while(1) + { + if(th >= Ngb_MaxPart) /* we are dealing with an internal node */ + { + subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + centermask >>= 1; + mask >>= 1; + levels++; + + if(levels > MAX_TREE_LEVEL) + { + /* seems like we're dealing with particles at identical (or extremely close) + * locations. Shift subnode index to allow tree construction. 
Note: Multipole moments + * of tree are still correct, but one should MAX_TREE_LEVEL large enough to have + * DomainLen/2^MAX_TREE_LEEL < gravitational softening length + */ + for(int j = 0; j < 8; j++) + { + if(Ngb_Nodes[th].u.suns[subnode] < 0) + break; + + subnode++; + if(subnode >= 8) + subnode = 7; + } + } + + int nn = Ngb_Nodes[th].u.suns[subnode]; + + if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */ + { + parent = th; + th = nn; + } + else + { + /* here we have found an empty slot where we can attach + * the new particle as a leaf. + */ + Ngb_Nodes[th].u.suns[subnode] = i; + break; /* done for this particle */ + } + } + else + { + /* We try to insert into a leaf with a single particle. Need + * to generate a new internal node at this point. + * Then resume trying to insert the new particle at + * the newly created internal node + */ + int thold = th; + + if(count_empty_slot) + { + th = first_empty_slot + (TAKE_NSLOTS_IN_ONE_GO - count_empty_slot); + count_empty_slot--; + } + else + { + { + th = Ngb_NextFreeNode; + Ngb_NextFreeNode += TAKE_NSLOTS_IN_ONE_GO; + } + + first_empty_slot = th; + count_empty_slot = (TAKE_NSLOTS_IN_ONE_GO - 1); + + if(first_empty_slot + TAKE_NSLOTS_IN_ONE_GO - Ngb_MaxPart >= Ngb_MaxNodes) + { + out_of_space = 1; + break; + } + } + + Ngb_Nodes[parent].u.suns[subnode] = th; + struct NgbNODE *nfreep = &Ngb_Nodes[th]; + + for(int j = 0; j < 8; j++) + nfreep->u.suns[j] = -1; + + unsigned long long *intppos = &ngbTree_IntPos_list[3 * thold]; + + subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) | + ((unsigned char)((intppos[2] & mask) >> shiftz))); + + nfreep->u.suns[subnode] = thold; + } + } + } + } + + myfree(ngbTree_IntPos_list); + + if((Ngb_NumNodes = Ngb_NextFreeNode - Ngb_MaxPart) >= Ngb_MaxNodes) + { + if(All.NgbTreeAllocFactor > MAX_TREE_ALLOC_FACTOR) + { + dump_particles(); + terminate("task %d: out of space for neighbor tree, stopping 
with particle dump.\n", ThisTask); + } + else + return -1; + } + + return 0; +} + +/*! \brief Create empty ngb-tree node. + * + * This function recursively creates a set of empty tree nodes which + * corresponds to the top-level tree for the domain grid. This is done to + * ensure that this top-level tree is always "complete" so that we can easily + * associate the pseudo-particles of other CPUs with tree-nodes at a given + * level in the tree, even when the particle population is so sparse that + * some of these nodes are actually empty. + * + * \param[in] no Index of node in Ngb_Nodes array. + * \param[in] topnode Index in TopNodes. + * \param[in] bits Number of bits used. + * \param[in] x Integer coordinate X. + * \param[in] y Integer coordinate Y. + * \param[in] z Integer coordinate Z. + * + * \return Status: 0 success; -1 error. + */ +int ngb_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z) +{ + if(TopNodes[topnode].Daughter >= 0) + { + for(int i = 0; i < 2; i++) + for(int j = 0; j < 2; j++) + for(int k = 0; k < 2; k++) + { + if(Ngb_NumNodes >= Ngb_MaxNodes) + { + if(All.NgbTreeAllocFactor > MAX_TREE_ALLOC_FACTOR) + { + dump_particles(); + terminate("task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", ThisTask, + NTopnodes); + } + return -1; + } + + int sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits); + + int count = i + 2 * j + 4 * k; + + Ngb_Nodes[no].u.suns[count] = Ngb_NextFreeNode; + + for(int n = 0; n < 8; n++) + Ngb_Nodes[Ngb_NextFreeNode].u.suns[n] = -1; + + if(TopNodes[TopNodes[topnode].Daughter + sub].Daughter == -1) + Ngb_DomainNodeIndex[TopNodes[TopNodes[topnode].Daughter + sub].Leaf] = Ngb_NextFreeNode; + + Ngb_NextFreeNode++; + Ngb_NumNodes++; + + if(ngb_create_empty_nodes(Ngb_NextFreeNode - 1, TopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i, 2 * y + j, + 2 * z + k) < 0) + return -1; + } + } + + return 0; +} + +/*! \brief Determine node ranges. 
+ *
+ * This routine determines the node ranges a given internal node
+ * and all its subnodes using a recursive computation. The result is
+ * stored in the Ngb_Nodes[] structure in the sequence of this tree-walk.
+ *
+ *
+ * \param[in] no Index of node.
+ * \param[in] sib Sibling node of no.
+ * \param[in] father Parent node of no.
+ * \param[in, out] last Pointer to last node for which this function was
+ * called.
+ * \param[in] mode 0: process a leave branch; 1: process top-level nodes.
+ *
+ * \return void
+ */
+void ngb_update_node_recursive(int no, int sib, int father, int *last, int mode)
+{
+  int j, jj, k, p, pp, nextsib, suns[8];
+  MyNgbTreeFloat range_min[3];
+  MyNgbTreeFloat range_max[3];
+  MyNgbTreeFloat vertex_vmin[3];
+  MyNgbTreeFloat vertex_vmax[3];
+#ifdef TREE_BASED_TIMESTEPS
+  MyNgbTreeFloat vmin[3], vmax[3], maxcsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  if(no >= Ngb_MaxPart && no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */
+    {
+      /* make the previously visited element point to this node as its nextnode */
+      if(*last >= 0)
+        {
+          if(*last >= Ngb_MaxPart)
+            {
+              if(*last == no)
+                terminate("as");
+
+              if(*last >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo-particle */
+                Ngb_Nextnode[*last - Ngb_MaxNodes] = no;
+              else
+                Ngb_Nodes[*last].u.d.nextnode = no;
+            }
+          else
+            Ngb_Nextnode[*last] = no;
+        }
+
+      *last = no;
+
+      int not_interal_top_level = 0;
+
+      if(mode == 1)
+        {
+          if(!(no >= Ngb_MaxPart && no < Ngb_FirstNonTopLevelNode))
+            terminate("can't be");
+
+          if(Ngb_Node_Tmp_Sibling[no] != -2) /* -2 is the marker ngb_record_topnode_siblings() sets for internal top-level nodes */
+            not_interal_top_level = 1;
+        }
+
+      if(not_interal_top_level)
+        {
+          p = Ngb_Nodes[no].u.d.nextnode;
+
+          if(p >= Ngb_MaxPart + Ngb_MaxNodes &&
+             p < Ngb_MaxPart + Ngb_MaxNodes + NTopleaves) /* a pseudo-particle, i.e. we are dealing with a non-local top-leave */
+            ngb_update_node_recursive(p, sib, no, last, mode);
+          else
+            {
+              /* this is local toplevel node */
+              *last = Ngb_Nodes[no].u.d.sibling;
+            }
+
+          if(Ngb_Node_Tmp_Sibling[no] != sib)
+            terminate("Ngb_Node_Tmp_Sibling[no] != sib");
+
+          /* restore the sibling pointer for local toplevel nodes (we had temporarily stored the last element in this branch */
+          Ngb_Nodes[no].u.d.sibling = sib;
+          Ngb_Nodes[no].father = father;
+        }
+      else
+        {
+          for(j = 0; j < 8; j++)
+            suns[j] = Ngb_Nodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
+                                                  overwrite one element (union!) */
+
+#ifdef TREE_BASED_TIMESTEPS
+          maxcsnd = 0;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+          /* start from an "empty" bounding box: min at +MAX_NGBRANGE_NUMBER, max at -MAX_NGBRANGE_NUMBER */
+          for(k = 0; k < 3; k++)
+            {
+              range_min[k] = MAX_NGBRANGE_NUMBER;
+              range_max[k] = -MAX_NGBRANGE_NUMBER;
+
+              vertex_vmin[k] = MAX_NGBRANGE_NUMBER;
+              vertex_vmax[k] = -MAX_NGBRANGE_NUMBER;
+
+#ifdef TREE_BASED_TIMESTEPS
+              vmin[k] = MAX_NGBRANGE_NUMBER;
+              vmax[k] = -MAX_NGBRANGE_NUMBER;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+            }
+
+          for(j = 0; j < 8; j++)
+            {
+              if((p = suns[j]) >= 0)
+                {
+                  /* check if we have a sibling on the same level */
+                  for(jj = j + 1; jj < 8; jj++)
+                    if((pp = suns[jj]) >= 0)
+                      break;
+
+                  if(jj < 8) /* yes, we do */
+                    nextsib = pp;
+                  else
+                    nextsib = sib;
+
+                  ngb_update_node_recursive(p, nextsib, no, last, mode);
+
+                  if(p >= Ngb_MaxPart) /* an internal node or pseudo particle */
+                    {
+                      if(p >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo particle */
+                        {
+                          /* nothing to be done here because the mass of the
+                           * pseudo-particle is still zero. This will be changed
+                           * later.
+                           */
+                        }
+                      else
+                        {
+                          /* merge the already computed bounds of the daughter node into this node's bounds */
+#ifdef TREE_BASED_TIMESTEPS
+                          if(maxcsnd < ExtNgb_Nodes[p].MaxCsnd)
+                            maxcsnd = ExtNgb_Nodes[p].MaxCsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                          for(k = 0; k < 3; k++)
+                            {
+                              if(range_min[k] > Ngb_Nodes[p].u.d.range_min[k])
+                                range_min[k] = Ngb_Nodes[p].u.d.range_min[k];
+
+                              if(range_max[k] < Ngb_Nodes[p].u.d.range_max[k])
+                                range_max[k] = Ngb_Nodes[p].u.d.range_max[k];
+
+                              if(vertex_vmin[k] > Ngb_Nodes[p].vertex_vmin[k])
+                                vertex_vmin[k] = Ngb_Nodes[p].vertex_vmin[k];
+
+                              if(vertex_vmax[k] < Ngb_Nodes[p].vertex_vmax[k])
+                                vertex_vmax[k] = Ngb_Nodes[p].vertex_vmax[k];
+
+#ifdef TREE_BASED_TIMESTEPS
+                              if(vmin[k] > ExtNgb_Nodes[p].vmin[k])
+                                vmin[k] = ExtNgb_Nodes[p].vmin[k];
+
+                              if(vmax[k] < ExtNgb_Nodes[p].vmax[k])
+                                vmax[k] = ExtNgb_Nodes[p].vmax[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                            }
+                        }
+                    }
+                  else /* a particle */
+                    {
+#ifdef TREE_BASED_TIMESTEPS
+                      if(maxcsnd < SphP[p].Csnd)
+                        maxcsnd = SphP[p].Csnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                      for(k = 0; k < 3; k++)
+                        {
+                          if(range_min[k] > P[p].Pos[k])
+                            range_min[k] = P[p].Pos[k];
+
+                          if(range_max[k] < P[p].Pos[k])
+                            range_max[k] = P[p].Pos[k];
+
+                          if(P[p].Type == 0) /* vertex velocities are only read for particles of type 0 */
+                            {
+                              if(vertex_vmin[k] > SphP[p].VelVertex[k])
+                                vertex_vmin[k] = SphP[p].VelVertex[k];
+
+                              if(vertex_vmax[k] < SphP[p].VelVertex[k])
+                                vertex_vmax[k] = SphP[p].VelVertex[k];
+                            }
+
+#ifdef TREE_BASED_TIMESTEPS
+                          if(vmin[k] > P[p].Vel[k])
+                            vmin[k] = P[p].Vel[k];
+
+                          if(vmax[k] < P[p].Vel[k])
+                            vmax[k] = P[p].Vel[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                        }
+                    }
+                }
+            }
+
+#ifdef TREE_BASED_TIMESTEPS
+          ExtNgb_Nodes[no].MaxCsnd = maxcsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+          /* store the accumulated bounds; from here on the u.d view of the union is the valid one */
+          for(k = 0; k < 3; k++)
+            {
+              Ngb_Nodes[no].u.d.range_min[k] = range_min[k];
+              Ngb_Nodes[no].u.d.range_max[k] = range_max[k];
+              Ngb_Nodes[no].vertex_vmin[k] = vertex_vmin[k];
+              Ngb_Nodes[no].vertex_vmax[k] = vertex_vmax[k];
+#ifdef TREE_BASED_TIMESTEPS
+              ExtNgb_Nodes[no].vmin[k] = vmin[k];
+              ExtNgb_Nodes[no].vmax[k] = vmax[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+            }
+
+          Ngb_Nodes[no].u.d.sibling = sib;
+          Ngb_Nodes[no].father = father;
+
+          Ngb_Nodes[no].Ti_Current = All.Ti_Current;
+        }
+    }
+  else /* single particle or pseudo particle */
+    {
+      if(*last >= 0)
+        {
+          if(*last >= Ngb_MaxPart)
+            {
+              if(*last >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo-particle */
+                Ngb_Nextnode[*last - Ngb_MaxNodes] = no;
+              else
+                Ngb_Nodes[*last].u.d.nextnode = no;
+            }
+          else
+            {
+              Ngb_Nextnode[*last] = no;
+            }
+        }
+      if(no < Ngb_MaxPart) /* only set it for single particles... */
+        {
+          if(father < Ngb_MaxPart)
+            terminate("no=%d father=%d\n", no, father);
+
+          Ngb_Father[no] = father;
+        }
+
+      *last = no;
+    }
+}
+
+/*! \brief Sets sibling information in u.suns for node no.
+ *
+ * Walks the top-level tree recursively. For a node whose first daughter
+ * slot is occupied, Ngb_Node_Tmp_Sibling[no] is set to the marker -2 and
+ * the routine recurses into all eight daughters, passing the next daughter
+ * (or sib for the last one) as sibling. For a node without daughters,
+ * Ngb_Node_Tmp_Sibling[no] is set to sib.
+ *
+ * \param[in] no Index of node.
+ * \param[in] sib Index of sibling.
+ *
+ * \return void
+ */
+void ngb_record_topnode_siblings(int no, int sib)
+{
+  /* note: when this routine is called, only toplevel tree nodes are present */
+
+  if(Ngb_Nodes[no].u.suns[0] >= 0)
+    {
+      /* marker value to designate internal nodes in the top-level tree */
+      Ngb_Node_Tmp_Sibling[no] = -2;
+
+      if(Ngb_Nodes[no].u.suns[0] >= 0) /* same condition as the enclosing if */
+        for(int j = 0; j < 8; j++)
+          {
+            int p = Ngb_Nodes[no].u.suns[j];
+            int nextsib;
+
+            if(j < 7)
+              nextsib = Ngb_Nodes[no].u.suns[j + 1];
+            else
+              nextsib = sib;
+
+            ngb_record_topnode_siblings(p, nextsib);
+          }
+    }
+  else
+    Ngb_Node_Tmp_Sibling[no] = sib; /* a top-level leave node */
+}
+
+/*! \brief Communicates top leaf data.
+ * + * \return void + */ +void ngb_exchange_topleafdata(void) +{ + struct DomainNODE + { + MyNgbTreeFloat range_min[3]; + MyNgbTreeFloat range_max[3]; + MyNgbTreeFloat vertex_vmin[3]; + MyNgbTreeFloat vertex_vmax[3]; +#ifdef TREE_BASED_TIMESTEPS + MyNgbTreeFloat MaxCsnd, vmin[3], vmax[3]; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + }; + + struct DomainNODE *DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE)); + + /* share the pseudo-particle data accross CPUs */ + int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask); + int *recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * NTask); + int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask); + int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask); + + for(int task = 0; task < NTask; task++) + recvcounts[task] = 0; + + for(int n = 0; n < NTopleaves; n++) + recvcounts[DomainTask[n]]++; + + for(int task = 0; task < NTask; task++) + bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE); + + recvoffset[0] = 0, byteoffset[0] = 0; + for(int task = 1; task < NTask; task++) + { + recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1]; + byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; + } + + struct DomainNODE *loc_DomainMoment = + (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE)); + + int idx = 0; + for(int n = 0; n < NTopleaves; n++) + { + if(DomainTask[n] == ThisTask) + { + int no = Ngb_DomainNodeIndex[n]; + + /* read out the multipole moments from the local base cells */ +#ifdef TREE_BASED_TIMESTEPS + loc_DomainMoment[idx].MaxCsnd = ExtNgb_Nodes[no].MaxCsnd; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + for(int k = 0; k < 3; k++) + { + loc_DomainMoment[idx].range_min[k] = Ngb_Nodes[no].u.d.range_min[k]; + loc_DomainMoment[idx].range_max[k] = Ngb_Nodes[no].u.d.range_max[k]; + loc_DomainMoment[idx].vertex_vmin[k] = Ngb_Nodes[no].vertex_vmin[k]; + 
loc_DomainMoment[idx].vertex_vmax[k] = Ngb_Nodes[no].vertex_vmax[k]; +#ifdef TREE_BASED_TIMESTEPS + loc_DomainMoment[idx].vmin[k] = ExtNgb_Nodes[no].vmin[k]; + loc_DomainMoment[idx].vmax[k] = ExtNgb_Nodes[no].vmax[k]; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } + idx++; + } + } + + MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD); + + for(int task = 0; task < NTask; task++) + recvcounts[task] = 0; + + for(int n = 0; n < NTopleaves; n++) + { + int task = DomainTask[n]; + if(task != ThisTask) + { + int no = Ngb_DomainNodeIndex[n]; + int idx = recvoffset[task] + recvcounts[task]++; + +#ifdef TREE_BASED_TIMESTEPS + ExtNgb_Nodes[no].MaxCsnd = DomainMoment[idx].MaxCsnd; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + for(int k = 0; k < 3; k++) + { + Ngb_Nodes[no].u.d.range_min[k] = DomainMoment[idx].range_min[k]; + Ngb_Nodes[no].u.d.range_max[k] = DomainMoment[idx].range_max[k]; + Ngb_Nodes[no].vertex_vmin[k] = DomainMoment[idx].vertex_vmin[k]; + Ngb_Nodes[no].vertex_vmax[k] = DomainMoment[idx].vertex_vmax[k]; +#ifdef TREE_BASED_TIMESTEPS + ExtNgb_Nodes[no].vmin[k] = DomainMoment[idx].vmin[k]; + ExtNgb_Nodes[no].vmax[k] = DomainMoment[idx].vmax[k]; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } + Ngb_Nodes[no].Ti_Current = All.Ti_Current; + } + } + + myfree(loc_DomainMoment); + myfree(byteoffset); + myfree(bytecounts); + myfree(recvoffset); + myfree(recvcounts); + myfree(DomainMoment); +} + +/*! \brief Drifts a node to time time1. + * + * \param[in] current Current node. + * \param[in] time1 Time to be drifted to. 
+ * + * \return void + */ +void drift_node(struct NgbNODE *current, integertime time1) +{ + double dt_drift; + + if(All.ComovingIntegrationOn) + dt_drift = get_drift_factor(current->Ti_Current, time1); + else + dt_drift = (time1 - current->Ti_Current) * All.Timebase_interval; + + for(int j = 0; j < 3; j++) + { + current->u.d.range_min[j] += current->vertex_vmin[j] * dt_drift; + current->u.d.range_max[j] += current->vertex_vmax[j] * dt_drift; + } + + current->Ti_Current = time1; +} + +/*! \brief Updates velocity informataion in ngb node data. + * + * \return void + */ +void ngb_update_velocities(void) +{ + TIMER_START(CPU_NGBTREEUPDATEVEL); + + Ngb_MarkerValue++; + + int nchanged = 0; + int *nodelist = (int *)mymalloc("nodelist", NTopleaves * sizeof(int)); + + for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + int target = TimeBinsHydro.ActiveParticleList[idx]; + if(target >= 0) + if(P[target].Type == 0) + ngb_update_vbounds(target, &nchanged, nodelist); + } + + for(int timebin = All.HighestSynchronizedTimeBin; timebin >= 0; timebin--) + { + for(int target = TimeBinsGravity.FirstInTimeBin[timebin]; target >= 0; target = TimeBinsGravity.NextInTimeBin[target]) + if(target >= 0) + if(P[target].Type == 0) + ngb_update_vbounds(target, &nchanged, nodelist); + } + + ngb_finish_vounds_update(nchanged, nodelist); + + myfree(nodelist); + + TIMER_STOP(CPU_NGBTREEUPDATEVEL); +} + +/*! \brief Updates vmin and vmax in ngb nodes. + * + * Inverse tree walk. + * + * \param[in] i Index of particle. + * \param[in, out] nchanged Number of changed top level nodes. + * \param[out] nodelist Top level nodes that were changed. 
+ * + * \return void + */ +void ngb_update_vbounds(int i, int *nchanged, int *nodelist) +{ + int no = Ngb_Father[i]; + + while(no >= 0) + { + if(Ngb_Nodes[no].Ti_Current != All.Ti_Current) + drift_node(&Ngb_Nodes[no], All.Ti_Current); + + int flag_changed = 0; + + for(int j = 0; j < 3; j++) + { + if(Ngb_Nodes[no].vertex_vmin[j] > SphP[i].VelVertex[j]) + { + Ngb_Nodes[no].vertex_vmin[j] = SphP[i].VelVertex[j]; + flag_changed = 1; + } + + if(Ngb_Nodes[no].vertex_vmax[j] < SphP[i].VelVertex[j]) + { + Ngb_Nodes[no].vertex_vmax[j] = SphP[i].VelVertex[j]; + flag_changed = 1; + } + +#ifdef TREE_BASED_TIMESTEPS + if(ExtNgb_Nodes[no].vmin[j] > P[i].Vel[j]) + { + ExtNgb_Nodes[no].vmin[j] = P[i].Vel[j]; + flag_changed = 1; + } + + if(ExtNgb_Nodes[no].vmax[j] < P[i].Vel[j]) + { + ExtNgb_Nodes[no].vmax[j] = P[i].Vel[j]; + flag_changed = 1; + } +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } + + if(flag_changed == 0) + break; + + if(no < Ngb_FirstNonTopLevelNode) /* top-level tree-node reached */ + { + if(Ngb_Marker[no] != Ngb_MarkerValue) + { + Ngb_Marker[no] = Ngb_MarkerValue; + nodelist[*nchanged] = no; + *nchanged = *nchanged + 1; + } + break; + } + + no = Ngb_Nodes[no].father; + } +} + +/*! \brief Finalizes velocity bounds update. + * + * Exchanges changed information in top level nodes to all tasks. + * + * \param[in] nchanged Number of changed top level nodes. 
+ * \param[in] list of changed top level nodes + * + * \return void + */ +void ngb_finish_vounds_update(int nchanged, int *nodelist) +{ + struct DomainNODE + { + int node; + MyNgbTreeFloat vertex_vmin[3]; + MyNgbTreeFloat vertex_vmax[3]; +#ifdef TREE_BASED_TIMESTEPS + MyNgbTreeFloat vmin[3]; + MyNgbTreeFloat vmax[3]; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + }; + + /* share the pseudo-particle data accross CPUs */ + int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask); + int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask); + int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask); + + MPI_Allgather(&nchanged, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD); + + for(int task = 0; task < NTask; task++) + bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE); + + byteoffset[0] = 0; + for(int task = 1; task < NTask; task++) + byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; + + struct DomainNODE *loc_DomainMoment = + (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE)); + + for(int i = 0; i < nchanged; i++) + { + int no = nodelist[i]; + loc_DomainMoment[i].node = no; + + for(int j = 0; j < 3; j++) + { + loc_DomainMoment[i].vertex_vmin[j] = Ngb_Nodes[no].vertex_vmin[j]; + loc_DomainMoment[i].vertex_vmax[j] = Ngb_Nodes[no].vertex_vmax[j]; +#ifdef TREE_BASED_TIMESTEPS + loc_DomainMoment[i].vmin[j] = ExtNgb_Nodes[no].vmin[j]; + loc_DomainMoment[i].vmax[j] = ExtNgb_Nodes[no].vmax[j]; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } + } + + int tot_nchanged = 0; + for(int task = 0; task < NTask; task++) + tot_nchanged += recvcounts[task]; + + struct DomainNODE *tot_DomainMoment = (struct DomainNODE *)mymalloc("tot_DomainMoment", tot_nchanged * sizeof(struct DomainNODE)); + + MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, tot_DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD); + + for(int i = 0; i < tot_nchanged; i++) + { + 
int no = tot_DomainMoment[i].node; + + if(Ngb_Nodes[no].Ti_Current != All.Ti_Current) + drift_node(&Ngb_Nodes[no], All.Ti_Current); + + for(int j = 0; j < 3; j++) + { + Ngb_Nodes[no].vertex_vmin[j] = tot_DomainMoment[i].vertex_vmin[j]; + Ngb_Nodes[no].vertex_vmax[j] = tot_DomainMoment[i].vertex_vmax[j]; +#ifdef TREE_BASED_TIMESTEPS + ExtNgb_Nodes[no].vmin[j] = tot_DomainMoment[i].vmin[j]; + ExtNgb_Nodes[no].vmax[j] = tot_DomainMoment[i].vmax[j]; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } + + no = Ngb_Nodes[no].father; + + while(no >= 0) + { + if(Ngb_Nodes[no].Ti_Current != All.Ti_Current) + drift_node(&Ngb_Nodes[no], All.Ti_Current); + + int flag_changed = 0; + + for(int j = 0; j < 3; j++) + { + if(Ngb_Nodes[no].vertex_vmin[j] > tot_DomainMoment[i].vertex_vmin[j]) + { + Ngb_Nodes[no].vertex_vmin[j] = tot_DomainMoment[i].vertex_vmin[j]; + flag_changed = 1; + } + + if(Ngb_Nodes[no].vertex_vmax[j] < tot_DomainMoment[i].vertex_vmax[j]) + { + Ngb_Nodes[no].vertex_vmax[j] = tot_DomainMoment[i].vertex_vmax[j]; + flag_changed = 1; + } +#ifdef TREE_BASED_TIMESTEPS + if(ExtNgb_Nodes[no].vmin[j] > tot_DomainMoment[i].vmin[j]) + { + ExtNgb_Nodes[no].vmin[j] = tot_DomainMoment[i].vmin[j]; + flag_changed = 1; + } + + if(ExtNgb_Nodes[no].vmax[j] < tot_DomainMoment[i].vmax[j]) + { + ExtNgb_Nodes[no].vmax[j] = tot_DomainMoment[i].vmax[j]; + flag_changed = 1; + } +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } + + if(flag_changed == 0) + break; + + no = Ngb_Nodes[no].father; + } + } + + myfree(tot_DomainMoment); + myfree(loc_DomainMoment); + myfree(byteoffset); + myfree(bytecounts); + myfree(recvcounts); +} + +/*! \brief Updates min and max position in ngb nodes. + * + * Inverse tree walk. + * + * \param[in] i Index of particle. + * \param[in, out] nchanged Number of changed top level nodes. + * \param[out] nodelist Top level nodes that were changed. 
+ * + * \return void + */ +void ngb_update_rangebounds(int i, int *nchanged, int *nodelist) +{ + int no = Ngb_Father[i]; + + while(no >= 0) + { + if(Ngb_Nodes[no].Ti_Current != All.Ti_Current) + drift_node(&Ngb_Nodes[no], All.Ti_Current); + + int flag_changed = 0; + + for(int j = 0; j < 3; j++) + { + if(Ngb_Nodes[no].u.d.range_min[j] > P[i].Pos[j]) + { + Ngb_Nodes[no].u.d.range_min[j] = P[i].Pos[j]; + flag_changed = 1; + } + + if(Ngb_Nodes[no].u.d.range_max[j] < P[i].Pos[j]) + { + Ngb_Nodes[no].u.d.range_max[j] = P[i].Pos[j]; + flag_changed = 1; + } + } + + if(flag_changed == 0) + break; + + if(no < Ngb_FirstNonTopLevelNode) /* top-level tree-node reached */ + { + if(Ngb_Marker[no] != Ngb_MarkerValue) + { + Ngb_Marker[no] = Ngb_MarkerValue; + nodelist[*nchanged] = no; + *nchanged = *nchanged + 1; + } + break; + } + + no = Ngb_Nodes[no].father; + } +} + +/*! \brief Finalizes position bounds update. + * + * Exchanges changed information in top level nodes to all tasks. + * + * \param[in] nchanged Number of changed top level nodes. + * \param[in] nodelist List of changed top level nodes. 
+ * + * \return void + */ +void ngb_finish_rangebounds_update(int nchanged, int *nodelist) +{ + struct DomainNODE + { + int node; + MyNgbTreeFloat range_min[3]; + MyNgbTreeFloat range_max[3]; + }; + + /* share the pseudo-particle data accross CPUs */ + int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask); + int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask); + int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask); + + MPI_Allgather(&nchanged, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD); + + for(int task = 0; task < NTask; task++) + bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE); + + byteoffset[0] = 0; + for(int task = 1; task < NTask; task++) + byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; + + struct DomainNODE *loc_DomainMoment = + (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE)); + + for(int i = 0; i < nchanged; i++) + { + int no = nodelist[i]; + loc_DomainMoment[i].node = no; + + for(int j = 0; j < 3; j++) + { + loc_DomainMoment[i].range_min[j] = Ngb_Nodes[no].u.d.range_min[j]; + loc_DomainMoment[i].range_max[j] = Ngb_Nodes[no].u.d.range_max[j]; + } + } + + int tot_nchanged = 0; + for(int task = 0; task < NTask; task++) + tot_nchanged += recvcounts[task]; + + struct DomainNODE *tot_DomainMoment = (struct DomainNODE *)mymalloc("tot_DomainMoment", tot_nchanged * sizeof(struct DomainNODE)); + + MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, tot_DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD); + + for(int i = 0; i < tot_nchanged; i++) + { + int no = tot_DomainMoment[i].node; + + if(Ngb_Nodes[no].Ti_Current != All.Ti_Current) + drift_node(&Ngb_Nodes[no], All.Ti_Current); + + for(int j = 0; j < 3; j++) + { + Ngb_Nodes[no].u.d.range_min[j] = tot_DomainMoment[i].range_min[j]; + Ngb_Nodes[no].u.d.range_max[j] = tot_DomainMoment[i].range_max[j]; + } + + no = Ngb_Nodes[no].father; + + while(no >= 0) 
+ { + if(Ngb_Nodes[no].Ti_Current != All.Ti_Current) + drift_node(&Ngb_Nodes[no], All.Ti_Current); + + int flag_changed = 0; + + for(int j = 0; j < 3; j++) + { + if(Ngb_Nodes[no].u.d.range_min[j] > tot_DomainMoment[i].range_min[j]) + { + Ngb_Nodes[no].u.d.range_min[j] = tot_DomainMoment[i].range_min[j]; + flag_changed = 1; + } + + if(Ngb_Nodes[no].u.d.range_max[j] < tot_DomainMoment[i].range_max[j]) + { + Ngb_Nodes[no].u.d.range_max[j] = tot_DomainMoment[i].range_max[j]; + flag_changed = 1; + } + } + + if(flag_changed == 0) + break; + + no = Ngb_Nodes[no].father; + } + } + + myfree(tot_DomainMoment); + myfree(loc_DomainMoment); + myfree(byteoffset); + myfree(bytecounts); + myfree(recvcounts); +} + +/*! \brief Adjust ngb-tree structures due to a change in number of gas cells. + * + * \param[in] delta_NgbMaxPart Difference in number of cells. + * + * \return void + */ +void ngb_treemodifylength(int delta_NgbMaxPart) +{ + mpi_printf("ALLOCATE: Need to adjust NgbTree because Ngb_MaxPart needs to grow by %d\n", delta_NgbMaxPart); + + for(int i = 0; i < Ngb_MaxPart + NTopleaves; i++) /* check for particles and pseudo particles */ + if(Ngb_Nextnode[i] >= Ngb_MaxPart) /* internal node or pseudo particle */ + Ngb_Nextnode[i] += delta_NgbMaxPart; + + for(int i = 0; i < Ngb_MaxPart; i++) + if(Ngb_Father[i] >= Ngb_MaxPart) /* internal node or pseudo particle */ + Ngb_Father[i] += delta_NgbMaxPart; + + for(int i = 0; i < Ngb_MaxNodes; i++) + { + if(Ngb_Nodes[i + Ngb_MaxPart].u.d.nextnode >= Ngb_MaxPart) /* internal node or pseudo particle */ + Ngb_Nodes[i + Ngb_MaxPart].u.d.nextnode += delta_NgbMaxPart; + + if(Ngb_Nodes[i + Ngb_MaxPart].u.d.sibling >= Ngb_MaxPart) /* internal node or pseudo particle */ + Ngb_Nodes[i + Ngb_MaxPart].u.d.sibling += delta_NgbMaxPart; + + if(Ngb_Nodes[i + Ngb_MaxPart].father >= Ngb_MaxPart) + Ngb_Nodes[i + Ngb_MaxPart].father += delta_NgbMaxPart; + } + + for(int i = 0; i < NTopleaves; i++) + Ngb_DomainNodeIndex[i] += delta_NgbMaxPart; + + 
Ngb_Nextnode = (int *)myrealloc_movable(Ngb_Nextnode, (Ngb_MaxPart + delta_NgbMaxPart + NTopleaves) * sizeof(int)); + + memmove(&Ngb_Nextnode[Ngb_MaxPart + delta_NgbMaxPart], &Ngb_Nextnode[Ngb_MaxPart], NTopleaves * sizeof(int)); + + Ngb_MaxPart += delta_NgbMaxPart; + + Ngb_FirstNonTopLevelNode += delta_NgbMaxPart; + + Ngb_Nodes -= delta_NgbMaxPart; + +#ifdef TREE_BASED_TIMESTEPS + ExtNgb_Nodes -= delta_NgbMaxPart; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + + Ngb_Father = (int *)myrealloc_movable(Ngb_Father, Ngb_MaxPart * sizeof(int)); + + Ngb_Marker = (int *)myrealloc_movable(Ngb_Marker, (Ngb_MaxNodes + Ngb_MaxPart) * sizeof(int)); + memmove(Ngb_Marker + Ngb_MaxPart, Ngb_Marker + Ngb_MaxPart - delta_NgbMaxPart, Ngb_MaxNodes * sizeof(int)); + memset(Ngb_Marker + Ngb_MaxPart - delta_NgbMaxPart, -1, delta_NgbMaxPart * sizeof(int)); +} + +/*! \brief Allocates arrays for neighbor tree. + * + * \return void + */ +void ngb_treeallocate(void) +{ + if(Ngb_MaxPart == 0) + { + Ngb_MaxPart = All.MaxPartSph; + Ngb_MaxNodes = (int)(All.NgbTreeAllocFactor * (All.MaxPartSph + BASENUMBER)) + NTopnodes; + } + + if(All.TotNumGas == 0) + return; + + if(Ngb_Nodes) + terminate("already allocated"); + + Ngb_DomainNodeIndex = (int *)mymalloc_movable(&Ngb_DomainNodeIndex, "Ngb_DomainNodeIndex", NTopleaves * sizeof(int)); + + Ngb_Nodes = (struct NgbNODE *)mymalloc_movable(&Ngb_Nodes, "Ngb_Nodes", (Ngb_MaxNodes + 1) * sizeof(struct NgbNODE)); + Ngb_Nodes -= Ngb_MaxPart; + +#ifdef TREE_BASED_TIMESTEPS + ExtNgb_Nodes = (struct ExtNgbNODE *)mymalloc_movable(&ExtNgb_Nodes, "ExtNgb_Nodes", (Ngb_MaxNodes + 1) * sizeof(struct ExtNgbNODE)); + ExtNgb_Nodes -= Ngb_MaxPart; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + Ngb_Nextnode = (int *)mymalloc_movable(&Ngb_Nextnode, "Ngb_Nextnode", (Ngb_MaxPart + NTopleaves) * sizeof(int)); + Ngb_Father = (int *)mymalloc_movable(&Ngb_Father, "Ngb_Father", Ngb_MaxPart * sizeof(int)); + + Ngb_Marker = (int *)mymalloc_movable(&Ngb_Marker, "Ngb_Marker", (Ngb_MaxNodes 
+ Ngb_MaxPart) * sizeof(int)); +} + +/*! \brief This function frees the memory allocated for the neighbor tree. + * + * \return void + */ +void ngb_treefree(void) +{ + if(All.TotNumGas == 0) + return; + + if(Ngb_Nodes) + { + myfree_movable(Ngb_Marker); + myfree_movable(Ngb_Father); + myfree_movable(Ngb_Nextnode); +#ifdef TREE_BASED_TIMESTEPS + myfree_movable(ExtNgb_Nodes + Ngb_MaxPart); + ExtNgb_Nodes = NULL; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + myfree_movable(Ngb_Nodes + Ngb_MaxPart); + myfree_movable(Ngb_DomainNodeIndex); + + Ngb_Marker = NULL; + Ngb_Father = NULL; + Ngb_Nodes = NULL; + Ngb_DomainNodeIndex = NULL; + Ngb_Nextnode = NULL; + Ngb_MaxPart = 0; + Ngb_MaxNodes = 0; + } + else + terminate("trying to free the tree even though it's not allocated"); +} diff --git a/src/amuse/community/arepo/src/ngbtree/ngbtree_search.c b/src/amuse/community/arepo/src/ngbtree/ngbtree_search.c new file mode 100644 index 0000000000..e777a7c29c --- /dev/null +++ b/src/amuse/community/arepo/src/ngbtree/ngbtree_search.c @@ -0,0 +1,376 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/ngbtree/ngbtree_search.c + * \date 05/2018 + * \brief This file contains a search routine on the neighbor tree. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void find_nearest_meshpoint_global(mesh_search_data * + * searchdata_input, int nn, int hsmlguess, int verbose) + * int ngbsearch_primary_cell_evaluate(int target, int mode, + * int threadid) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/* temporary particle arrays */ +static MyDouble *ngbsearch_nearest_dist; +static MyDouble *ngbsearch_hsml; +static mesh_search_data *searchdata; + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble pos[3]; /* tracer particle position */ + MyDouble hsml; /* current search radius */ + MyDouble distance; /* nearest neighbor distance */ + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. 
+ * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->pos[0] = searchdata[i].Pos[0]; + in->pos[1] = searchdata[i].Pos[1]; + in->pos[2] = searchdata[i].Pos[2]; + + in->hsml = ngbsearch_hsml[i]; + in->distance = ngbsearch_nearest_dist[i]; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Distance; /* distance to closest cell on task */ + int Task; + int Index; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? + * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + if(out->Index >= 0) + { + ngbsearch_nearest_dist[i] = out->Distance; + searchdata[i].Task = out->Task; + searchdata[i].u.Index = out->Index; + } + } + else /* combine */ + { + /* closer cell on other task? */ + if(out->Distance < ngbsearch_nearest_dist[i]) + { + ngbsearch_nearest_dist[i] = out->Distance; + searchdata[i].Task = out->Task; + searchdata[i].u.Index = out->Index; + } + } +} + +#include "../utils/generic_comm_helpers2.h" + +static int ngbsearch_primary_cell_evaluate(int target, int mode, int threadid); +static int n; + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. 
+ * + * \return void + */ +static void kernel_local(void) +{ + int i; + + /* do local particles */ + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= n) + break; + + if(searchdata[i].Task == -1) + ngbsearch_primary_cell_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + ngbsearch_primary_cell_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Searches the cells at the positions in searchdata. + * + * This function searches the cells which are at the positions specified in + * searchdata. The Pos field must be set. After the search is performed the + * Task and Index field contain the task/index of the cell at position Pos. + * If hsmlguess=1 initial search radius is read from Index/Hsml union in + * searchdata. + * + * \param[in] searchdata_input Contains the search positions, after function + * call the fields Task and Index are set. + * \param[in] nn Number of items in searchdata. + * \param[in] hsmlguess Guess for initial search radius; + * 1: from searchdata; else from MeanVolume of cells. + * \param[in] verbose More output. 
+ * + * \return void + */ +void find_nearest_meshpoint_global(mesh_search_data *searchdata_input, int nn, int hsmlguess, int verbose) +{ + int i; + n = nn; + ngbsearch_nearest_dist = mymalloc("ngbsearch_nearest_dist", n * sizeof(MyDouble)); + ngbsearch_hsml = mymalloc("ngbsearch_hsml", n * sizeof(MyDouble)); + searchdata = searchdata_input; + + for(i = 0; i < n; i++) + { + ngbsearch_nearest_dist[i] = MAX_REAL_NUMBER; + + if(hsmlguess) + ngbsearch_hsml[i] = searchdata[i].u.hsmlguess; + else + ngbsearch_hsml[i] = 1e-6 * pow(All.MeanVolume, 1.0 / 3); + + searchdata[i].Task = -1; // None found yet + } + + generic_set_MaxNexport(); + + int ntot, iter = 0; + + /* we will repeat the whole thing for those points where we did not find a nearest neighbor */ + do + { + generic_comm_pattern(n, kernel_local, kernel_imported); + + int npleft = 0; + + /* do final operations on results */ + for(i = 0; i < n; i++) + { + if(searchdata[i].Task == -1) + { + npleft++; + ngbsearch_hsml[i] *= 2.0; + + if(iter >= MAXITER - 10) + { + printf("i=%d task=%d hsml=%g nearest dist=%g pos=(%g|%g|%g)\n", i, ThisTask, ngbsearch_hsml[i], + ngbsearch_nearest_dist[i], searchdata[i].Pos[0], searchdata[i].Pos[1], searchdata[i].Pos[2]); + myflush(stdout); + } + if(iter > MAXITER) + terminate("NGBSEARCH: iter > MAXITER"); + } + } + + /* sum up the left overs */ + MPI_Allreduce(&npleft, &ntot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + if(ntot > 0) /* ok, we need to repeat for a few particles */ + { + iter++; + if(iter > 0 && ThisTask == 0 && verbose) + { + printf("NGBSEARCH: iteration %d: need to repeat for %d points.\n", iter, ntot); + myflush(stdout); + } + + if(iter > MAXITER) + terminate("NGBSEARCH: failed to converge in tracer particles\n"); + } + } + while(ntot > 0); + + myfree(ngbsearch_hsml); + myfree(ngbsearch_nearest_dist); +} + +/*! \brief Performs the neighbor search. + * + * \param[in] target the index of the particle to process(mode 0: in + * searchdata, mode 1: in NgbSearchDataGet/Result). 
+ * \param[in] mode either 0 (handle local particles) or 1 (handle particles + * sent to us). + * \param[in] treadid Id of thread. + * + * \return 0 + */ +int ngbsearch_primary_cell_evaluate(int target, int mode, int threadid) +{ + int j, n; + int numnodes, *firstnode; + MyDouble h, distmax; + MyDouble dx, dy, dz, r; + MyDouble *pos; + data_in local, *target_data; + data_out out; + + int index = -1; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->pos; + h = target_data->hsml; + distmax = target_data->distance; + + int numngb = ngb_treefind_variable_threads(pos, h, target, mode, threadid, numnodes, firstnode); + + for(n = 0; n < numngb; n++) + { + j = Thread[threadid].Ngblist[n]; + + dx = pos[0] - P[j].Pos[0]; + dy = pos[1] - P[j].Pos[1]; + dz = pos[2] - P[j].Pos[2]; + + if(dx > boxHalf_X) + dx -= boxSize_X; + if(dx < -boxHalf_X) + dx += boxSize_X; + if(dy > boxHalf_Y) + dy -= boxSize_Y; + if(dy < -boxHalf_Y) + dy += boxSize_Y; + if(dz > boxHalf_Z) + dz -= boxSize_Z; + if(dz < -boxHalf_Z) + dz += boxSize_Z; + + r = sqrt(dx * dx + dy * dy + dz * dz); + if(r < distmax && r < h && P[j].ID != 0 && P[j].Mass > 0) + { + distmax = r; + index = j; + } + } + + out.Distance = distmax; + out.Task = ThisTask; + out.Index = index; + + if(index < 0) + { + out.Distance = MAX_REAL_NUMBER; + out.Task = -1; + out.Index = -1; + } + + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} diff --git a/src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c b/src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c new file mode 100644 index 0000000000..c682ce157d --- /dev/null +++ b/src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c @@ -0,0 +1,225 @@ +/*! 
+ * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/ngbtree/ngbtree_walk.c + * \date 05/2018 + * \brief Routines to walk the ngb tree. + * \details contains functions: + * int ngb_treefind_variable_threads(MyDouble searchcenter[3], + * MyFloat hsml, int target, int mode, int thread_id, int + * numnodes, int *firstnode) + * int ngb_treefind_export_node_threads(int no, int target, int + * thread_id, int image_flag) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 16.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Finds all cells around seearchcenter in region with radius hsml. + * + * This function returns the number of neighbors with distance <= hsml, and + * returns the particle indices in the global buffer Ngblist. + * The tree traversal starts at startnode. + * Keep in mind that this is usually called within an *_evaluate function + * within the generic communication pattern. 
This means that first, the local + * (bound to this task) search is performed and the local neighbors written + * to the array, then communication happens and afterwards, the function is + * called again in imported mode, finding particles on other tasks. + * + * \param[in] searchcenter Center of the neighbor search. + * \param[in] hsml Radius of the search. + * \param[in] target Index of the particle around which the search is + * performed; needed for parallel search. If < 0, only local search + * is performed. + * \param[in] mode Mode for local or imported particle search. + * \param[in] thread_id ID of thread (always 0 in our case). + * \param[in] numnodes Number of nodes on this task (1 for mode local; + * for mode imported: given by generic_get_numnodes(...) ). + * \param[in] firstnode Node to start with (in case of mode imported). + * + * \return The number of neighbors found. + */ +int ngb_treefind_variable_threads(MyDouble searchcenter[3], MyFloat hsml, int target, int mode, int thread_id, int numnodes, + int *firstnode) +{ + MyDouble search_min[3], search_max[3], search_max_Lsub[3], search_min_Ladd[3]; + + for(int i = 0; i < 3; i++) + { + search_min[i] = searchcenter[i] - 1.001 * hsml; + search_max[i] = searchcenter[i] + 1.001 * hsml; + } + + search_max_Lsub[0] = search_max[0] - boxSize_X; + search_max_Lsub[1] = search_max[1] - boxSize_Y; + search_max_Lsub[2] = search_max[2] - boxSize_Z; + + search_min_Ladd[0] = search_min[0] + boxSize_X; + search_min_Ladd[1] = search_min[1] + boxSize_Y; + search_min_Ladd[2] = search_min[2] + boxSize_Z; + + int numngb = 0; + double xtmp, ytmp, ztmp; + double hsml2 = hsml * hsml; + + for(int k = 0; k < numnodes; k++) + { + int no; + + if(mode == MODE_LOCAL_PARTICLES) + { + no = Ngb_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = Ngb_Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + if(no < Ngb_MaxPart) /* single particle */ + { + int p = no; + no = Ngb_Nextnode[no]; + + if(P[p].Type > 
0) + continue; + + if(P[p].Ti_Current != All.Ti_Current) + { + drift_particle(p, All.Ti_Current); + } + + double dx = NGB_PERIODIC_LONG_X(P[p].Pos[0] - searchcenter[0]); + if(dx > hsml) + continue; + double dy = NGB_PERIODIC_LONG_Y(P[p].Pos[1] - searchcenter[1]); + if(dy > hsml) + continue; + double dz = NGB_PERIODIC_LONG_Z(P[p].Pos[2] - searchcenter[2]); + if(dz > hsml) + continue; + + double r2 = dx * dx + dy * dy + dz * dz; + if(r2 > hsml2) + continue; + + Thread[thread_id].R2list[numngb] = r2; + Thread[thread_id].Ngblist[numngb++] = p; + } + else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */ + { + struct NgbNODE *current = &Ngb_Nodes[no]; + + if(mode == MODE_IMPORTED_PARTICLES) + { + if(no < + Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + break; + } + + no = current->u.d.sibling; /* in case the node can be discarded */ + + if(current->Ti_Current != All.Ti_Current) + { + drift_node(current, All.Ti_Current); + } + + if(search_min[0] > current->u.d.range_max[0] && search_max_Lsub[0] < current->u.d.range_min[0]) + continue; + if(search_min_Ladd[0] > current->u.d.range_max[0] && search_max[0] < current->u.d.range_min[0]) + continue; + + if(search_min[1] > current->u.d.range_max[1] && search_max_Lsub[1] < current->u.d.range_min[1]) + continue; + if(search_min_Ladd[1] > current->u.d.range_max[1] && search_max[1] < current->u.d.range_min[1]) + continue; + + if(search_min[2] > current->u.d.range_max[2] && search_max_Lsub[2] < current->u.d.range_min[2]) + continue; + if(search_min_Ladd[2] > current->u.d.range_max[2] && search_max[2] < current->u.d.range_min[2]) + continue; + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else /* pseudo particle */ + { + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == MODE_IMPORTED_PARTICLES should not occur here"); + + if(target >= 0) /* if no target is given, export will not occur */ + if(ngb_treefind_export_node_threads(no, 
target, thread_id, 0)) + return -1; + + no = Ngb_Nextnode[no - Ngb_MaxNodes]; + continue; + } + } + } + return numngb; +} + +/*! \brief Prepares export of ngb-tree node. + * + * \param[in] no Pseudoparticle node to be exported. + * \param[in] target (Local) index to identify what it refers to. + * \param[in] thread_id ID of thread (0 in our case). + * \param[in] image_flag Bit flag used in EXTENDED_GHOST_SEARCH. + * + * \return 0 + */ +int ngb_treefind_export_node_threads(int no, int target, int thread_id, int image_flag) +{ + /* The task indicated by the pseudoparticle node */ + int task = DomainTask[no - (Ngb_MaxPart + Ngb_MaxNodes)]; + + if(Thread[thread_id].Exportflag[task] != target) + { + Thread[thread_id].Exportflag[task] = target; + int nexp = Thread[thread_id].Nexport++; + Thread[thread_id].PartList[nexp].Task = task; + Thread[thread_id].PartList[nexp].Index = target; + Thread[thread_id].ExportSpace -= Thread[thread_id].ItemSize; + } + + int nexp = Thread[thread_id].NexportNodes++; + nexp = -1 - nexp; + struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[thread_id].PartList) + Thread[thread_id].InitialSpace); + nodelist[nexp].Task = task; + nodelist[nexp].Index = target; + nodelist[nexp].Node = Ngb_DomainNodeIndex[no - (Ngb_MaxPart + Ngb_MaxNodes)]; +#ifdef EXTENDED_GHOST_SEARCH + nodelist[nexp].BitFlags = image_flag; +#endif /* #ifdef EXTENDED_GHOST_SEARCH */ + Thread[thread_id].ExportSpace -= sizeof(struct datanodelist) + sizeof(int); + return 0; +} diff --git a/src/amuse/community/arepo/src/star_formation/sfr_eEOS.c b/src/amuse/community/arepo/src/star_formation/sfr_eEOS.c new file mode 100644 index 0000000000..7e9fbef498 --- /dev/null +++ b/src/amuse/community/arepo/src/star_formation/sfr_eEOS.c @@ -0,0 +1,539 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/star_formation/sfr_eEOS.c + * \date 05/2018 + * \brief Star formation rate routines for the effective multi-phase + * model. + * \details contains functions: + * void cooling_and_starformation(void) + * double get_starformation_rate(int i) + * void init_clouds(void) + * void integrate_sfr(void) + * void set_units_sfr(void) + * double calc_egyeff(int i, double gasdens, double *ne, + * double *x, double *tsfr, double *factorEVP) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../gravity/forcetree.h" + +#ifdef USE_SFR + +/*! \brief Main driver for star formation and gas cooling. + * + * This function loops over all the active gas cells. If a given cell + * meets the criteria for star formation to be active the multi-phase + * model is activated, the properties of the cell are updated according to + * the latter and the star formation rate computed. 
In the other case, the + * standard isochoric cooling is applied to the gas cell by calling the + * function cool_cell() and the star formation rate is set to 0. + * + * \return void + */ +void cooling_and_starformation(void) +{ + TIMER_START(CPU_COOLINGSFR); + + int idx, i, bin, flag; + double dt, dtime, ne = 1; + double unew, du; + double cloudmass; + double factorEVP, dens; + double tsfr; + double egyeff, x; + + double eos_dens_threshold = All.PhysDensThresh; + + /* note: assuming FULL ionization */ + double u_to_temp_fac = + (4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC))) * PROTONMASS / BOLTZMANN * GAMMA_MINUS1 * All.UnitEnergy_in_cgs / All.UnitMass_in_g; + + /* clear the SFR stored in the active timebins */ + for(bin = 0; bin < TIMEBINS; bin++) + if(TimeBinSynchronized[bin]) + TimeBinSfr[bin] = 0; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].Mass == 0 && P[i].ID == 0) + continue; /* skip cells that have been swallowed or eliminated */ + + dens = SphP[i].Density; + + dt = (P[i].TimeBinHydro ? 
(((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval; + dtime = All.cf_atime * dt / All.cf_time_hubble_a; + + /* apply the temperature floor */ + + unew = dmax(All.MinEgySpec, SphP[i].Utherm); + + if(unew < 0) + terminate("Invalid Temperature: Task=%d i=%d unew=%g\n", ThisTask, i, unew); + + du = unew - SphP[i].Utherm; + SphP[i].Utherm += du; + SphP[i].Energy += All.cf_atime * All.cf_atime * du * P[i].Mass; + + egyeff = 0.; + /* calculate the effective equation of state for gas above the density threshold */ + if(dens * All.cf_a3inv >= eos_dens_threshold) + { + ne = SphP[i].Ne; + egyeff = calc_egyeff(i, dens * All.cf_a3inv, &ne, &x, &tsfr, &factorEVP); + } + + /* do cooling, except for gas above the EOS density threshold that is colder than the eEOS */ + if(dens * All.cf_a3inv < eos_dens_threshold || (dens * All.cf_a3inv >= eos_dens_threshold && SphP[i].Utherm > egyeff)) + { + cool_cell(i); + } + + /* check whether conditions for star formation are fulfilled. + * f=1 normal cooling + * f=0 star formation + */ + + flag = 1; /* default is normal cooling */ + + /* enable star formation if gas is above SF density threshold */ + if(dens * All.cf_a3inv >= eos_dens_threshold) + if(SphP[i].Utherm <= egyeff || u_to_temp_fac * SphP[i].Utherm <= All.TemperatureThresh) + flag = 0; + + if(All.ComovingIntegrationOn) + if(dens < All.OverDensThresh) + flag = 1; + + if(P[i].Mass == 0) /* tracer particles don't form stars */ + flag = 1; + + if(flag == 1) + SphP[i].Sfr = 0; + + /* active star formation */ + if(flag == 0) + { + SphP[i].Ne = (HYDROGEN_MASSFRAC + 1) / 2 / HYDROGEN_MASSFRAC; /* note: assuming FULL ionization */ + + cloudmass = x * P[i].Mass; + + if(tsfr < dtime) + tsfr = dtime; + + if(dt > 0) + { + if(P[i].TimeBinHydro) /* upon start-up, we need to protect against dt==0 */ + { + unew = SphP[i].Utherm; + + // put (cold) star forming cells on the effective equation of state + if(SphP[i].Utherm < egyeff) + { + unew = egyeff; + } + + du = unew - 
SphP[i].Utherm; + if(unew < All.MinEgySpec) + du = All.MinEgySpec - SphP[i].Utherm; + + SphP[i].Utherm += du; + SphP[i].Energy += All.cf_atime * All.cf_atime * du * P[i].Mass; + +#ifdef OUTPUT_COOLHEAT + if(dtime > 0) + SphP[i].CoolHeat = du * P[i].Mass / dtime; +#endif /* #ifdef OUTPUT_COOLHEAT */ + + set_pressure_of_cell(i); + } + } + + SphP[i].Sfr = (1 - All.FactorSN) * cloudmass / tsfr * (All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR); + + TimeBinSfr[P[i].TimeBinHydro] += SphP[i].Sfr; + } + } /* end of main loop over active particles */ + + TIMER_STOP(CPU_COOLINGSFR); +} + +/*! \brief Return the star formation rate associated with the gas cell i. + * + * \param[in] i the index of the gas cell. + * + * \return star formation rate in solar masses / yr. + */ +double get_starformation_rate(int i) +{ + if(RestartFlag == 3) + return SphP[i].Sfr; + + double rateOfSF; + int flag; + double tsfr; + double factorEVP, egyeff, ne, x, cloudmass; + /* note: assuming FULL ionization */ + double u_to_temp_fac = + (4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC))) * PROTONMASS / BOLTZMANN * GAMMA_MINUS1 * All.UnitEnergy_in_cgs / All.UnitMass_in_g; + + double eos_dens_threshold = All.PhysDensThresh; + + flag = 1; /* default is normal cooling */ + egyeff = 0.0; + + if(SphP[i].Density * All.cf_a3inv >= eos_dens_threshold) + { + ne = SphP[i].Ne; + egyeff = calc_egyeff(i, SphP[i].Density * All.cf_a3inv, &ne, &x, &tsfr, &factorEVP); + } + + if(SphP[i].Density * All.cf_a3inv >= All.PhysDensThresh) + if(SphP[i].Utherm <= 1.01 * egyeff || u_to_temp_fac * SphP[i].Utherm <= All.TemperatureThresh) + flag = 0; + + if(All.ComovingIntegrationOn) + if(SphP[i].Density < All.OverDensThresh) + flag = 1; + + if(flag == 1) + return 0; + + cloudmass = x * P[i].Mass; + + rateOfSF = (1 - All.FactorSN) * cloudmass / tsfr; + + /* convert to solar masses per yr */ + rateOfSF *= (All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR); + + return rateOfSF; +} + +/*! 
\brief Initialize the parameters of effective multi-phase model. + * + * In particular this function computes the value of PhysDensThresh, that is + * the physical density threshold above which star formation is active, if + * its value was set to 0 in the parameter file. + * + * \return void + */ +void init_clouds(void) +{ + double A0, dens, tcool, ne, coolrate, egyhot, x, u4, meanweight; + double tsfr, peff, fac, neff, egyeff, factorEVP, sigma, thresholdStarburst; + + if(All.PhysDensThresh == 0) + { + A0 = All.FactorEVP; + + egyhot = All.EgySpecSN / A0; + + meanweight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC)); /* note: assuming FULL ionization */ + u4 = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * 1.0e4; + u4 *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; + + /* choose a high reference density to avoid that we pick up a compton cooling contribution */ + if(All.ComovingIntegrationOn) + dens = 1.0e10 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + else + dens = 1.0e10 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + + if(All.ComovingIntegrationOn) + { + All.Time = 1.0; /* to be guaranteed to get z=0 rate */ + set_cosmo_factors_for_current_time(); + IonizeParams(); + } + + ne = 1.0; + SetZeroIonization(); + + tcool = GetCoolingTime(egyhot, dens, &ne); + + coolrate = egyhot / tcool / dens; + + x = (egyhot - u4) / (egyhot - All.EgySpecCold); + + All.PhysDensThresh = + x / pow(1 - x, 2) * (All.FactorSN * All.EgySpecSN - (1 - All.FactorSN) * All.EgySpecCold) / (All.MaxSfrTimescale * coolrate); + + mpi_printf( + "USE_SFR: A0=%g PhysDensThresh=%g (int units) %g h^2 cm^-3 expected fraction of cold gas at threshold=%g tcool=%g " + "dens=%g egyhot=%g\n", + A0, All.PhysDensThresh, All.PhysDensThresh / (PROTONMASS / HYDROGEN_MASSFRAC / All.UnitDensity_in_cgs), x, tcool, dens, + egyhot); + + dens = All.PhysDensThresh; + + do + { + ne = 0.5; + egyeff = calc_egyeff(-1, dens, &ne, &x, &tsfr, &factorEVP); + peff = GAMMA_MINUS1 * dens * egyeff; + + fac 
= 1 / (log(dens * 1.025) - log(dens)); + dens *= 1.025; + + neff = -log(peff) * fac; + + ne = 0.5; + egyeff = calc_egyeff(-1, dens, &ne, &x, &tsfr, &factorEVP); + peff = GAMMA_MINUS1 * dens * egyeff; + + neff += log(peff) * fac; + } + while(neff > 4.0 / 3); + + thresholdStarburst = dens; + + mpi_printf("USE_SFR: run-away sets in for dens=%g dynamic range for quiescent star formation=%g\n", thresholdStarburst, + thresholdStarburst / All.PhysDensThresh); + + integrate_sfr(); + + if(ThisTask == 0) + { + sigma = 10.0 / All.Hubble * 1.0e-10 / pow(1.0e-3, 2); + + printf("USE_SFR: isotherm sheet central density=%g z0=%g\n", M_PI * All.G * sigma * sigma / (2 * GAMMA_MINUS1) / u4, + GAMMA_MINUS1 * u4 / (2 * M_PI * All.G * sigma)); + myflush(stdout); + } + + mpi_printf("USE_SFR: SNII energy=%g [internal units] = %g [erg/M_sun] = %g [1e51 erg/Msun]\n", All.FactorSN * All.EgySpecSN, + All.FactorSN * All.EgySpecSN / (1 - All.FactorSN) / (All.UnitMass_in_g / All.UnitEnergy_in_cgs) * SOLAR_MASS, + All.FactorSN * All.EgySpecSN / (1 - All.FactorSN) / (All.UnitMass_in_g / All.UnitEnergy_in_cgs) * SOLAR_MASS / 1e51); + + if(All.ComovingIntegrationOn) + { + All.Time = All.TimeBegin; + set_cosmo_factors_for_current_time(); + IonizeParams(); + } + } +} + +/*! \brief Compute the effective equation of state for the gas and + * the integrated SFR per unit area. + * + * This function computes the effective equation of state for the gas and + * the integrated SFR per unit area. It saves the results into two files: + * eos.txt for the equation of state and sfrrate.txt for the integrated SFR. + * In the latter case, the SFR is determined by integrating along the vertical + * direction the gas density of an infinite self-gravitating isothermal sheet. + * The integrated gas density is saved as well, so effectively sfrrate.txt + * contains the Kennicutt-Schmidt law of the star formation model. 
+ * + * \return void + */ +void integrate_sfr(void) +{ + double rho0, rho, rho2, q, dz, gam, sigma = 0, sigma_u4, sigmasfr = 0, ne, P1; + double x = 0, P, P2, x2, tsfr2, factorEVP2, drho, dq; + double meanweight, u4, tsfr, factorEVP, egyeff, egyeff2; + FILE *fd; + + double eos_dens_threshold = All.PhysDensThresh; + + meanweight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC)); /* note: assuming FULL ionization */ + u4 = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * 1.0e4; + u4 *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; + + if(All.ComovingIntegrationOn) + { + All.Time = 1.0; /* to be guaranteed to get z=0 rate */ + set_cosmo_factors_for_current_time(); + IonizeParams(); + } + + if(WriteMiscFiles && (ThisTask == 0)) + fd = fopen("eos.txt", "w"); + else + fd = 0; + + for(rho = eos_dens_threshold; rho <= 1000 * eos_dens_threshold; rho *= 1.1) + { + ne = 1.0; + egyeff = calc_egyeff(-1, rho, &ne, &x, &tsfr, &factorEVP); + + P = GAMMA_MINUS1 * rho * egyeff; + + if(WriteMiscFiles && (ThisTask == 0)) + { + fprintf(fd, "%g %g %g\n", rho, P, x); + } + } + + if(WriteMiscFiles && (ThisTask == 0)) + fclose(fd); + + if(WriteMiscFiles && (ThisTask == 0)) + fd = fopen("sfrrate.txt", "w"); + else + fd = 0; + + for(rho0 = eos_dens_threshold; rho0 <= 10000 * eos_dens_threshold; rho0 *= 1.02) + { + rho = rho0; + q = 0; + dz = 0.001; + + sigma = sigmasfr = sigma_u4 = 0; + + while(rho > 0.0001 * rho0) + { + if(rho > All.PhysDensThresh) + { + ne = 1.0; + egyeff = calc_egyeff(-1, rho, &ne, &x, &tsfr, &factorEVP); + + P = P1 = GAMMA_MINUS1 * rho * egyeff; + + rho2 = 1.1 * rho; + + egyeff2 = calc_egyeff(-1, rho2, &ne, &x2, &tsfr2, &factorEVP2); + + P2 = GAMMA_MINUS1 * rho2 * egyeff2; + + gam = log(P2 / P1) / log(rho2 / rho); + } + else + { + tsfr = 0; + + P = GAMMA_MINUS1 * rho * u4; + gam = 1.0; + + sigma_u4 += rho * dz; + } + + drho = q; + dq = -(gam - 2) / rho * q * q - 4 * M_PI * All.G / (gam * P) * rho * rho * rho; + + sigma += rho * dz; + if(tsfr > 0) + { + sigmasfr += 
(1 - All.FactorSN) * rho * x / tsfr * dz; + } + + rho += drho * dz; + q += dq * dz; + } + + sigma *= 2; /* to include the other side */ + sigmasfr *= 2; + sigma_u4 *= 2; + + sigma *= All.HubbleParam * (All.UnitMass_in_g / SOLAR_MASS) * PARSEC * PARSEC / (All.UnitLength_in_cm * All.UnitLength_in_cm); + sigmasfr *= All.HubbleParam * All.HubbleParam * (All.UnitMass_in_g / SOLAR_MASS) * (SEC_PER_YEAR / All.UnitTime_in_s) * 1.0e6 * + PARSEC * PARSEC / (All.UnitLength_in_cm * All.UnitLength_in_cm); + sigma_u4 *= All.HubbleParam * (All.UnitMass_in_g / SOLAR_MASS) * PARSEC * PARSEC / (All.UnitLength_in_cm * All.UnitLength_in_cm); + + if(WriteMiscFiles && (ThisTask == 0)) + { + fprintf(fd, "%g %g %g %g\n", rho0, sigma, sigmasfr, sigma_u4); + } + } + + if(All.ComovingIntegrationOn) + { + All.Time = All.TimeBegin; + set_cosmo_factors_for_current_time(); + IonizeParams(); + } + + if(WriteMiscFiles && (ThisTask == 0)) + fclose(fd); +} + +/*! \brief Set the appropriate units for the parameters of the multi-phase + * model. + * + * \return void + */ +void set_units_sfr(void) +{ + double meanweight; + + All.OverDensThresh = All.CritOverDensity * All.OmegaBaryon * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + + All.PhysDensThresh = All.CritPhysDensity * PROTONMASS / HYDROGEN_MASSFRAC / All.UnitDensity_in_cgs; + + meanweight = 4 / (1 + 3 * HYDROGEN_MASSFRAC); /* note: assuming NEUTRAL GAS */ + + All.EgySpecCold = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.TempClouds; + All.EgySpecCold *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; + + meanweight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC)); /* note: assuming FULL ionization */ + + All.EgySpecSN = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.TempSupernova; + All.EgySpecSN *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; +} + +/*! \brief Calculate the effective energy of the multi-phase model. + * + * \param[in] i (unused) + * \param[in] gasdens gas density. 
+ * \param[in, out] ne Fractional electron density. + * \param[out] x Fraction cold gas within model. + * \param[out] tsfr Star formation timescale. + * \param[out] factorEVP Supernova evaporation factor for given density. + */ +double calc_egyeff(int i, double gasdens, double *ne, double *x, double *tsfr, double *factorEVP) +{ + double egyhot, egyeff, tcool, y; + double rho = gasdens; + + rho = dmax(rho, All.PhysDensThresh); + + *tsfr = sqrt(All.PhysDensThresh / rho) * All.MaxSfrTimescale; + + *factorEVP = pow(rho / All.PhysDensThresh, -0.8) * All.FactorEVP; + + egyhot = All.EgySpecSN / (1 + *factorEVP) + All.EgySpecCold; + + tcool = GetCoolingTime(egyhot, rho, ne); + + y = *tsfr / tcool * egyhot / (All.FactorSN * All.EgySpecSN - (1 - All.FactorSN) * All.EgySpecCold); + + *x = 1 + 1 / (2 * y) - sqrt(1 / y + 1 / (4 * y * y)); + + egyeff = egyhot * (1 - *x) + All.EgySpecCold * (*x); + + return egyeff; +} + +#endif /* #ifdef USE_SFR */ diff --git a/src/amuse/community/arepo/src/star_formation/starformation.c b/src/amuse/community/arepo/src/star_formation/starformation.c new file mode 100644 index 0000000000..9ce94a96e5 --- /dev/null +++ b/src/amuse/community/arepo/src/star_formation/starformation.c @@ -0,0 +1,437 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/star_formation/starformation.c + * \date 05/2018 + * \brief Generic creation routines for star particles. + * \details Star formation rates are calculated in sfr_eEOS for the + * multiphase model. + * contains functions: + * void sfr_init() + * void sfr_create_star_particles(void) + * void convert_cell_into_star(int i, double birthtime) + * void spawn_star_from_cell(int igas, double birthtime, int + * istar, MyDouble mass_of_star) + * void make_star(int idx, int i, double prob, MyDouble + * mass_of_star, double *sum_mass_stars) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 07.06.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../gravity/forcetree.h" + +#ifdef USE_SFR + +static int stars_spawned; /*!< local number of star particles spawned in the time step */ +static int tot_stars_spawned; /*!< global number of star paricles spawned in the time step */ +static int stars_converted; /*!< local number of gas cells converted into stars in the time step */ +static int tot_stars_converted; /*!< global number of gas cells converted into stars in the time step */ +static int altogether_spawned; /*!< local number of star+wind particles spawned in the time step */ +static int tot_altogether_spawned; /*!< global number of star+wind particles spawned in the time step */ +static double cum_mass_stars = 0.0; /*!< cumulative mass of stars created in the time step (global value) */ + +static int sfr_init_called = 0; + +/*! \brief Initialization routine. + * + * \return void + */ +void sfr_init() +{ + if(sfr_init_called) + return; + + sfr_init_called = 1; + + init_clouds(); +} + +/*! 
\brief This routine creates star particles according to their + * respective rates. + * + * This function loops over all the active gas cells. If in a given cell the + * SFR is greater than zero, the probability of forming a star is computed + * and the corresponding particle is created stichastically according to the + * model in Springel & Hernquist (2003, MNRAS). It also saves information + * about the formed stellar mass and the star formation rate in the file + * FdSfr. + * + * \return void + */ +void sfr_create_star_particles(void) +{ + TIMER_START(CPU_COOLINGSFR); + + int idx, i, bin; + double dt, dtime; + MyDouble mass_of_star; + double sum_sm, total_sm, rate, sum_mass_stars, total_sum_mass_stars; + double p = 0, pall = 0, prob, p_decide; + double rate_in_msunperyear; + double sfrrate, totsfrrate; + + stars_spawned = stars_converted = 0; + sum_sm = sum_mass_stars = 0; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i >= 0) + { + if(P[i].Mass == 0 && P[i].ID == 0) + continue; /* skip cells that have been swallowed or eliminated */ + +#ifdef SFR_KEEP_CELLS + if(P[i].Mass < 0.3 * All.TargetGasMass) + continue; +#endif /* #ifdef SFR_KEEP_CELLS */ + + dt = (P[i].TimeBinHydro ? 
(((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval; + + /* the actual time-step */ + + dtime = All.cf_atime * dt / All.cf_time_hubble_a; + + mass_of_star = 0; + prob = 0; + p = 0; + pall = 0; + + if(SphP[i].Sfr > 0) + { + p = SphP[i].Sfr / ((All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR)) * dtime / P[i].Mass; + pall = p; + sum_sm += P[i].Mass * (1 - exp(-p)); + +#if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) + + if(P[i].Mass < 2.0 * All.TargetGasMass) +#ifdef SFR_KEEP_CELLS + mass_of_star = 0.9 * P[i].Mass; +#else /* #ifdef SFR_KEEP_CELLS */ + mass_of_star = P[i].Mass; +#endif /* #ifdef SFR_KEEP_CELLS */ + else + mass_of_star = All.TargetGasMass; + +#ifdef REFINEMENT_HIGH_RES_GAS + if(SphP[i].HighResMass < HIGHRESMASSFAC * P[i].Mass) + { + /* this cell does not appear to be in the high-res region. + If we form a star, then it is given the mass of the cell, + and later we give the star the SofteningType=3 particle to give it large softening */ +#ifdef SFR_KEEP_CELLS + mass_of_star = 0.9 * P[i].Mass; +#else /* #ifdef SFR_KEEP_CELLS */ + mass_of_star = P[i].Mass; +#endif /* #ifdef SFR_KEEP_CELLS #else */ + } + +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + +#else /* #if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) */ + mass_of_star = P[i].Mass; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) #else */ + +#ifdef SFR_KEEP_CELLS + if(P[i].Mass < 0.5 * All.TargetGasMass) + continue; /* do not make stars from cells that should be derefined */ +#endif /* #ifdef SFR_KEEP_CELLS */ + + prob = P[i].Mass / mass_of_star * (1 - exp(-pall)); + } + + if(prob == 0) + continue; + + if(prob < 0) + terminate("prob < 0"); + + if(prob > 1) + { + printf( + "SFR: Warning, need to make a heavier star than desired. 
Task=%d prob=%g P[i].Mass=%g mass_of_star=%g " + "mass_of_star_new=%g p=%g pall=%g\n", + ThisTask, prob, P[i].Mass, mass_of_star, P[i].Mass * (1 - exp(-pall)), p, pall); + mass_of_star = P[i].Mass * (1 - exp(-pall)); + prob = 1.0; + } + + /* decide what process to consider (currently available: make a star or kick to wind) */ + p_decide = get_random_number(); + + if(p_decide < p / pall) /* ok, it is decided to consider star formation */ + make_star(idx, i, prob, mass_of_star, &sum_mass_stars); + } + } /* end of main loop over active gas particles */ + + int in[4], out[4], cnt = 2; + in[0] = stars_spawned; + in[1] = stars_converted; + + MPI_Allreduce(in, out, cnt, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + tot_stars_spawned = out[0]; + tot_stars_converted = out[1]; + + if(tot_stars_spawned > 0 || tot_stars_converted > 0) + mpi_printf("SFR: spawned %d stars, converted %d gas particles into stars\n", tot_stars_spawned, tot_stars_converted); + + tot_altogether_spawned = tot_stars_spawned; + altogether_spawned = stars_spawned; + + if(tot_altogether_spawned) + { + /* need to assign new unique IDs to the spawned stars */ + + int *list; + + if(All.MaxID == 0) /* MaxID not calculated yet */ + calculate_maxid(); + + list = mymalloc("list", NTask * sizeof(int)); + + MPI_Allgather(&altogether_spawned, 1, MPI_INT, list, 1, MPI_INT, MPI_COMM_WORLD); + + MyIDType newid = All.MaxID + 1; + + for(i = 0; i < ThisTask; i++) + newid += list[i]; + + myfree(list); + + for(i = 0; i < altogether_spawned; i++) + { + P[NumPart + i].ID = newid; + + newid++; + } + + All.MaxID += tot_altogether_spawned; + } + + /* Note: New tree construction can be avoided because of `force_add_star_to_tree()' */ + if(tot_stars_spawned > 0 || tot_stars_converted > 0) + { + All.TotNumPart += tot_stars_spawned; + All.TotNumGas -= tot_stars_converted; + NumPart += stars_spawned; + } + + for(bin = 0, sfrrate = 0; bin < TIMEBINS; bin++) + if(TimeBinsHydro.TimeBinCount[bin]) + sfrrate += TimeBinSfr[bin]; + + double 
din[3] = {sfrrate, sum_sm, sum_mass_stars}, dout[3]; + + MPI_Reduce(din, dout, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + totsfrrate = dout[0]; + total_sm = dout[1]; + total_sum_mass_stars = dout[2]; + + if(All.TimeStep > 0) + rate = total_sm / (All.TimeStep / All.cf_time_hubble_a); + else + rate = 0; + + /* compute the cumulative mass of stars */ + cum_mass_stars += total_sum_mass_stars; + + /* convert to solar masses per yr */ + rate_in_msunperyear = rate * (All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR); + + fprintf(FdSfr, "%14e %14e %14e %14e %14e %14e\n", All.Time, total_sm, totsfrrate, rate_in_msunperyear, total_sum_mass_stars, + cum_mass_stars); + myflush(FdSfr); + } + + TIMER_STOP(CPU_COOLINGSFR); +} + +/*! \brief Convert a cell into a star. + * + * This function converts an active star-forming gas cell into a star. + * The particle information of the gas cell is copied to the + * location star and the fields necessary for the creation of the star + * particle are initialized. + * + * \param[in] i Index of the gas cell to be converted. + * \param[in] birthtime Time of birth (in code units) of the stellar particle. + * + * \return void + */ +void convert_cell_into_star(int i, double birthtime) +{ + P[i].Type = 4; + P[i].SofteningType = All.SofteningTypeOfPartType[P[i].Type]; + +#if defined(REFINEMENT_HIGH_RES_GAS) + if(SphP[i].HighResMass < HIGHRESMASSFAC * P[i].Mass) + { + /* this cell does not appear to be in the high-res region. 
+ We give the star the SofteningType=3 particle to give it large softening */ + P[i].SofteningType = All.SofteningTypeOfPartType[3]; + } +#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */ + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING + if(((1 << P[i].Type) & (INDIVIDUAL_GRAVITY_SOFTENING))) + P[i].SofteningType = get_softening_type_from_mass(P[i].Mass); +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ + + TimeBinSfr[P[i].TimeBinHydro] -= SphP[i].Sfr; + + voronoi_remove_connection(i); + + return; +} + +/*! \brief Spawn a star particle from a gas cell. + * + * This function spawns a star particle from an active star-forming + * cell. The particle information of the gas cell is copied to the + * location istar and the fields necessary for the creation of the star + * particle are initialized. The conserved variables of the gas cell + * are then updated according to the mass ratio between the two components + * to ensure conservation. + * + * \param[in] igas Index of the gas cell from which the star is spawned. + * \param[in] birthtime Time of birth (in code units) of the stellar particle. + * \param[in] istar Index of the spawned stellar particle. + * \param[in] mass_of_star The mass of the spawned stellar particle. + * + * \return void + */ +void spawn_star_from_cell(int igas, double birthtime, int istar, MyDouble mass_of_star) +{ + P[istar] = P[igas]; + P[istar].Type = 4; + P[istar].SofteningType = All.SofteningTypeOfPartType[P[istar].Type]; + P[istar].Mass = mass_of_star; + +#if defined(REFINEMENT_HIGH_RES_GAS) + if(SphP[igas].HighResMass < HIGHRESMASSFAC * P[igas].Mass) + { + /* this cell does not appear to be in the high-res region. 
+ We give the star the SofteningType=3 particle to give it large softening */ + P[istar].SofteningType = All.SofteningTypeOfPartType[3]; + } +#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */ + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING + if(((1 << P[istar].Type) & (INDIVIDUAL_GRAVITY_SOFTENING))) + P[istar].SofteningType = get_softening_type_from_mass(P[istar].Mass); +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ + + timebin_add_particle(&TimeBinsGravity, istar, igas, P[istar].TimeBinGrav, TimeBinSynchronized[P[istar].TimeBinGrav]); + + /* now change the conserved quantities in the cell in proportion */ + double fac = (P[igas].Mass - P[istar].Mass) / P[igas].Mass; + +#ifdef MHD + double Emag = 0.5 * (SphP[igas].B[0] * SphP[igas].B[0] + SphP[igas].B[1] * SphP[igas].B[1] + SphP[igas].B[2] * SphP[igas].B[2]) * + SphP[igas].Volume * All.cf_atime; + SphP[igas].Energy -= Emag; +#endif /* #ifdef MHD */ + + P[igas].Mass *= fac; + SphP[igas].Energy *= fac; + SphP[igas].Momentum[0] *= fac; + SphP[igas].Momentum[1] *= fac; + SphP[igas].Momentum[2] *= fac; + +#ifdef MHD + SphP[igas].Energy += Emag; +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + for(int s = 0; s < N_Scalar; s++) /* Note, the changes in MATERIALS, HIGHRESGASMASS, etc., are treated as part of the Scalars */ + *(MyFloat *)(((char *)(&SphP[igas])) + scalar_elements[s].offset_mass) *= fac; +#endif /* #ifdef MAXSCALARS */ + + return; +} + +/*! \brief Make a star particle from a gas cell. + * + * Given a gas cell where star formation is active and the probability + * of forming a star, this function selectes either to convert the gas + * cell into a star particle or to spawn a star depending on the + * target mass for the star. + * + * \param[in] idx Index of the gas cell in the hydro list of active cells. + * \param[in] i Index of the gas cell. + * \param[in] prob Probability of making a star. + * \param[in] mass_of_star Desired mass of the star particle. 
+ * \param[in, out] sum_mass_stars Holds the mass of all the stars created at the + * current time-step (for the local task) + * + * \return void + */ +void make_star(int idx, int i, double prob, MyDouble mass_of_star, double *sum_mass_stars) +{ + if(mass_of_star > P[i].Mass) + terminate("mass_of_star > P[i].Mass"); + + if(get_random_number() < prob) + { + if(mass_of_star == P[i].Mass) + { + /* here we turn the gas particle itself into a star particle */ + Stars_converted++; + stars_converted++; + + *sum_mass_stars += P[i].Mass; + + convert_cell_into_star(i, All.Time); + timebin_remove_particle(&TimeBinsHydro, idx, P[i].TimeBinHydro); + } + else + { + /* in this case we spawn a new star particle, only reducing the mass in the cell by mass_of_star */ + altogether_spawned = stars_spawned; + if(NumPart + altogether_spawned >= All.MaxPart) + terminate("NumPart=%d spwawn %d particles no space left (All.MaxPart=%d)\n", NumPart, altogether_spawned, All.MaxPart); + + int j = NumPart + altogether_spawned; /* index of new star */ + + spawn_star_from_cell(i, All.Time, j, mass_of_star); + + *sum_mass_stars += mass_of_star; + stars_spawned++; + } + } +} + +#endif /* #ifdef USE_SFR */ diff --git a/src/amuse/community/arepo/src/subfind/subfind.c b/src/amuse/community/arepo/src/subfind/subfind.c new file mode 100644 index 0000000000..4759ae416a --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind.c @@ -0,0 +1,577 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind.c + * \date 05/2018 + * \brief Main routines of the subfind sub-halo finder. + * \details contains functions: + * double subfind_get_particle_balance(void) + * void subfind(int num) + * void subfind_reorder_according_to_submp(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../fof/fof.h" + +#ifdef SUBFIND +#include "subfind.h" + +/*! \brief Gets a measure of the particle load balance. + * + * \return Maximum number of particle at one core divided by its average. + */ +double subfind_get_particle_balance(void) +{ + int maxpart; + long long sum; + MPI_Allreduce(&NumPart, &maxpart, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + sumup_large_ints(1, &NumPart, &sum); + return maxpart / (((double)sum) / NTask); +} + +/*! \brief Main subfind algorithm. + * + * \param[in] num Index of this snapshot output. 
/*! \brief Main subfind algorithm.
 *
 *  Sequence carried out by this routine:
 *   - two calls of subfind_density() (FIND_SMOOTHING_LENGTHS, then
 *     FIND_TOTAL_DENSITIES), each on a freshly built force tree;
 *   - classification of the FOF groups into "collective" ones
 *     (Len > MaxSerialGroupLen) and "serial" ones;
 *   - assignment of target tasks to groups and particles, followed by
 *     fof_subfind_exchange();
 *   - processing of the groups, either collectively on a sub-communicator
 *     or serially on a single task;
 *   - return of the particles to their origin task, spherical overdensity
 *     calculation, sorting of the catalogues and output through
 *     subfind_save_final().
 *
 *  All MPI calls on MPI_COMM_WORLD in here are collective, so every task
 *  has to execute this function.
 *
 *  \param[in] num Index of this snapshot output.
 *
 *  \return void
 */
void subfind(int num)
{
  double t0, t1, tstart, tend, cputime;
  int i, gr, nlocid, offset;

  TIMER_START(CPU_SUBFIND);

  tstart = second();

  mpi_printf("\nSUBFIND: We now execute a parallel version of SUBFIND.\n");

  /* let's determine the local dark matter densities */

  /* the tree construction is excluded from the CPU_SUBFIND timer */
  TIMER_STOP(CPU_SUBFIND);
  construct_forcetree(0, 0, 1, All.HighestOccupiedTimeBin); /* build forcetree with all particles */
  TIMER_START(CPU_SUBFIND);

  cputime = subfind_density(FIND_SMOOTHING_LENGTHS);
  mpi_printf("SUBFIND: iteration to correct primary neighbor count took %g sec\n", cputime);

  /* free the tree storage again */
  myfree(Father);
  myfree(Nextnode);
  myfree(Tree_Points);
  force_treefree();

  /* NOTE(review): this second build differs from the first only in its third argument (0 instead of 1) */
  TIMER_STOP(CPU_SUBFIND);
  construct_forcetree(0, 0, 0, All.HighestOccupiedTimeBin); /* build forcetree with all particles */
  TIMER_START(CPU_SUBFIND);

  cputime = subfind_density(FIND_TOTAL_DENSITIES);
  mpi_printf("SUBFIND: density() took %g sec\n", cputime);

  /* free the tree storage again */
  myfree(Father);
  myfree(Nextnode);
  myfree(Tree_Points);
  force_treefree();

  /* copy the thermal energy (and cell center, if used) of gas cells into PS[]; all other types get Utherm = 0 */
  for(i = 0; i < NumPart; i++)
    if(P[i].Type == 0)
      {
#ifdef CELL_CENTER_GRAVITY
        for(int j = 0; j < 3; j++)
          PS[i].Center[j] = SphP[i].Center[j];
#endif /* #ifdef CELL_CENTER_GRAVITY */
        PS[i].Utherm = SphP[i].Utherm;
      }
    else
      PS[i].Utherm = 0;

  SubTreeAllocFactor = All.TreeAllocFactor;

  /* Count, how many groups are above this limit, and how many processors we need for them */
  int ncount = 0, nprocs = 0;
  int seriallen = 0;
  long long sum_seriallen;

  double GroupSize = 0.6;

  /* raise the size threshold in steps of 0.05 until the collective groups need fewer than NTask - 1 tasks
   * (or there are no collective groups at all) */
  do
    {
      ncount    = 0;
      nprocs    = 0;
      seriallen = 0;

      /* Let's set a fiducial size for the maximum group size before we select the collective subfind algorithm */
      MaxSerialGroupLen = (int)(GroupSize * All.TotNumPart / NTask);

      for(i = 0; i < Ngroups; i++)
        if(Group[i].Len > MaxSerialGroupLen)
          {
            ncount++;
            nprocs += ((Group[i].Len - 1) / MaxSerialGroupLen) + 1; /* ceil(Len / MaxSerialGroupLen) tasks for this group */
          }
        else
          seriallen += Group[i].Len;

      MPI_Allreduce(&ncount, &Ncollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&nprocs, &NprocsCollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
      sumup_large_ints(1, &seriallen, &sum_seriallen);

      GroupSize += 0.05;
    }
  while(NprocsCollective > 0 && NprocsCollective >= NTask - 1);

  /* NOTE(review): GroupSize has already been incremented once past the value that entered MaxSerialGroupLen,
   * so the number printed here is 0.05 larger than the factor actually used */
  if(GroupSize > 0.65)
    {
      mpi_printf("Increased GroupSize to %g.\n", GroupSize);
    }

  /* NOTE(review): ncount, nprocs and seriallen are unchanged since the last loop iteration, so these three
   * reductions repeat the ones done at the end of the loop */
  MPI_Allreduce(&ncount, &Ncollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(&nprocs, &NprocsCollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  sumup_large_ints(1, &seriallen, &sum_seriallen);

  mpi_printf("SUBFIND: Number of FOF halos treated with collective SubFind code = %d\n", Ncollective);
  mpi_printf("SUBFIND: Number of processors used in different partitions for the collective SubFind code = %d\n", NprocsCollective);
  mpi_printf("SUBFIND: (The adopted size-limit for the collective algorithm was %d particles.)\n", MaxSerialGroupLen);
  mpi_printf("SUBFIND: The other %d FOF halos are treated in parallel with serial code\n", TotNgroups - Ncollective);

  /* set up a global table that informs about the processor assignment of the groups that are treated collectively */
  ProcAssign = mymalloc_movable(&ProcAssign, "ProcAssign", Ncollective * sizeof(struct proc_assign_data));
  struct proc_assign_data *locProcAssign = mymalloc("locProcAssign", ncount * sizeof(struct proc_assign_data));

  /* collect the locally stored collective groups; ncount is rebuilt here and ends at its previous value */
  for(i = 0, ncount = 0; i < Ngroups; i++)
    if(Group[i].Len > MaxSerialGroupLen)
      {
        locProcAssign[ncount].GrNr = Group[i].GrNr;
        locProcAssign[ncount].Len  = Group[i].Len;
        ncount++;
      }

  /* gather the information on the collective groups across all CPUs */
  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);

  MPI_Allgather(&ncount, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD);

  /* the table entries are shipped as raw bytes, hence counts and displacements are converted to bytes */
  int task;
  for(task = 0; task < NTask; task++)
    bytecounts[task] = recvcounts[task] * sizeof(struct proc_assign_data);

  for(task = 1, byteoffset[0] = 0; task < NTask; task++)
    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];

  MPI_Allgatherv(locProcAssign, bytecounts[ThisTask], MPI_BYTE, ProcAssign, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);

  /* released in reverse order of allocation */
  myfree(byteoffset);
  myfree(bytecounts);
  myfree(recvcounts);
  myfree(locProcAssign);

  /* make sure, the table is sorted in ascending group-number order */
  qsort(ProcAssign, Ncollective, sizeof(struct proc_assign_data), subfind_compare_procassign_GrNr);

  /* assign the processor sets for the collective groups and set disjoint color-flag to later split the processors into different
   * communicators */
  /* tasks that belong to no collective group keep the color Ncollective */
  for(i = 0, nprocs = 0, CommSplitColor = Ncollective; i < Ncollective; i++)
    {
      ProcAssign[i].FirstTask = nprocs;
      ProcAssign[i].NTask     = ((ProcAssign[i].Len - 1) / MaxSerialGroupLen) + 1;
      nprocs += ProcAssign[i].NTask;

      if(ThisTask >= ProcAssign[i].FirstTask && ThisTask < (ProcAssign[i].FirstTask + ProcAssign[i].NTask))
        CommSplitColor = i;
    }

  /* Now assign a target task for the group. For collective groups, the target task is the master in the CPU set, whereas
   * the serial ones are distributed in a round-robin fashion to the remaining CPUs
   */
  for(i = 0; i < Ngroups; i++)
    {
      if(Group[i].Len > MaxSerialGroupLen) /* we have a collective group */
        {
          /* ProcAssign[] is indexed with GrNr below, so a collective group must have GrNr in [0, Ncollective) */
          if(Group[i].GrNr >= Ncollective || Group[i].GrNr < 0)
            terminate("odd");
          Group[i].TargetTask = ProcAssign[Group[i].GrNr].FirstTask;
        }
      else
        Group[i].TargetTask = ((Group[i].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective;
    }

  /* distribute the groups */
  subfind_distribute_groups();
  qsort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_GrNr);

  /* assign target CPUs for the particles in groups */
  /* the particles not in groups will be distributed such that a uniform particle load results */
  t0                  = second();
  int *count_loc_task = mymalloc_clear("count_loc_task", NTask * sizeof(int));
  int *count_task     = mymalloc("count_task", NTask * sizeof(int));
  int *count_free     = mymalloc("count_free", NTask * sizeof(int));
  int count_loc_free  = 0;

  for(i = 0; i < NumPart; i++)
    {
      if(PS[i].GrNr < TotNgroups) /* particle is in a group */
        {
          if(PS[i].GrNr < Ncollective) /* we are in a collective group */
            PS[i].TargetTask = ProcAssign[PS[i].GrNr].FirstTask + (i % ProcAssign[PS[i].GrNr].NTask);
          else
            PS[i].TargetTask = ((PS[i].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective;

          count_loc_task[PS[i].TargetTask]++;
        }
      else
        count_loc_free++;

      PS[i].TargetIndex = 0; /* unimportant here */
    }

  /* count_free[t]: group-less particles currently on task t; count_task[t]: grouped particles that will land on task t */
  MPI_Allgather(&count_loc_free, 1, MPI_INT, count_free, 1, MPI_INT, MPI_COMM_WORLD);
  MPI_Allreduce(count_loc_task, count_task, NTask, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  long long sum = 0;
  for(i = 0; i < NTask; i++)
    sum += count_task[i] + count_free[i];

  int maxload = (sum + NTask - 1) / NTask; /* mean load per task, rounded up */
  for(i = 0; i < NTask; i++)
    {
      count_task[i] = maxload - count_task[i]; /* this is the amount that can fit on this task */
      if(count_task[i] < 0)
        count_task[i] = 0;
    }

  int current_task = 0;

  /* replay how the free particles of all lower-ranked tasks consume the available space, so that current_task and
   * count_task[] describe where the free particles of this task start to be placed */
  for(i = 0; i < ThisTask; i++)
    {
      while(count_free[i] > 0 && current_task < NTask)
        {
          if(count_free[i] < count_task[current_task])
            {
              count_task[current_task] -= count_free[i];
              count_free[i] = 0;
            }
          else
            {
              count_free[i] -= count_task[current_task];
              count_task[current_task] = 0;
              current_task++;
            }
        }
    }

  for(i = 0; i < NumPart; i++)
    {
      if(PS[i].GrNr >=
         TotNgroups) /* particle not in a group. Can in principle stay but we move it such that a good load balance is obtained. */
        {
          /* NOTE(review): count_task[current_task] is read before current_task is compared against NTask - 1; this
           * relies on the loop above never leaving current_task == NTask while free particles remain -- confirm */
          while(count_task[current_task] == 0 && current_task < NTask - 1)
            current_task++;

          PS[i].TargetTask = current_task; /* particle not in any group, move it here so that uniform load is achieved */
          count_task[current_task]--;
        }
    }

  myfree(count_free);
  myfree(count_task);
  myfree(count_loc_task);

#ifdef SUBFIND_EXTENDED_PROPERTIES
  int ngroups_cat = 42;  // dummy. not used for any calculation but fct needs to receive a value and we want to keep fct universal.
#endif                   /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
  int nsubgroups_cat = 42;  // dummy. not used for any calculation but fct needs to receive a value and we want to keep fct universal.

  double balance = subfind_get_particle_balance();
  mpi_printf("SUBFIND: particle balance=%g\n", balance);

  /* distribute particles such that groups are completely on the CPU(s) that do the corresponding group(s) */
  fof_subfind_exchange(MPI_COMM_WORLD);
  t1 = second();
  mpi_printf("SUBFIND: subfind_exchange() took %g sec\n", timediff(t0, t1));

  balance = subfind_get_particle_balance();
  mpi_printf("SUBFIND: particle balance for processing=%g\n", balance);

  /* lets estimate the maximum number of substructures we need to store on the local CPU */
  if(ThisTask < NprocsCollective)
    {
      MaxNsubgroups = (ProcAssign[CommSplitColor].Len / ProcAssign[CommSplitColor].NTask) / All.DesLinkNgb;
    }
  else
    {
      for(i = 0, nlocid = 0; i < Ngroups; i++)
        nlocid += Group[i].Len;

      MaxNsubgroups = nlocid / All.DesLinkNgb; /* should be a quite conservative upper limit */
    }

  Nsubgroups = 0;
  SubGroup   = (struct subgroup_properties *)mymalloc_movable(&SubGroup, "SubGroup", MaxNsubgroups * sizeof(struct subgroup_properties));

  /* we can now split the communicator to give each collectively treated group its own processor set */
  MPI_Comm_split(MPI_COMM_WORLD, CommSplitColor, ThisTask, &SubComm);
  MPI_Comm_size(SubComm, &SubNTask);
  MPI_Comm_rank(SubComm, &SubThisTask);
  SubTagOffset = TagOffset;

  /* here the execution paths for collective groups and serial groups branch. The collective CPUs work in small sets that each
   * deal with one large group. The serial CPUs each deal with several halos by themselves
   */
  if(CommSplitColor < Ncollective) /* we are one of the CPUs that does a collective group */
    {
      /* we now apply a collective version of subfind to the group split across the processors belonging to communicator SubComm
       * The relevant group is the one stored in Group[0] on SubThisTask==0.
       */
      subfind_process_group_collectively(nsubgroups_cat);
    }
  else
    {
      /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other arrays!
       */
      submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
      for(i = 0; i < NumPart; i++)
        {
          PS[i].SubNr         = TotNgroups + 1; /* set a default that is larger than reasonable group number */
          PS[i].OldIndex      = i;              /* remembered so that the reordering can be undone further below */
          submp[i].index      = i;
          submp[i].GrNr       = PS[i].GrNr;
          submp[i].DM_Density = PS[i].Density;
        }
      qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density);
      subfind_reorder_according_to_submp();
      myfree(submp);

      /* now we have the particles in each group consecutively */
      if(SubThisTask == 0)
        printf(
            "SUBFIND-SERIAL: Start to do %d small groups (cumulative length %lld) with serial subfind algorithm on %d processors "
            "(root-node=%d)\n",
            TotNgroups - Ncollective, sum_seriallen, SubNTask, ThisTask);

      /* we now apply a serial version of subfind to the local groups */
      t0 = second();
      for(gr = 0, offset = 0; gr < Ngroups; gr++)
        {
          /* same round-robin formula as used for Group[].TargetTask above; every local group must map to this task */
          if(((Group[gr].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective == ThisTask)
            offset = subfind_process_group_serial(gr, offset, nsubgroups_cat);
          else
            terminate("how come that we have this group number?");
        }

      MPI_Barrier(SubComm);
      t1 = second();
      if(SubThisTask == 0)
        printf("SUBFIND-SERIAL: processing of serial groups took %g sec\n", timediff(t0, t1));

      /* undo local rearrangement that made groups consecutive. After that, the association of SphP[] will be correct again */
      submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
      for(i = 0; i < NumPart; i++)
        {
          submp[i].index    = i;
          submp[i].OldIndex = PS[i].OldIndex;
        }
      qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex);
      subfind_reorder_according_to_submp();
      myfree(submp);
    }

  /* free the communicator */
  MPI_Comm_free(&SubComm);

  /* make common allocation on all tasks */
  int max_load, max_loadsph, load;

  /* for resize */
  load = All.MaxPart;
  MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);

  load = All.MaxPartSph;
  MPI_Allreduce(&load, &max_loadsph, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);

  /* do resize */
  All.MaxPart = max_load;
  reallocate_memory_maxpart();
  PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data));

  All.MaxPartSph = max_loadsph;
  reallocate_memory_maxpartsph();

  /* distribute particles back to original CPU */
  t0 = second();
  for(i = 0; i < NumPart; i++)
    {
      PS[i].TargetTask  = PS[i].OriginTask;
      PS[i].TargetIndex = PS[i].OriginIndex;
    }

  fof_subfind_exchange(MPI_COMM_WORLD);
  t1 = second();
  if(ThisTask == 0)
    printf("SUBFIND: subfind_exchange() (for return to original CPU) took %g sec\n", timediff(t0, t1));

  TIMER_STOP(CPU_SUBFIND);
  construct_forcetree(0, 0, 0, All.HighestOccupiedTimeBin); /* build forcetree with all particles */
  TIMER_START(CPU_SUBFIND);

  /* compute spherical overdensities for FOF groups */
  cputime = subfind_overdensity();
  mpi_printf("SUBFIND: determining spherical overdensity masses took %g sec\n", cputime);

  myfree(Father);
  myfree(Nextnode);
  myfree(Tree_Points);
  force_treefree();

#ifdef SUBFIND_EXTENDED_PROPERTIES
  subfind_add_grp_props_calc_fof_angular_momentum(num, ngroups_cat);
#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */

  MPI_Allreduce(&Nsubgroups, &TotNsubgroups, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  /* sort the groups according to group/subgroup-number */
  t0 = second();
  parallel_sort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_GrNr);
  parallel_sort(SubGroup, Nsubgroups, sizeof(struct subgroup_properties), subfind_compare_SubGroup_GrNr_SubNr);
  t1 = second();
  mpi_printf("SUBFIND: assembled and ordered groups and subgroups (took %g sec)\n", timediff(t0, t1));

  /* determine largest subgroup and total particle/cell count in substructures */
  int lenmax, glob_lenmax, totlen;
  long long totsublength;
  for(i = 0, totlen = 0, lenmax = 0; i < Nsubgroups; i++)
    {
      totlen += SubGroup[i].Len;

      if(SubGroup[i].Len > lenmax)
        lenmax = SubGroup[i].Len;
    }
  sumup_large_ints(1, &totlen, &totsublength);
  /* reduced to rank 0 only: glob_lenmax is defined on ThisTask == 0 alone, which is the only place it is printed */
  MPI_Reduce(&lenmax, &glob_lenmax, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);

  /* set binding energy of fuzz to zero, was overwritten with Hsml before; needed for proper snapshot sorting of fuzz */
  for(i = 0; i < NumPart; i++)
    if(PS[i].SubNr == TotNgroups + 1)
      PS[i].BindingEnergy = 0;

  /* the catalogue output is accounted to CPU_SNAPSHOT instead of CPU_SUBFIND */
  TIMER_STOP(CPU_SUBFIND);
  TIMER_START(CPU_SNAPSHOT);

  /* now final output of catalogue */
  subfind_save_final(num);

  TIMER_STOP(CPU_SNAPSHOT);
  TIMER_START(CPU_SUBFIND);

  tend = second();

  if(ThisTask == 0)
    {
      printf("SUBFIND: Finished with SUBFIND.  (total time=%g sec)\n", timediff(tstart, tend));
      printf("SUBFIND: Total number of subhalos with at least %d particles: %d\n", All.DesLinkNgb, TotNsubgroups);
      if(TotNsubgroups > 0)
        {
          printf("SUBFIND: Largest subhalo has %d particles/cells.\n", glob_lenmax);
          printf("SUBFIND: Total number of particles/cells in subhalos: %lld\n", totsublength);
        }
    }

  myfree_movable(SubGroup);
  myfree_movable(ProcAssign);

  TIMER_STOP(CPU_SUBFIND);
}
+ * + * \return void + */ +void subfind_reorder_according_to_submp(void) +{ + int i; + struct particle_data Psave, Psource; + struct subfind_data PSsave, PSsource; + int idsource, idsave, dest; + int *Id; + + Id = (int *)mymalloc("Id", sizeof(int) * (NumPart)); + + for(i = 0; i < NumPart; i++) + Id[submp[i].index] = i; + + for(i = 0; i < NumPart; i++) + { + if(Id[i] != i) + { + Psource = P[i]; + PSsource = PS[i]; + idsource = Id[i]; + + dest = Id[i]; + + do + { + Psave = P[dest]; + PSsave = PS[dest]; + idsave = Id[dest]; + + P[dest] = Psource; + PS[dest] = PSsource; + Id[dest] = idsource; + + if(dest == i) + break; + + Psource = Psave; + PSsource = PSsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } + + myfree(Id); +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind.h b/src/amuse/community/arepo/src/subfind/subfind.h new file mode 100644 index 0000000000..d229af8490 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind.h @@ -0,0 +1,213 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
#ifndef SUBFIND_H
#define SUBFIND_H

#include "../domain/domain.h"
#include "../main/allvars.h"

/* mode flags handed to subfind_density() by subfind() */
#define FIND_SMOOTHING_LENGTHS 0
#define FIND_TOTAL_DENSITIES 1
#define SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER 10000
#define SUBFIND_GAL_RADIUS_FAC 2.0 /* for subfind metal calculation */

#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES)
extern int *NodeGrNr;
#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */

/* group currently handled by the collective code: particles are selected with PS[i].GrNr == GrNr, and
 * NumPartGroup is the local count of such particles (checked in subfind_coll_domain_determineTopTree()) */
extern int GrNr;
extern int NumPartGroup;

/* top-level tree of the collective domain decomposition, see subfind_coll_domain.c */
extern struct topnode_data *SubTopNodes;
extern struct local_topnode_data *Sub_LocTopNodes;

extern int *SubDomainTask;
extern int *SubDomainNodeIndex;
extern int *SubNextnode;
extern int SubNTopleaves;
extern int SubNTopnodes;

extern int SubTree_MaxPart;
extern int SubTree_NumNodes;
extern int SubTree_MaxNodes;
extern int SubTree_FirstNonTopLevelNode;
extern int SubTree_NumPartImported;
extern int SubTree_NumPartExported;
extern int SubTree_ImportedNodeOffset;
extern int SubTree_NextFreeNode;
extern MyDouble *SubTree_Pos_list;
extern struct NODE *SubNodes;
extern struct ExtNODE *SubExtNodes;

extern double SubTreeAllocFactor; /* initialised from All.TreeAllocFactor in subfind() */

extern int *SubTree_ResultIndexList;
extern int *SubTree_Task_list;
extern unsigned long long *SubTree_IntPos_list;

/* extent of the group handled collectively, set by subfind_coll_findExtent() */
extern double SubDomainCorner[3], SubDomainCenter[3], SubDomainLen, SubDomainFac;
extern double SubDomainInverseLen, SubDomainBigFac;

extern MyDouble GrCM[3];

extern int Ncollective;      /* global number of groups with Len > MaxSerialGroupLen (MPI sum in subfind()) */
extern int NprocsCollective; /* global number of tasks reserved for those groups (MPI sum in subfind()) */
extern int MaxNsubgroups;    /* allocated length of SubGroup[] on this task */
extern int MaxNgbs;
extern int MaxSerialGroupLen; /* groups longer than this are treated by the collective algorithm */
extern r2type *R2list;

extern int CommSplitColor; /* color passed to MPI_Comm_split() in subfind() */
extern MPI_Comm SubComm;   /* communicator obtained from that split */

extern int SubNTask, SubThisTask; /* size of and rank in SubComm */
extern int SubTagOffset;

/* one entry per collectively treated group; filled in subfind() */
extern struct proc_assign_data
{
  int GrNr;      /* group number */
  int Len;       /* group length */
  int FirstTask; /* first task of the contiguous task range assigned to the group */
  int NTask;     /* number of tasks in that range: (Len - 1) / MaxSerialGroupLen + 1 */
} * ProcAssign;

/* catalogue entry of one subhalo */
/* NOTE(review): the field meanings are not established by the code visible here; names follow the output catalogue -- confirm
 * against subfind_determine_sub_halo_properties() */
extern struct subgroup_properties
{
  int Len;
  int LenType[NTYPES];
  int GrNr;
  int SubNr;
  int SubParent;
  MyIDType SubMostBoundID;
  MyFloat Mass;
  MyFloat MassType[NTYPES];
  MyFloat SubVelDisp;
  MyFloat SubVmax;
  MyFloat SubVmaxRad;
  MyFloat SubHalfMassRad;
  MyFloat SubHalfMassRadType[NTYPES];
  MyFloat SubMassInRad;
  MyFloat SubMassInRadType[NTYPES];
  MyFloat SubMassInHalfRad;
  MyFloat SubMassInHalfRadType[NTYPES];
  MyFloat SubMassInMaxRad;
  MyFloat SubMassInMaxRadType[NTYPES];
  MyFloat Pos[3];
  MyFloat CM[3];
  MyFloat Vel[3];
  MyFloat Spin[3];

#ifdef MHD
  MyFloat Bfld_Halo, Bfld_Disk;
#endif /* #ifdef MHD */

#ifdef SUBFIND_EXTENDED_PROPERTIES
  MyFloat Ekin, Epot, Ethr;
  MyFloat J[3], Jdm[3], Jgas[3], Jstars[3], CMFrac, CMFracType[NTYPES];
  MyFloat J_inRad[3], Jdm_inRad[3], Jgas_inRad[3], Jstars_inRad[3], CMFrac_inRad, CMFracType_inRad[NTYPES];
  MyFloat J_inHalfRad[3], Jdm_inHalfRad[3], Jgas_inHalfRad[3], Jstars_inHalfRad[3], CMFrac_inHalfRad, CMFracType_inHalfRad[NTYPES];
#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */

#ifdef USE_SFR
  MyFloat Sfr, SfrInRad, SfrInHalfRad, SfrInMaxRad, GasMassSfr;
#endif /* #ifdef USE_SFR */
} * SubGroup;

extern struct nearest_r2_data
{
  double dist[2];
} * R2Loc;

extern struct nearest_ngb_data
{
  long long index[2];
  int count;
} * NgbLoc;

extern int NumPaux;

extern struct paux_data
{
  int TaskOfGr;
  int LocGrIndex;
  unsigned char Type;
  unsigned char SofteningType;
  MyDouble Pos[3];
  MyDouble Mass;
} * Paux;

/* sort helper that defines the permutation applied by subfind_reorder_according_to_submp() */
extern struct submp_data
{
  int index;          /* index of the particle in P[]/PS[] at the time submp[] is filled */
  int GrNr;           /* copy of PS[].GrNr */
  int OldIndex;       /* copy of PS[].OldIndex, used as sort key to undo the reordering */
  MyFloat DM_Density; /* copy of PS[].Density */
} * submp;

extern struct cand_dat
{
  int head;
  int len;
  int nsub;
  int rank, subnr, parent;
  int bound_length;
} * candidates;

/* NOTE(review): same fields as cand_dat but with head and rank widened to long long -- presumably the variant used
 * by the collective algorithm; confirm */
extern struct coll_cand_dat
{
  long long head;
  long long rank;
  int len;
  int nsub;
  int subnr, parent;
  int bound_length;
} * coll_candidates;

typedef struct
{
  double rho;
#ifdef SUBFIND_CALC_MORE
  double vx, vy, vz;
  double v2;
#endif
} SubDMData;

void subfind_determine_sub_halo_properties(struct unbind_data *d, int num, struct subgroup_properties *subgroup, int grnr, int subnr,
                                           int parallel_flag, int nsubgroups_cat);
int subfind_ngb_treefind_density(MyDouble searchcenter[3], double hsml, int target, int *startnode, int mode, int *exportflag,
                                 int *exportnodecount, int *exportindex, SubDMData *sub_dm_data);
int subfind_treefind_collective_export_node_threads(int no, int i, int thread_id);
void subfind_domain_do_local_refine(int n, int *list);
void assign_group_numbers_based_on_catalogue(int ngroups_cat, int nsubgroups_cat);
int subfind_compare_rlist_mhd(const void *a, const void *b);

#endif /* #ifndef SUBFIND_H */
/*! \brief Define structure of my tree nodes.
 *
 *  Elements of the red-black tree 'mytree' that is ordered by mydata_cmp().
 */
struct mydata
{
  double workload;
  int topnode_index;

  RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */
};

/*! \brief Comparison function of mydata objects (i.e. tree elements).
 *
 *  Orders elements by descending workload; elements with equal workload
 *  are ordered by ascending topnode_index.
 *
 *  \param[in] lhs First object to compare.
 *  \param[in] rhs Second object to compare.
 *
 *  \return -1 if lhs sorts before rhs (larger workload, or equal workload
 *          and smaller topnode_index), 1 if lhs sorts after rhs, 0 if both
 *          keys are equal.
 */
static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
{
  if(lhs->workload > rhs->workload)
    return -1;
  else if(lhs->workload < rhs->workload)
    return 1;
  else if(lhs->topnode_index < rhs->topnode_index)
    return -1;
  else if(lhs->topnode_index > rhs->topnode_index)
    return 1;

  return 0;
}

/* the following macro declares 'struct mytree', which is the header element
 * needed as handle for a tree
 */
RB_HEAD(mytree, mydata);

static struct mydata *nload;
static struct mytree queue_load;

/* the following macros declare appropriate function prototypes and functions
 * needed for this type of tree
 */
RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp);
RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp);

/*! \brief Performs domain decomposition for subfind collective.
 *
 *  Collective over SubComm. Determines the extent and the top-level tree
 *  of the group GrNr, maps the top-level leaves onto the tasks of SubComm,
 *  sets PS[].TargetTask accordingly (particles outside the group stay on
 *  their task) and moves the particles with fof_subfind_exchange().
 *  Finally the needed top-node data is copied to SubTopNodes[].
 *
 *  \return void
 */
void subfind_coll_domain_decomposition(void)
{
  int i;
  int col_grouplen, col_partcount;

  subfind_coll_domain_allocate();
  subfind_coll_findExtent();

  Key             = (peanokey *)mymalloc_movable(&Key, "Key", (sizeof(peanokey) * NumPart));
  Sub_LocTopNodes = (struct local_topnode_data *)mymalloc_movable(&Sub_LocTopNodes, "Sub_LocTopNodes",
                                                                  (MaxTopNodes * sizeof(struct local_topnode_data)));

  /* totals over the sub-communicator: particles of the group, and all particles */
  MPI_Allreduce(&NumPartGroup, &col_grouplen, 1, MPI_INT, MPI_SUM, SubComm);
  MPI_Allreduce(&NumPart, &col_partcount, 1, MPI_INT, MPI_SUM, SubComm);

  /* NOTE(review): both divisions assume non-zero totals on SubComm -- confirm that callers guarantee this */
  fac_work = 0.5 / col_grouplen;
  fac_load = 0.5 / col_partcount;

  subfind_coll_domain_determineTopTree();

  /* find the split of the top-level tree */
  subfind_coll_domain_combine_topleaves_to_domains(SubNTask, SubNTopleaves);

  /* determine the particles that need to be exported, and to which CPU they need to be sent */
  for(i = 0; i < NumPart; i++)
    {
      if(PS[i].GrNr == GrNr)
        {
          /* walk down from the root: a node's Size is split into 8 equal key ranges, one per daughter */
          int no = 0;
          while(Sub_LocTopNodes[no].Daughter >= 0)
            no = Sub_LocTopNodes[no].Daughter + (Key[i] - Sub_LocTopNodes[no].StartKey) / (Sub_LocTopNodes[no].Size >> 3);

          no = Sub_LocTopNodes[no].Leaf;

          int task = SubDomainTask[no];

          PS[i].TargetTask = task;
        }
      else
        PS[i].TargetTask = SubThisTask; /* particles of other groups are not moved */

      PS[i].TargetIndex = 0; /* unimportant here */
    }

  fof_subfind_exchange(SubComm);

  /* note that the domain decomposition leads to an invalid value of NumPartGroup. This will however be redetermined in the main
   * routine of the collective subfind, after the domain decomposition has been done.
   */

  /* copy what we need for the topnodes */
  for(i = 0; i < SubNTopnodes; i++)
    {
      SubTopNodes[i].StartKey = Sub_LocTopNodes[i].StartKey;
      SubTopNodes[i].Size     = Sub_LocTopNodes[i].Size;
      SubTopNodes[i].Daughter = Sub_LocTopNodes[i].Daughter;
      SubTopNodes[i].Leaf     = Sub_LocTopNodes[i].Leaf;

      int j;
      int bits   = my_ffsll(SubTopNodes[i].Size);
      int blocks = (bits - 1) / 3 - 1;

      /* for each of the 8 key sub-ranges j, invert the start key to integer coordinates and take one bit per
       * dimension (after shifting by 'blocks') to form the octant index idx; the table maps idx back to j */
      for(j = 0; j < 8; j++)
        {
          peano1D xb, yb, zb;
          peano_hilbert_key_inverse(SubTopNodes[i].StartKey + j * (SubTopNodes[i].Size >> 3), BITS_PER_DIMENSION, &xb, &yb, &zb);
          xb >>= blocks;
          yb >>= blocks;
          zb >>= blocks;
          int idx = (xb & 1) | ((yb & 1) << 1) | ((zb & 1) << 2);
          if(idx < 0 || idx > 7)
            terminate("j=%d  idx=%d", j, idx);

          SubTopNodes[i].MortonToPeanoSubnode[idx] = j;
        }
    }

  myfree(Sub_LocTopNodes);
  myfree(Key);

  /* shrink the tables to the number of entries actually in use */
  SubTopNodes   = (struct topnode_data *)myrealloc_movable(SubTopNodes, SubNTopnodes * sizeof(struct topnode_data));
  SubDomainTask = (int *)myrealloc_movable(SubDomainTask, SubNTopleaves * sizeof(int));
}
+ * + * \return void + */ +void subfind_coll_findExtent(void) +{ + int i, j; + double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3]; + + /* determine extension: start from an inverted (empty) bounding box */ + for(i = 0; i < 3; i++) + { + xmin[i] = MAX_REAL_NUMBER; + xmax[i] = -MAX_REAL_NUMBER; + } + + for(i = 0; i < NumPart; i++) + { + if(PS[i].GrNr == GrNr) + { + for(j = 0; j < 3; j++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + if(xmin[j] > PS[i].Center[j]) + xmin[j] = PS[i].Center[j]; + + if(xmax[j] < PS[i].Center[j]) + xmax[j] = PS[i].Center[j]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + if(xmin[j] > P[i].Pos[j]) + xmin[j] = P[i].Pos[j]; + + if(xmax[j] < P[i].Pos[j]) + xmax[j] = P[i].Pos[j]; + } + } + } + } + + MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, SubComm); + MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, SubComm); + + len = 0; /* becomes the largest of the three global extents */ + for(j = 0; j < 3; j++) + if(xmax_glob[j] - xmin_glob[j] > len) + len = xmax_glob[j] - xmin_glob[j]; + + len *= 1.001; /* pad the largest extent by 0.1 per cent */ + + SubDomainLen = len; + SubDomainInverseLen = 1.0 / SubDomainLen; + SubDomainFac = 1.0 / len * (((peanokey)1) << (BITS_PER_DIMENSION)); + SubDomainBigFac = (SubDomainLen / (((long long)1) << 52)); /* box length divided by 2^52 */ + + for(j = 0; j < 3; j++) + { + SubDomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]); /* midpoint of the global bounding box */ + SubDomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len; /* lower corner of the cube of side len around that midpoint */ + } +} + +/*! \brief Determines extent of the subfind top-tree. 
+ * + * \return void + */ +int subfind_coll_domain_determineTopTree(void) +{ /* NOTE: the only return statement of this function returns 0, although the header above says void */ + int i, count; + + mp = (struct domain_peano_hilbert_data *)mymalloc("mp", sizeof(struct domain_peano_hilbert_data) * NumPartGroup); + + for(i = 0, count = 0; i < NumPart; i++) + { + if(PS[i].GrNr == GrNr) + { + peano1D xb, yb, zb; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + xb = domain_double_to_int(((PS[i].Center[0] - SubDomainCorner[0]) * SubDomainInverseLen) + 1.0); + yb = domain_double_to_int(((PS[i].Center[1] - SubDomainCorner[1]) * SubDomainInverseLen) + 1.0); + zb = domain_double_to_int(((PS[i].Center[2] - SubDomainCorner[2]) * SubDomainInverseLen) + 1.0); + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + xb = domain_double_to_int(((P[i].Pos[0] - SubDomainCorner[0]) * SubDomainInverseLen) + 1.0); + yb = domain_double_to_int(((P[i].Pos[1] - SubDomainCorner[1]) * SubDomainInverseLen) + 1.0); + zb = domain_double_to_int(((P[i].Pos[2] - SubDomainCorner[2]) * SubDomainInverseLen) + 1.0); + } + + mp[count].key = Key[i] = peano_hilbert_key(xb, yb, zb, BITS_PER_DIMENSION); /* key is stored both per particle (Key) and in the sortable list (mp) */ + mp[count].index = i; + count++; + } + } + + if(count != NumPartGroup) + terminate("cost != NumPartGroup"); /* NOTE(review): the message says cost, the variable compared is count */ + + mysort_domain(mp, count, sizeof(struct domain_peano_hilbert_data)); + + SubNTopnodes = 1; /* start from a single root node that holds all group particles */ + SubNTopleaves = 1; + Sub_LocTopNodes[0].Daughter = -1; + Sub_LocTopNodes[0].Parent = -1; + Sub_LocTopNodes[0].Size = PEANOCELLS; + Sub_LocTopNodes[0].StartKey = 0; + Sub_LocTopNodes[0].PIndex = 0; + Sub_LocTopNodes[0].Cost = NumPartGroup; + Sub_LocTopNodes[0].Count = NumPartGroup; + + int limitNTopNodes = 2 * imax(1 + (NTask / 7 + 1) * 8, All.TopNodeFactor * SubNTask); /* NOTE(review): first argument uses NTask, second uses SubNTask - confirm this mix is intended */ + + if(limitNTopNodes > MaxTopNodes) + terminate("limitNTopNodes > MaxTopNodes"); + + RB_INIT(&queue_load); + nload = mymalloc("nload", limitNTopNodes * sizeof(struct mydata)); + int *list = mymalloc("list", limitNTopNodes * sizeof(int)); + + double limit = 1.0 / (All.TopNodeFactor * SubNTask); /* workload above which a node is selected for refinement */ + + /* insert the root node */ + 
nload[0].workload = 1.0; + nload[0].topnode_index = 0; + RB_INSERT(mytree, &queue_load, &nload[0]); + + int iter = 0; + + do + { + count = 0; + + double first_workload = 0; /* workload of the heaviest queued node that can still be split (Size >= 8) */ + + for(struct mydata *nfirst = RB_MIN(mytree, &queue_load); nfirst != NULL; nfirst = RB_NEXT(mytree, &queue_load, nfirst)) + { + if(Sub_LocTopNodes[nfirst->topnode_index].Size >= 8) + { + first_workload = nfirst->workload; + break; + } + } + + for(struct mydata *np = RB_MIN(mytree, &queue_load); np != NULL; np = RB_NEXT(mytree, &queue_load, np)) + { + if(np->workload < 0.125 * first_workload) + break; /* the queue is ordered by decreasing workload, so all remaining nodes are below this threshold too */ + + if(SubNTopnodes + 8 * (count + 1) >= limitNTopNodes) + break; /* selecting one more node would reach limitNTopNodes once its eight daughters are added */ + + if(np->workload > limit || (SubNTopleaves < SubNTask && count == 0)) + { + if(Sub_LocTopNodes[np->topnode_index].Size >= 8) + { + list[count] = np->topnode_index; + count++; + } + } + } + + if(count > 0) + { + subfind_domain_do_local_refine(count, list); + iter++; /* counts refinement passes; the value is not read again in this function */ + } + } + while(count > 0); + + myfree(list); + myfree(nload); + myfree(mp); + + /* count toplevel leaves */ + + /* count the number of top leaves; the walk also assigns the Leaf index of every leaf */ + SubNTopleaves = 0; + subfind_coll_domain_walktoptree(0); + + if(SubNTopleaves < SubNTask) + terminate("SubNTopleaves = %d < SubNTask = %d", SubNTopleaves, SubNTask); + + return 0; +} + +/*! \brief Refines top-tree locally. + * + * \param[in] n Number of new nodes. + * \param[in] list Array with indices of new nodes. 
+ * + * \return void + */ +void subfind_domain_do_local_refine(int n, int *list) +{ + double *worktotlist = mymalloc("worktotlist", 8 * n * sizeof(double)); + double *worklist = mymalloc("worklist", 8 * n * sizeof(double)); + + /* create the new nodes: each listed node gets eight daughters appended at index SubNTopnodes */ + for(int k = 0; k < n; k++) + { + int i = list[k]; + + Sub_LocTopNodes[i].Daughter = SubNTopnodes; + SubNTopnodes += 8; + SubNTopleaves += 7; /* one leaf is replaced by eight leaves */ + + for(int j = 0; j < 8; j++) + { + int sub = Sub_LocTopNodes[i].Daughter + j; + + Sub_LocTopNodes[sub].Daughter = -1; + Sub_LocTopNodes[sub].Parent = i; + Sub_LocTopNodes[sub].Size = (Sub_LocTopNodes[i].Size >> 3); + Sub_LocTopNodes[sub].StartKey = Sub_LocTopNodes[i].StartKey + j * Sub_LocTopNodes[sub].Size; + Sub_LocTopNodes[sub].PIndex = Sub_LocTopNodes[i].PIndex; + Sub_LocTopNodes[sub].Cost = 0; + Sub_LocTopNodes[sub].Count = 0; + } + + int sub = Sub_LocTopNodes[i].Daughter; + + for(int p = Sub_LocTopNodes[i].PIndex, j = 0; p < Sub_LocTopNodes[i].PIndex + Sub_LocTopNodes[i].Count; p++) + { + if(PS[mp[p].index].GrNr != GrNr) + terminate("Houston, we have a problem."); + + if(j < 7) + while(mp[p].key >= Sub_LocTopNodes[sub + 1].StartKey) + { + j++; + sub++; + Sub_LocTopNodes[sub].PIndex = p; /* first entry of mp[] at or beyond the start key of this daughter */ + if(j >= 7) + break; + } + + Sub_LocTopNodes[sub].Count++; + Sub_LocTopNodes[sub].Cost++; + } + + for(int j = 0; j < 8; j++) + { + sub = Sub_LocTopNodes[i].Daughter + j; + worklist[k * 8 + j] = fac_work * Sub_LocTopNodes[sub].Cost + fac_load * Sub_LocTopNodes[sub].Count; + } + } + + MPI_Allreduce(worklist, worktotlist, 8 * n, MPI_DOUBLE, MPI_SUM, SubComm); /* sum the local workloads of all new daughters over SubComm */ + + for(int k = 0; k < n; k++) + { + int i = list[k]; + RB_REMOVE(mytree, &queue_load, &nload[i]); /* the refined node leaves the queue; its daughters are inserted below */ + } + + for(int k = 0, l = 0; k < n; k++) + { + int i = list[k]; + + for(int j = 0; j < 8; j++, l++) + { + int sub = Sub_LocTopNodes[i].Daughter + j; + + /* insert the node with its summed workload */ + nload[sub].workload = worktotlist[l]; + nload[sub].topnode_index = sub; + RB_INSERT(mytree, &queue_load, &nload[sub]); + } + } + + myfree(worklist); + 
myfree(worktotlist); +} + +/*! \brief Walk the top tree and set reference to leaf node. + * + * A node with Daughter == -1 is a leaf: it receives the current value of the + * global SubNTopleaves as its Leaf index and the counter is incremented. + * For any other node the eight daughters are visited recursively. + * + * \param[in] no Node index. + * + * \return void + */ +void subfind_coll_domain_walktoptree(int no) +{ + int i; + + if(Sub_LocTopNodes[no].Daughter == -1) + { + Sub_LocTopNodes[no].Leaf = SubNTopleaves; + SubNTopleaves++; + } + else + { + for(i = 0; i < 8; i++) + subfind_coll_domain_walktoptree(Sub_LocTopNodes[no].Daughter + i); + } +} + +/*! \brief Uses the cumulative cost function (which weights work-load and + * memory-load equally) to subdivide the list of top-level leaf + * nodes into pieces that are (approximately) equal in size. + * + * The task chosen for each top leaf is written to SubDomainTask[]. + * + * \param[in] ncpu Number of tasks. + * \param[in] ndomain Number of domains, i.e. of top-level leaves to distribute + * (the call in this file passes SubNTopleaves). + * + * \return void + */ +void subfind_coll_domain_combine_topleaves_to_domains(int ncpu, int ndomain) +{ + int i, j, start, end, n, no; + double work, workavg, work_before, workavg_before, workhalfnode; + float *domainWork, *local_domainWork; + int *domainCount, *local_domainCount; + + /* sum the costs for each top leaf */ + + domainWork = (float *)mymalloc("local_domainWork", SubNTopleaves * sizeof(float)); + domainCount = (int *)mymalloc("local_domainCount", SubNTopleaves * sizeof(int)); /* NOTE(review): both global arrays are allocated under the labels of the local_ arrays that follow - confirm */ + + local_domainWork = (float *)mymalloc("local_domainWork", SubNTopleaves * sizeof(float)); + local_domainCount = (int *)mymalloc("local_domainCount", SubNTopleaves * sizeof(int)); + + for(i = 0; i < SubNTopleaves; i++) + { + local_domainWork[i] = 0; + local_domainCount[i] = 0; + } + + /* find for each particle its top-leaf, and then add the associated cost with it */ + for(n = 0; n < NumPart; n++) + { + if(PS[n].GrNr == GrNr) + { + no = 0; + while(Sub_LocTopNodes[no].Daughter >= 0) + no = Sub_LocTopNodes[no].Daughter + (Key[n] - Sub_LocTopNodes[no].StartKey) / (Sub_LocTopNodes[no].Size >> 3); + + no = Sub_LocTopNodes[no].Leaf; + + local_domainCount[no] += 1; + local_domainWork[no] += 1; + } + } + + MPI_Allreduce(local_domainWork, domainWork, SubNTopleaves, MPI_FLOAT, 
MPI_SUM, SubComm); + MPI_Allreduce(local_domainCount, domainCount, SubNTopleaves, MPI_INT, MPI_SUM, SubComm); + + myfree(local_domainCount); + myfree(local_domainWork); + + /* now combine the top leaves to form the individual domains */ + + workhalfnode = 0.5 / ndomain; + workavg = 1.0 / ncpu; /* target share of the total work per task */ + work_before = workavg_before = 0; + + start = 0; + + for(i = 0; i < ncpu; i++) + { + work = 0; + end = start; + + work += fac_work * domainWork[end] + fac_load * domainCount[end]; + + while((work + work_before + (end + 1 < ndomain ? fac_work * domainWork[end + 1] + fac_load * domainCount[end + 1] : 0) < + workavg + workavg_before + workhalfnode) || + (i == ncpu - 1 && end < ndomain - 1)) + { + if((ndomain - end) > (ncpu - i)) + end++; /* only grow while at least one leaf stays available for every later task */ + else + break; + + work += fac_work * domainWork[end] + fac_load * domainCount[end]; + } + + for(j = start; j <= end; j++) + SubDomainTask[j] = i; + + work_before += work; + workavg_before += workavg; + start = end + 1; + } + + myfree(domainCount); + myfree(domainWork); +} + +/*! \brief Allocates all the stuff that will be required for the + * tree-construction/walk later on. + * + * Sets MaxTopNodes and allocates SubTopNodes and SubDomainTask with + * MaxTopNodes entries each. Terminates if SubDomainTask is already set. + * + * \return void + */ +void subfind_coll_domain_allocate(void) +{ + MaxTopNodes = (int)(All.TopNodeAllocFactor * All.MaxPart + 1); + + if(SubDomainTask) + terminate("subfind collective domain storage already allocated"); + + SubTopNodes = (struct topnode_data *)mymalloc_movable(&SubTopNodes, "SubTopNodes", (MaxTopNodes * sizeof(struct topnode_data))); + SubDomainTask = (int *)mymalloc_movable(&SubDomainTask, "SubDomainTask", (MaxTopNodes * sizeof(int))); +} + +/*! \brief Free memory used for subfind collective domain decomposition. 
+ * + * \return void + */ +void subfind_coll_domain_free(void) +{ + if(!SubDomainTask) + terminate("subfind collective domain storage not allocated"); + + myfree(SubDomainTask); /* freed in the reverse order of the allocation in subfind_coll_domain_allocate() */ + myfree(SubTopNodes); + + SubDomainTask = NULL; /* reset so that the guard in subfind_coll_domain_allocate() passes again */ + SubTopNodes = NULL; +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_coll_tree.c b/src/amuse/community/arepo/src/subfind/subfind_coll_tree.c new file mode 100644 index 0000000000..96d7db4b07 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_coll_tree.c @@ -0,0 +1,992 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_coll_tree.c + * \date 05/2018 + * \brief Functions for tree-construction for subfind collective. 
+ * \details contains functions: + * int subfind_coll_treebuild(int npart, struct unbind_data *mp) + * int subfind_coll_treebuild_construct(int npart, struct + * unbind_data *mp) + * int subfind_coll_treebuild_insert_single_point(int i, + * unsigned long long *intpos, int th, unsigned char levels) + * int subfind_coll_create_empty_nodes(int no, int topnode, + * int bits, int x, int y, int z, unsigned long long xc, + * unsigned long long yc, unsigned long long zc, + * unsigned long long ilen) + * void subfind_coll_insert_pseudo_particles(void) + * void subfind_coll_update_node_recursive(int no, int sib, + * int father, int *last) + * void subfind_coll_exchange_topleafdata(void) + * void subfind_coll_treeupdate_toplevel(int no, int topnode, + * int bits, int x, int y, int z) + * void subfind_coll_treeallocate(int maxpart, int maxindex) + * void subfind_coll_treefree(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "../gravity/forcetree.h" +#include "subfind.h" + +/*! \brief Main function to build subfind collective tree. + * + * \param[in] npart Number of particles. + * \param[in] mp Unbind data. + * + * \return Number of nodes in tree. 
+ */ +int subfind_coll_treebuild(int npart, struct unbind_data *mp) +{ + int flag; + + do + { + int flag_single = subfind_coll_treebuild_construct(npart, mp); + + MPI_Allreduce(&flag_single, &flag, 1, MPI_INT, MPI_MIN, SubComm); /* flag is negative as soon as one task of SubComm reports -1 */ + + if(flag < 0) + { + subfind_coll_treefree(); + + SubTreeAllocFactor *= 1.15; /* grow the allocation factor by 15 per cent before allocating again */ + + printf("SUBFIND-COLLECTIVE, root-task=%d: Increasing TreeAllocFactor, new value=%g\n", ThisTask, SubTreeAllocFactor); + fflush(stdout); + + subfind_coll_treeallocate(NumPart, All.MaxPart); + } + } + while(flag < 0); + + /* insert the pseudo particles that represent the mass distribution of other domains */ + subfind_coll_insert_pseudo_particles(); + + /* now compute the multipole moments recursively */ + int last = -1; /* nothing has been visited yet */ + + subfind_coll_update_node_recursive(SubTree_MaxPart, -1, -1, &last); + + if(last >= SubTree_MaxPart) + { + if(last >= SubTree_MaxPart + SubTree_MaxNodes) /* a pseudo-particle or imported particle */ + SubNextnode[last - SubTree_MaxNodes] = -1; + else + SubNodes[last].u.d.nextnode = -1; + } + else + SubNextnode[last] = -1; /* in all three cases the last visited element gets -1 as its successor */ + + subfind_coll_exchange_topleafdata(); + + SubTree_NextFreeNode = SubTree_MaxPart + 1; + + subfind_coll_treeupdate_toplevel(SubTree_MaxPart, 0, 1, 0, 0, 0); + + return SubTree_NumNodes; +} + +/*! \brief Constructs the collective subfind oct-tree. + * + * The index convention for accessing tree nodes is the following: + * node index + * [0...SubTree_MaxPart-1] references single particles, the indices + * [SubTree_MaxPart...SubTree_MaxPart+SubTree_MaxNodes-1] references tree + * nodes. + * [SubTree_MaxPart+SubTree_MaxNodes... + * SubTree_MaxPart+SubTree_MaxNodes+NTopleaves-1] references "pseudo + * particles", i.e. mark branches on foreign CPUs + * [SubTree_MaxPart+SubTree_MaxNodes+NTopleaves... + * SubTree_MaxPart+SubTree_MaxNodes+NTopleaves+0-1] references imported points + * + * `Nodes_base' points to the first tree node, while `Nodes' is shifted such + * that SubNodes[SubTree_MaxPart] gives the root tree node. 
+ * + * \param[in] npart Number of particles. + * \param[in] mp Unbind data. + * + * \return Number of nodes. + */ +int subfind_coll_treebuild_construct(int npart, struct unbind_data *mp) +{ /* NOTE: returns -1 instead of the node count when the node storage is exhausted */ + int i, j, k, no, flag_full = 0; + unsigned long long *intposp; + MyDouble *posp; + unsigned long long ibaselen = ((unsigned long long)1) << 52; + + /* create an empty root node */ + SubTree_NextFreeNode = SubTree_MaxPart; /* index of first free node */ + struct NODE *nfreep = &SubNodes[SubTree_NextFreeNode]; /* select first node */ + + for(j = 0; j < 8; j++) + nfreep->u.suns[j] = -1; + + nfreep->len = SubDomainLen; + for(j = 0; j < 3; j++) + nfreep->center[j] = SubDomainCenter[j]; + + SubTree_NumNodes = 1; + SubTree_NextFreeNode++; + + /* create a set of empty nodes corresponding to the top-level domain + * grid. We need to generate these nodes first to make sure that we have a + * complete top-level tree which allows the easy insertion of the + * pseudo-particles at the right place + */ + if(subfind_coll_create_empty_nodes(SubTree_MaxPart, 0, 1, 0, 0, 0, 0, 0, 0, ibaselen) < 0) + return -1; + + SubTree_FirstNonTopLevelNode = SubTree_NextFreeNode; + + /* if a high-resolution region in a global tree is used, we need to generate + * an additional set of empty nodes to make sure that we have a complete + * top-level tree for the high-resolution inset + */ + + SubTree_IntPos_list = + (unsigned long long *)mymalloc_movable(&SubTree_IntPos_list, "SubTree_IntPos_list", 3 * NumPart * sizeof(unsigned long long)); + + SubTree_ImportedNodeOffset = SubTree_MaxPart + SubTree_MaxNodes + SubNTopleaves; + + /* now we determine for each point the insertion top-level node, and the task on which this lies. + * First pass: check that every coordinate lies inside the sub-domain box and copy it to SubTree_Pos_list. + */ + for(i = 0; i < npart; i++) + { + for(j = 0; j < 3; j++) + { + if(mp) + k = mp[i].index; + else + k = i; /* k does not depend on j; it is recomputed for every coordinate */ + +#ifdef CELL_CENTER_GRAVITY + if(P[k].Type == 0) + posp = &PS[k].Center[j]; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + posp = &P[k].Pos[j]; + + if(*posp < SubDomainCorner[j] || 
*posp >= SubDomainCorner[j] + SubDomainLen) + { + terminate("out of box i=%d j=%d coord=%g SubDomainCorner=(%g|%g|%g) SubDomainLen=%g", i, j, *posp, SubDomainCorner[0], + SubDomainCorner[1], SubDomainCorner[2], SubDomainLen); + } + + SubTree_Pos_list[3 * k + j] = *posp; + } + } + + for(i = 0; i < npart; i++) + { + if(mp) + k = mp[i].index; + else + k = i; + + posp = &SubTree_Pos_list[3 * k]; + + unsigned long long xxb = force_double_to_int(((*posp++ - SubDomainCorner[0]) * SubDomainInverseLen) + 1.0); + unsigned long long yyb = force_double_to_int(((*posp++ - SubDomainCorner[1]) * SubDomainInverseLen) + 1.0); + unsigned long long zzb = force_double_to_int(((*posp++ - SubDomainCorner[2]) * SubDomainInverseLen) + 1.0); + unsigned long long mask = ((unsigned long long)1) << (52 - 1); + unsigned char shiftx = (52 - 1); + unsigned char shifty = (52 - 2); + unsigned char shiftz = (52 - 3); + unsigned char levels = 0; /* depth reached while descending the top-level tree */ + + intposp = &SubTree_IntPos_list[3 * k]; + + *intposp++ = xxb; + *intposp++ = yyb; + *intposp++ = zzb; + + no = 0; + while(SubTopNodes[no].Daughter >= 0) + { + unsigned char subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + mask >>= 1; + levels++; + + no = SubTopNodes[no].Daughter + SubTopNodes[no].MortonToPeanoSubnode[subnode]; /* subnode combines the current x, y and z bits as x | y << 1 | z << 2 */ + } + + no = SubTopNodes[no].Leaf; + + if(no >= SubTree_ImportedNodeOffset) + terminate("i=%d: no=%d SubTree_ImportedNodeOffset=%d", i, no, SubTree_ImportedNodeOffset); + + if(subfind_coll_treebuild_insert_single_point(k, &SubTree_IntPos_list[3 * k], SubDomainNodeIndex[no], levels) < 0) + { + flag_full = 1; /* out of tree nodes: stop inserting and report -1 below */ + break; + } + } + + myfree_movable(SubTree_IntPos_list); + + if(flag_full) + return -1; + + return SubTree_NumNodes; +} + +/*! \brief Inserts single point in tree. + * + * \param[in] i Index of particle. + * \param[in] intpos Integer position. + * \param[in] th Index in SubNodes. 
+ * \param[in] levels Level corresponding to subnode. + * + * \return void + */ +int subfind_coll_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char levels) +{ /* NOTE: returns 0 on success and -1 when SubTree_NumNodes reaches SubTree_MaxNodes; the header above still says void */ + int j, parent = -1; + unsigned char subnode = 0; + unsigned long long xxb = intpos[0]; + unsigned long long yyb = intpos[1]; + unsigned long long zzb = intpos[2]; + unsigned long long mask = ((unsigned long long)1) << ((52 - 1) - levels); + unsigned char shiftx = (52 - 1) - levels; + unsigned char shifty = (52 - 2) - levels; + unsigned char shiftz = (52 - 3) - levels; + signed long long centermask = (0xFFF0000000000000llu); + unsigned long long *intppos; + centermask >>= levels; /* NOTE(review): presumably relies on an arithmetic right shift of the negative signed value - confirm */ + + while(1) + { + if(th >= SubTree_MaxPart && th < SubTree_ImportedNodeOffset) /* we are dealing with an internal node */ + { + subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + centermask >>= 1; + mask >>= 1; + levels++; + + if(levels > MAX_TREE_LEVEL) + { + /* seems like we're dealing with particles at identical (or extremely close) + * locations. Shift subnode index to allow tree construction. Note: Multipole moments + * of tree are still correct, but one should choose MAX_TREE_LEVEL large enough to have + * DomainLen/2^MAX_TREE_LEVEL < gravitational softening length + */ + for(j = 0; j < 8; j++) + { + if(SubNodes[th].u.suns[subnode] < 0) + break; + + subnode++; + if(subnode >= 8) + subnode = 7; + } + } + + int nn = SubNodes[th].u.suns[subnode]; + + if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */ + { + parent = th; + th = nn; + } + else + { + /* here we have found an empty slot where we can attach + * the new particle as a leaf. + */ + SubNodes[th].u.suns[subnode] = i; + break; /* done for this particle */ + } + } + else + { + /* We try to insert into a leaf with a single particle. Need + * to generate a new internal node at this point. 
+ */ + SubNodes[parent].u.suns[subnode] = SubTree_NextFreeNode; + struct NODE *nfreep = &SubNodes[SubTree_NextFreeNode]; + + /* geometry of the new node: side length and centre, converted with SubDomainBigFac and SubDomainCorner */ + double len = ((double)(mask << 1)) * SubDomainBigFac; + double cx = ((double)((xxb & centermask) | mask)) * SubDomainBigFac + SubDomainCorner[0]; + double cy = ((double)((yyb & centermask) | mask)) * SubDomainBigFac + SubDomainCorner[1]; + double cz = ((double)((zzb & centermask) | mask)) * SubDomainBigFac + SubDomainCorner[2]; + + nfreep->len = len; + nfreep->center[0] = cx; + nfreep->center[1] = cy; + nfreep->center[2] = cz; + + for(j = 0; j < 8; j++) + nfreep->u.suns[j] = -1; + + if(th >= SubTree_ImportedNodeOffset) + { + terminate("unexpected here: th=%d SubTree_ImportedNodeOffset=%d", th, SubTree_ImportedNodeOffset); + } + else + intppos = &SubTree_IntPos_list[3 * th]; + + subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) | + ((unsigned char)((intppos[2] & mask) >> shiftz))); + + nfreep->u.suns[subnode] = th; /* the particle that occupied the slot is re-attached below the new node */ + + th = SubTree_NextFreeNode; /* resume trying to insert the new particle into the newly created internal node */ + SubTree_NumNodes++; + SubTree_NextFreeNode++; + + if(SubTree_NumNodes >= SubTree_MaxNodes) + { + if(SubTreeAllocFactor > MAX_TREE_ALLOC_FACTOR) + { + char buf[500]; + sprintf(buf, + "task %d: looks like a serious problem for particle %d, stopping with particle dump. SubTree_NumNodes=%d " + "SubTree_MaxNodes=%d 0=%d NumPart=%d\n", + SubThisTask, i, SubTree_NumNodes, SubTree_MaxNodes, 0, NumPart); + dump_particles(); + terminate(buf); /* NOTE(review): buf is passed as the first argument of terminate(), which other calls in this file use as the format string */ + } + + return -1; /* node storage exhausted */ + } + } + } + + return 0; +} + +/*! \brief Recursively creates a set of empty tree nodes which corresponds to + * the top-level tree for the domain grid. 
This is done to ensure that + * this top-level tree is always "complete" so that we can easily + * associate the pseudo-particles of other CPUs with tree-nodes at a + * given level in the tree, even when the particle population is so + * sparse that some of these nodes are actually empty. + * + * \param[in] no Index of node. + * \param[in] topnode Index of topnode. + * \param[in] bits Number of bits used for Peano key. + * \param[in] x Integer x position. + * \param[in] y Integer y position. + * \param[in] z Integer z position. + * \param[in] xc X position of corner. + * \param[in] yc Y position of corner. + * \param[in] zc Z position of corner. + * \param[in] ilen Sidelength. + * + * \return 0: success; -1 Number of nodes exceeds maximum number of nodes. + */ +int subfind_coll_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z, unsigned long long xc, unsigned long long yc, + unsigned long long zc, unsigned long long ilen) +{ + int i, j, k, n, sub, count; + unsigned long long xxc, yyc, zzc, ilenhalf; + + ilen >>= 1; /* side length of the daughter cells */ + + if(SubTopNodes[topnode].Daughter >= 0) + { + for(i = 0; i < 2; i++) + for(j = 0; j < 2; j++) + for(k = 0; k < 2; k++) + { + if(SubTree_NumNodes >= SubTree_MaxNodes) + { + if(SubTreeAllocFactor > MAX_TREE_ALLOC_FACTOR) + { + char buf[500]; + sprintf(buf, "task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", SubThisTask, + NTopnodes); /* NOTE(review): prints NTopnodes, not SubNTopnodes - confirm */ + dump_particles(); + terminate(buf); + } + return -1; + } + + sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits); /* lowest three bits of the key: used below as daughter offset in SubTopNodes */ + + count = i + 2 * j + 4 * k; /* slot of this octant in the suns[] array of the tree node */ + + SubNodes[no].u.suns[count] = SubTree_NextFreeNode; + + xxc = xc + i * ilen; + yyc = yc + j * ilen; + zzc = zc + k * ilen; + ilenhalf = ilen >> 1; + + double len = ((double)ilen) * SubDomainBigFac; + double cx = ((double)(xxc + ilenhalf)) * SubDomainBigFac + SubDomainCorner[0]; + double cy = ((double)(yyc + ilenhalf)) * SubDomainBigFac + SubDomainCorner[1]; + double cz = ((double)(zzc + ilenhalf)) 
* SubDomainBigFac + SubDomainCorner[2]; + + SubNodes[SubTree_NextFreeNode].len = len; + SubNodes[SubTree_NextFreeNode].center[0] = cx; + SubNodes[SubTree_NextFreeNode].center[1] = cy; + SubNodes[SubTree_NextFreeNode].center[2] = cz; + + for(n = 0; n < 8; n++) + SubNodes[SubTree_NextFreeNode].u.suns[n] = -1; + + if(SubTopNodes[SubTopNodes[topnode].Daughter + sub].Daughter == -1) + SubDomainNodeIndex[SubTopNodes[SubTopNodes[topnode].Daughter + sub].Leaf] = SubTree_NextFreeNode; /* remember which tree node represents this top leaf */ + + SubTree_NextFreeNode++; + SubTree_NumNodes++; + + if(subfind_coll_create_empty_nodes(SubTree_NextFreeNode - 1, SubTopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i, + 2 * y + j, 2 * z + k, xxc, yyc, zzc, ilen) < 0) + return -1; + } + } + + return 0; +} + +/*! \brief Inserts pseudo-particles which will represent the mass + * distribution of the other CPUs. Initially, the mass of the + * pseudo-particles is set to zero, and their coordinate is set to the + * center of the domain-cell they correspond to. These quantities will + * be updated later on. + * + * In this function itself, for every top leaf i that belongs to another + * task, slot 0 of the tree node SubDomainNodeIndex[i] is set to the + * pseudo-particle index SubTree_MaxPart + SubTree_MaxNodes + i. + * + * \return void + */ +void subfind_coll_insert_pseudo_particles(void) +{ + int i, index; + + for(i = 0; i < SubNTopleaves; i++) + { + index = SubDomainNodeIndex[i]; + + if(SubDomainTask[i] != SubThisTask) + SubNodes[index].u.suns[0] = SubTree_MaxPart + SubTree_MaxNodes + i; + } +} + +/*! \brief Determines the multipole moments for a given internal node + * and all its subnodes using a recursive computation. The result is + * stored in the SubNodes structure in the sequence of this tree-walk. + * + * \param[in] no Index of node. + * \param[in] sib Index of sibling. + * \param[in] father Index of parent node. + * \param[in, out] last Node index of last call. 
+ * + * \return void + */ +void subfind_coll_update_node_recursive(int no, int sib, int father, int *last) +{ + int j, jj, p, pp, nextsib, suns[8]; + double s[3], mass; + unsigned char maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + double mass_per_type[NSOFTTYPES]; +#ifdef ADAPTIVE_HYDRO_SOFTENING + unsigned char maxhydrosofttype; + unsigned char minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + if(no >= SubTree_MaxPart && no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */ + { + for(j = 0; j < 8; j++) + suns[j] = SubNodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will + overwrite one element (union!) */ + if(*last >= 0) + { + if(*last >= SubTree_MaxPart) + { + if(*last >= SubTree_MaxPart + SubTree_MaxNodes) + SubNextnode[*last - SubTree_MaxNodes] = no; /* a pseudo-particle or imported point */ + else + SubNodes[*last].u.d.nextnode = no; + } + else + SubNextnode[*last] = no; + } + + *last = no; /* this node is now the most recently visited element */ + + mass = 0; /* accumulators for the total mass and the mass-weighted position */ + s[0] = 0; + s[1] = 0; + s[2] = 0; + maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* start value; replaced below by any child whose All.ForceSoftening entry is larger */ + +#ifdef MULTIPLE_NODE_SOFTENING + for(j = 0; j < NSOFTTYPES; j++) + mass_per_type[j] = 0; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + maxhydrosofttype = NSOFTTYPES; + minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + for(j = 0; j < 8; j++) + { + if((p = suns[j]) >= 0) + { + /* check if we have a sibling on the same level */ + for(jj = j + 1; jj < 8; jj++) + if((pp = suns[jj]) >= 0) + break; + + if(jj < 8) /* yes, we do */ + nextsib = pp; + else + nextsib = sib; + + subfind_coll_update_node_recursive(p, nextsib, no, last); + + if(p < SubTree_MaxPart) /* a particle */ + { + MyDouble *pos = &SubTree_Pos_list[3 * p]; + + mass += P[p].Mass; + s[0] += P[p].Mass * pos[0]; + s[1] += P[p].Mass * pos[1]; + s[2] += P[p].Mass * pos[2]; + + if(All.ForceSoftening[maxsofttype] < 
All.ForceSoftening[P[p].SofteningType]) + maxsofttype = P[p].SofteningType; + +#ifdef MULTIPLE_NODE_SOFTENING +#ifdef ADAPTIVE_HYDRO_SOFTENING + mass_per_type[P[p].Type == 0 ? 0 : P[p].SofteningType] += P[p].Mass; + + if(P[p].Type == 0) + { + if(maxhydrosofttype < P[p].SofteningType) + maxhydrosofttype = P[p].SofteningType; + if(minhydrosofttype > P[p].SofteningType) + minhydrosofttype = P[p].SofteningType; + } +#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + mass_per_type[P[p].SofteningType] += P[p].Mass; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + else if(p < SubTree_MaxPart + SubTree_MaxNodes) /* an internal node: add the moments computed by the recursive call above */ + { + mass += SubNodes[p].u.d.mass; + s[0] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[0]; + s[1] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[1]; + s[2] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[2]; + + if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[SubNodes[p].u.d.maxsofttype]) + maxsofttype = SubNodes[p].u.d.maxsofttype; + +#ifdef MULTIPLE_NODE_SOFTENING + int k; + for(k = 0; k < NSOFTTYPES; k++) + mass_per_type[k] += SubExtNodes[p].mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(maxhydrosofttype < SubNodes[p].u.d.maxhydrosofttype) + maxhydrosofttype = SubNodes[p].u.d.maxhydrosofttype; + if(minhydrosofttype > SubNodes[p].u.d.minhydrosofttype) + minhydrosofttype = SubNodes[p].u.d.minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + else if(p < SubTree_MaxPart + SubTree_MaxNodes + SubNTopleaves) /* a pseudo particle */ + { + /* nothing to be done here because the mass of the + * pseudo-particle is still zero. This will be changed + * later. 
+ */ + } + else + { + /* an imported point */ + terminate("should not occur here"); + } + } + } + + if(mass) + { + s[0] /= mass; + s[1] /= mass; + s[2] /= mass; /* s now holds the centre of mass */ + } + else + { + s[0] = SubNodes[no].center[0]; + s[1] = SubNodes[no].center[1]; + s[2] = SubNodes[no].center[2]; /* no mass below this node: use the geometric centre instead */ + } + + SubNodes[no].u.d.mass = mass; + SubNodes[no].u.d.s[0] = s[0]; + SubNodes[no].u.d.s[1] = s[1]; + SubNodes[no].u.d.s[2] = s[2]; + SubNodes[no].u.d.maxsofttype = maxsofttype; + SubNodes[no].u.d.sibling = sib; + SubNodes[no].u.d.father = father; + +#ifdef MULTIPLE_NODE_SOFTENING + int k; + for(k = 0; k < NSOFTTYPES; k++) + SubExtNodes[no].mass_per_type[k] = mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + SubNodes[no].u.d.maxhydrosofttype = maxhydrosofttype; + SubNodes[no].u.d.minhydrosofttype = minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + else /* single particle or pseudo particle */ + { + if(*last >= 0) + { + if(*last >= SubTree_MaxPart) + { + if(*last >= SubTree_MaxPart + SubTree_MaxNodes) + SubNextnode[*last - SubTree_MaxNodes] = no; /* a pseudo-particle or an imported point */ + else + SubNodes[*last].u.d.nextnode = no; + } + else + SubNextnode[*last] = no; + } + + *last = no; /* only the successor link of the previous element is updated for particles and pseudo particles */ + } +} + +/*! \brief This function communicates the values of the multipole moments of + * the top-level tree-nodes of the domain grid. This data can then be + * used to update the pseudo-particles on each CPU accordingly. 
+ *
+ *         Every task packs the moments (centre of mass s, mass, maxsofttype
+ *         and, if enabled, the per-type masses and the hydro softening
+ *         bounds) of the top-leaf nodes it owns, in order of increasing leaf
+ *         index. The packed records of all tasks are concatenated on every
+ *         task of SubComm with MPI_Allgatherv, and the records of leaves
+ *         owned by other tasks are then written into the local
+ *         SubNodes / SubExtNodes entries.
+ *
+ * \return void
+ */
+void subfind_coll_exchange_topleafdata(void)
+{
+  int n, no, idx, task;
+  int *recvcounts, *recvoffset, *bytecounts, *byteoffset;
+  struct DomainNODE /* per-top-leaf record exchanged between tasks (sent as raw bytes) */
+  {
+    MyFloat s[3];
+    MyFloat mass;
+#ifdef MULTIPLE_NODE_SOFTENING
+    MyDouble mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+    unsigned char maxhydrosofttype;
+    unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    unsigned char maxsofttype;
+  } * DomainMoment, *loc_DomainMoment;
+
+  DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", SubNTopleaves * sizeof(struct DomainNODE)); /* receive buffer: one record per top-leaf */
+
+  /* share the pseudo-particle data across CPUs */
+  recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * SubNTask);
+  recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * SubNTask);
+  bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * SubNTask);
+  byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * SubNTask);
+
+  for(task = 0; task < SubNTask; task++)
+    recvcounts[task] = 0;
+
+  for(n = 0; n < SubNTopleaves; n++) /* count the top-leaves owned by each task */
+    {
+      if(SubDomainTask[n] < 0 || SubDomainTask[n] >= SubNTask)
+        terminate("n=%d|%d: SubDomainTask[n]=%d", n, SubNTopleaves, SubDomainTask[n]);
+
+      recvcounts[SubDomainTask[n]]++;
+    }
+
+  for(task = 0; task < SubNTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE); /* same counts in bytes; the size_t product is stored in an int */
+
+  for(task = 1, recvoffset[0] = 0, byteoffset[0] = 0; task < SubNTask; task++) /* exclusive prefix sums: start offset of each task's records */
+    {
+      recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1];
+      byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+    }
+
+  loc_DomainMoment = (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[SubThisTask] * sizeof(struct DomainNODE)); /* send buffer: one record per locally owned top-leaf */
+
+  for(n = 0, idx = 0; n < SubNTopleaves; n++) /* pack the local leaves in order of increasing n */
+    {
+      if(SubDomainTask[n] == SubThisTask)
+        {
+          no = SubDomainNodeIndex[n];
+
+          /* read out the multipole moments from the local base cells */
+          loc_DomainMoment[idx].s[0] = SubNodes[no].u.d.s[0];
+          loc_DomainMoment[idx].s[1] = SubNodes[no].u.d.s[1];
+          loc_DomainMoment[idx].s[2] = SubNodes[no].u.d.s[2];
+          loc_DomainMoment[idx].mass = SubNodes[no].u.d.mass;
+          loc_DomainMoment[idx].maxsofttype = SubNodes[no].u.d.maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+          int k;
+          for(k = 0; k < NSOFTTYPES; k++)
+            loc_DomainMoment[idx].mass_per_type[k] = SubExtNodes[no].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+          loc_DomainMoment[idx].maxhydrosofttype = SubNodes[no].u.d.maxhydrosofttype;
+          loc_DomainMoment[idx].minhydrosofttype = SubNodes[no].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+          idx++;
+        }
+    }
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[SubThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, SubComm); /* every task receives the records of all tasks, grouped by task */
+
+  for(task = 0; task < SubNTask; task++)
+    recvcounts[task] = 0; /* reused below as a per-task read cursor */
+
+  for(n = 0; n < SubNTopleaves; n++)
+    {
+      task = SubDomainTask[n];
+      if(task != SubThisTask) /* locally owned leaves are not overwritten */
+        {
+          no = SubDomainNodeIndex[n];
+          idx = recvoffset[task] + recvcounts[task]++; /* next unread record of that task; same leaf order as used for packing above */
+
+          SubNodes[no].u.d.s[0] = DomainMoment[idx].s[0];
+          SubNodes[no].u.d.s[1] = DomainMoment[idx].s[1];
+          SubNodes[no].u.d.s[2] = DomainMoment[idx].s[2];
+          SubNodes[no].u.d.mass = DomainMoment[idx].mass;
+          SubNodes[no].u.d.maxsofttype = DomainMoment[idx].maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+          int k;
+          for(k = 0; k < NSOFTTYPES; k++)
+            SubExtNodes[no].mass_per_type[k] = DomainMoment[idx].mass_per_type[k];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+          SubNodes[no].u.d.maxhydrosofttype = DomainMoment[idx].maxhydrosofttype;
+          SubNodes[no].u.d.minhydrosofttype = DomainMoment[idx].minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+
+  myfree(loc_DomainMoment); /* the buffers are freed in reverse order of their allocation */
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvoffset);
+  myfree(recvcounts);
+  myfree(DomainMoment);
+}
+
+/*!
\brief This function updates the top-level tree after the multipole
+ *         moments of the pseudo-particles have been updated.
+ *
+ *         If the top node has daughters (SubTopNodes[topnode].Daughter >= 0),
+ *         the function first recurses into all eight of them, claiming one
+ *         node index from SubTree_NextFreeNode per daughter, and afterwards
+ *         recomputes mass, centre of mass and softening information of node
+ *         'no' from its eight daughter nodes (reached via nextnode and then
+ *         the sibling links). Top nodes without daughters are left untouched.
+ *
+ * \param[in] no Index of node (into SubNodes).
+ * \param[in] topnode Index of topnode (into SubTopNodes).
+ * \param[in] bits Number of bits used; passed to peano_hilbert_key() and
+ *            increased by one per recursion level.
+ * \param[in] x Integer x position; doubled (plus 0 or 1) per level.
+ * \param[in] y Integer y position; doubled (plus 0 or 1) per level.
+ * \param[in] z Integer z position; doubled (plus 0 or 1) per level.
+ *
+ * \return void
+ */
+void subfind_coll_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z)
+{
+  int i, j, k, sub;
+  int p;
+  double s[3], mass;
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  if(SubTopNodes[topnode].Daughter >= 0) /* only top nodes with daughters are updated */
+    {
+      for(i = 0; i < 2; i++)
+        for(j = 0; j < 2; j++)
+          for(k = 0; k < 2; k++)
+            {
+              sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits); /* lowest three key bits select the daughter slot */
+
+              SubTree_NextFreeNode++; /* claim a node index for this daughter before descending */
+              subfind_coll_treeupdate_toplevel(SubTree_NextFreeNode - 1, SubTopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i,
+                                               2 * y + j, 2 * z + k);
+            }
+
+      mass = 0;
+      s[0] = 0;
+      s[1] = 0;
+      s[2] = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* start value; replaced by any daughter type with a larger All.ForceSoftening entry */
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      p = SubNodes[no].u.d.nextnode; /* first daughter; the remaining ones follow via the sibling links */
+
+      for(j = 0; j < 8; j++) /* since we are dealing with top-level nodes, we know that there are 8 consecutive daughter nodes */
+        {
+          if(p >= SubTree_MaxPart && p < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+            {
+              mass += SubNodes[p].u.d.mass;
+              s[0] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[0];
+              s[1] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[1];
+              s[2] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[2];
+              if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[SubNodes[p].u.d.maxsofttype])
+                maxsofttype = SubNodes[p].u.d.maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+              int k; /* block-local k; hides the function-scope k */
+              for(k = 0; k < NSOFTTYPES; k++)
+                mass_per_type[k] += SubExtNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(maxhydrosofttype < SubNodes[p].u.d.maxhydrosofttype)
+                maxhydrosofttype = SubNodes[p].u.d.maxhydrosofttype;
+              if(minhydrosofttype > SubNodes[p].u.d.minhydrosofttype)
+                minhydrosofttype = SubNodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+            }
+          else
+            terminate("may not happen");
+
+          p = SubNodes[p].u.d.sibling;
+        }
+
+      if(mass) /* mass-weighted mean position; an empty node falls back to SubNodes[no].center */
+        {
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else
+        {
+          s[0] = SubNodes[no].center[0];
+          s[1] = SubNodes[no].center[1];
+          s[2] = SubNodes[no].center[2];
+        }
+
+      SubNodes[no].u.d.s[0] = s[0];
+      SubNodes[no].u.d.s[1] = s[1];
+      SubNodes[no].u.d.s[2] = s[2];
+      SubNodes[no].u.d.mass = mass;
+      SubNodes[no].u.d.maxsofttype = maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+      int k; /* declared mid-block; hides the function-scope k from here on */
+      for(k = 0; k < NSOFTTYPES; k++)
+        SubExtNodes[no].mass_per_type[k] = mass_per_type[k];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      SubNodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      SubNodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    }
+}
+
+/*! \brief Allocates tree arrays.
+ *
+ * This function allocates the memory used for storage of the tree nodes.
+ * Usually, the number of required nodes is of order 0.7*maxpart, but if this
+ * is insufficient, the code will try to allocate more space.
+ *
+ * \param[in] maxpart Maximum number of nodes.
+ * \param[in] maxindex Maximum number of particles.
+ * + * \return void + */ +void subfind_coll_treeallocate(int maxpart, int maxindex) +{ + if(SubNodes) + terminate("already allocated"); + + SubTree_MaxPart = maxindex; + SubTree_MaxNodes = (int)(SubTreeAllocFactor * maxpart) + SubNTopnodes; + + SubDomainNodeIndex = (int *)mymalloc_movable(&SubDomainNodeIndex, "SubDomainNodeIndex", SubNTopleaves * sizeof(int)); + + SubTree_Pos_list = (MyDouble *)mymalloc_movable(&SubTree_Pos_list, "SubTree_Pos_list", 3 * maxpart * sizeof(MyDouble)); + + SubNodes = (struct NODE *)mymalloc_movable(&SubNodes, "SubNodes", (SubTree_MaxNodes + 1) * sizeof(struct NODE)); + SubNodes -= SubTree_MaxPart; + +#ifdef MULTIPLE_NODE_SOFTENING + SubExtNodes = (struct ExtNODE *)mymalloc_movable(&SubExtNodes, "SubExtNodes", (SubTree_MaxNodes + 1) * sizeof(struct ExtNODE)); + SubExtNodes -= SubTree_MaxPart; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + SubNextnode = (int *)mymalloc_movable(&SubNextnode, "SubNextnode", (SubTree_MaxPart + SubNTopleaves) * sizeof(int)); +} + +/*! \brief Free tree arrays. + * + * This function frees the memory allocated for the tree, i.e. it frees + * the space allocated by the function subfind_coll_treeallocate(). 
+ * + * \return void + */ +void subfind_coll_treefree(void) +{ + if(SubNodes) + { + myfree(SubNextnode); + +#ifdef MULTIPLE_NODE_SOFTENING + myfree(SubExtNodes + SubTree_MaxPart); + SubExtNodes = NULL; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + myfree(SubNodes + SubTree_MaxPart); + myfree(SubTree_Pos_list); + myfree(SubDomainNodeIndex); + + SubNodes = NULL; + SubDomainNodeIndex = NULL; + SubNextnode = NULL; + SubTree_Pos_list = NULL; + } + else + terminate("trying to free the tree even though it's not allocated"); +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c b/src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c new file mode 100644 index 0000000000..1a7cbd67c7 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c @@ -0,0 +1,460 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_coll_treewalk.c + * \date 05/2018 + * \brief Algorithm for collective tree walk; computes gravitational + * binding energy. 
+ * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void subfind_potential_compute(int num, struct unbind_data + * *darg, int phasearg, double weakly_bound_limit_arg) + * static int subfind_force_treeevaluate_potential(int target, + * int mode, int threadid) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "../fof/fof.h" +#include "subfind.h" + +static int subfind_force_treeevaluate_potential(int target, int mode, int threadid); + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + unsigned char SofteningType; + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + for(int k = 0; k < 3; k++) + in->Pos[k] = PS[i].Center[k]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + for(int k = 0; k < 3; k++) + in->Pos[k] = P[i].Pos[k]; + } + + in->SofteningType = P[i].SofteningType; + + in->Firstnode = firstnode; +} + +/*! 
\brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyFloat Potential; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? + * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + PS[i].Potential = out->Potential; + } + else /* combine */ + { + PS[i].Potential += out->Potential; + } +} + +#define USE_SUBCOMM_COMMUNICATOR +#include "../utils/generic_comm_helpers2.h" + +static int Num; +static struct unbind_data *d; +static int phase; +static double weakly_bound_limit; + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int i, idx; + + { + int j, threadid = get_thread_num(); + + for(j = 0; j < SubNTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + idx = NextParticle++; + + if(idx >= Num) + break; + + i = d[idx].index; + + if(phase == 1) + if(PS[i].BindingEnergy <= weakly_bound_limit) + continue; + + subfind_force_treeevaluate_potential(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. 
+ * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + subfind_force_treeevaluate_potential(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Computes potential energy. + * + * \param[in] num Number of elements. + * \param[in] darg Unbind data. + * \param[in] phasearg Which phase are we in? 1:ignore weakly bound particles. + * \param[in] weakly_bound_limit_arg Minimum binding energy between two + * particles that is accounted for. + * + * \return void + */ +void subfind_potential_compute(int num, struct unbind_data *darg, int phasearg, double weakly_bound_limit_arg) +{ + generic_set_MaxNexport(); + + Num = num; + d = darg; + phase = phasearg; + weakly_bound_limit = weakly_bound_limit_arg; + + generic_comm_pattern(Num, kernel_local, kernel_imported); + + double atime; + + if(All.ComovingIntegrationOn) + atime = All.Time; + else + atime = 1; + + for(int i = 0; i < num; i++) + { + if(phase == 1) + if(PS[d[i].index].BindingEnergy <= weakly_bound_limit) + continue; + + PS[d[i].index].Potential *= All.G / atime; + } +} + +/*! \brief Evaluate function of potential calculation. + * + * \param[in] target Index of particle/cell/imported data. + * \param[in] mode Flag if it operates on local or imported data. + * \param[in] threadid ID of thread. 
+ *
+ *  The walk distinguishes four ranges of the current tree index 'no':
+ *  - no < SubTree_MaxPart: a single particle; its contribution is added.
+ *  - no < SubTree_MaxPart + SubTree_MaxNodes: an internal node, which is
+ *    either opened (walk continues at nextnode) or used as a whole (walk
+ *    continues at sibling).
+ *  - no >= SubTree_ImportedNodeOffset: not allowed here, terminates.
+ *  - otherwise: a pseudo-particle; in local mode the target is exported via
+ *    subfind_treefind_collective_export_node_threads(), in imported mode the
+ *    run terminates.
+ *
+ *  The sum is stored unscaled in PS[target].Potential (local mode) or in
+ *  DataResult[target] (imported mode).
+ *
+ * \return 0
+ */
+static int subfind_force_treeevaluate_potential(int target, int mode, int threadid)
+{
+  struct NODE *nop = 0;
+  int no, numnodes, *firstnode, k;
+  double r2, dx, dy, dz, mass, r, u, h_i, h_j, hmax, h_inv, wp;
+  double pos_x, pos_y, pos_z;
+#ifdef MULTIPLE_NODE_SOFTENING
+  struct ExtNODE *extnop = 0;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+#if !defined(GRAVITY_NOT_PERIODIC)
+  double xtmp, ytmp, ztmp; /* not referenced directly below; presumably scratch variables of the GRAVITY_NEAREST_* macros -- confirm */
+#endif
+
+  data_in local, *in;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      in = &local;
+
+      numnodes = 1; /* a local walk has a single start point: the root */
+      firstnode = NULL;
+    }
+  else
+    {
+      in = &DataGet[target]; /* target indexes the imported data here */
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos_x = in->Pos[0];
+  pos_y = in->Pos[1];
+  pos_z = in->Pos[2];
+  h_i = All.ForceSoftening[in->SofteningType]; /* softening length of the target */
+
+  double pot = 0;
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        no = SubTree_MaxPart; /* root node */
+      else
+        {
+          no = firstnode[k];
+          no = SubNodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+#ifdef MULTIPLE_NODE_SOFTENING
+          int indi_flag1 = -1, indi_flag2 = 0; /* default: the per-type loop below runs once, with type == -1 */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+          if(no < SubTree_MaxPart) /* single particle */
+            {
+              dx = GRAVITY_NEAREST_X(SubTree_Pos_list[3 * no + 0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(SubTree_Pos_list[3 * no + 1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(SubTree_Pos_list[3 * no + 2] - pos_z);
+              r2 = dx * dx + dy * dy + dz * dz;
+
+              mass = P[no].Mass;
+
+              h_j = All.ForceSoftening[P[no].SofteningType];
+
+              if(h_j > h_i) /* use the larger of the two softening lengths */
+                hmax = h_j;
+              else
+                hmax = h_i;
+
+              no = SubNextnode[no];
+            }
+          else if(no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no < SubTree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the
+                                                           branch */
+                    break;
+                }
+
+              nop = &SubNodes[no];
+              mass = nop->u.d.mass;
+
+              dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z);
+
+              r2 = dx * dx + dy * dy + dz * dz;
+
+              /* check Barnes-Hut opening criterion */
+              if(nop->len * nop->len > r2 * All.ErrTolThetaSubfind * All.ErrTolThetaSubfind)
+                {
+                  /* open cell */
+                  if(mass) /* a node with zero mass is not opened; it falls through to the code below */
+                    {
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+                }
+
+              h_j = All.ForceSoftening[nop->u.d.maxsofttype];
+
+              if(h_j > h_i)
+                {
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype)
+                    if(SubExtNodes[no].mass_per_type[0] > 0)
+                      if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype])
+                        {
+                          /* open cell */
+                          no = nop->u.d.nextnode;
+                          continue;
+                        }
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  indi_flag1 = 0; /* evaluate the node separately for each softening type 0 .. NSOFTTYPES-1 */
+                  indi_flag2 = NSOFTTYPES;
+#else  /* #ifdef MULTIPLE_NODE_SOFTENING */
+                  if(r2 < h_j * h_j)
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */
+                  hmax = h_j;
+                }
+              else
+                hmax = h_i;
+
+              /* node can be used */
+#ifdef MULTIPLE_NODE_SOFTENING
+              extnop = &SubExtNodes[no];
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+              no = nop->u.d.sibling;
+            }
+          else if(no >= SubTree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              terminate("this is not expected here");
+            }
+          else /* remaining index range: a pseudo-particle */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES");
+
+              subfind_treefind_collective_export_node_threads(no, target, threadid);
+
+              no = SubNextnode[no - SubTree_MaxNodes]; /* links of pseudo-particles are stored behind those of the particles */
+              continue;
+            }
+
+          /* now evaluate the potential contribution */
+          r = sqrt(r2);
+
+#ifdef MULTIPLE_NODE_SOFTENING
+          int type;
+          for(type = indi_flag1; type < indi_flag2; type++)
+            {
+              if(type >= 0) /* per-type pass: replace mass and softening by those of this type */
+                {
+                  mass = extnop->mass_per_type[type];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(type == 0)
+                    h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype];
+                  else
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                    h_j = All.ForceSoftening[type];
+
+                  if(h_j > h_i)
+                    hmax = h_j;
+                  else
+                    hmax = h_i;
+                }
+
+              if(mass)
+                {
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+                  if(r >= hmax) /* at or beyond the softening length: unsoftened -mass / r */
+                    pot += FLT(-mass / r);
+                  else
+                    {
+                      h_inv = 1.0 / hmax;
+
+                      u = r * h_inv; /* separation in units of hmax; the polynomial below is piecewise in u */
+                      if(u < 0.5)
+                        wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6));
+                      else
+                        wp = -3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u)));
+
+                      pot += FLT(mass * h_inv * wp);
+                    }
+#ifdef MULTIPLE_NODE_SOFTENING
+                }
+            }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+
+  out.Potential = pot;
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_collective.c b/src/amuse/community/arepo/src/subfind/subfind_collective.c
new file mode 100644
index 0000000000..02c702b6de
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_collective.c
@@ -0,0 +1,2417 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              .
+ * + * \file src/subfind/subfind_collective.c + * \date 05/2018 + * \brief Subfind algorithm running collectively on all tasks. + * \details contains functions: + * void subfind_process_group_collectively(int nsubgroups_cat) + * void subfind_fof_calc_am_collective(int snapnr, int + * ngroups_cat) + * void subfind_col_find_coll_candidates(int totgrouplen) + * void subfind_unbind_independent_ones(int count_cand) + * int subfind_col_unbind(struct unbind_data *d, int num, int + * *num_non_gas) + * void subfind_poll_for_requests(void) + * long long subfind_distlinklist_setrank_and_get_next( + * long long index, long long *rank) + * void subfind_distlinklist_set_next(long long index, + * long long next) + * void subfind_distlinklist_add_particle(long long index) + * void subfind_distlinklist_mark_particle(long long index, + * int target, int submark) + * void subfind_distlinklist_add_bound_particles( + * long long index, int nsub) + * long long subfind_distlinklist_get_next(long long index) + * long long subfind_distlinklist_get_rank(long long index) + * long long subfind_distlinklist_get_head(long long index) + * void subfind_distlinklist_get_two_heads(long long ngb_index1, + * long long ngb_index2, long long *head, long long + * *head_attach) + * void subfind_distlinklist_set_headandnext(long long index, + * long long head, long long next) + * int subfind_distlinklist_get_tail_set_tail_increaselen( + * long long index, long long *tail, long long newtail) + * void subfind_distlinklist_set_tailandlen(long long index, + * long long tail, int len) + * void subfind_distlinklist_get_tailandlen(long long index, + * long long *tail, int *len) + * void subfind_distlinklist_set_all(long long index, + * long long head, long long tail, int len, long long next) + * int subfind_compare_densities(const void *a, const void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "../fof/fof.h" +#include "subfind.h" + +#define TAG_POLLING_DONE 201 +#define TAG_SET_ALL 202 +#define TAG_GET_NGB_INDICES 204 +#define TAG_GET_TAILANDLEN 205 +#define TAG_GET_TAILANDLEN_DATA 206 +#define TAG_SET_TAILANDLEN 207 +#define TAG_SET_HEADANDNEXT 209 +#define TAG_SETHEADGETNEXT_DATA 210 +#define TAG_SET_NEXT 211 +#define TAG_SETHEADGETNEXT 213 +#define TAG_GET_NEXT 215 +#define TAG_GET_NEXT_DATA 216 +#define TAG_GET_HEAD 217 +#define TAG_GET_HEAD_DATA 218 +#define TAG_ADD_PARTICLE 219 +#define TAG_ADDBOUND 220 +#define TAG_NID 222 +#define TAG_NID_DATA 223 +#define TAG_SETRANK 224 +#define TAG_SETRANK_OUT 226 +#define TAG_GET_RANK 227 +#define TAG_GET_RANK_DATA 228 +#define TAG_MARK_PARTICLE 229 +#define TAG_SET_NEWTAIL 230 +#define TAG_GET_OLDTAIL 231 +#define TAG_GET_TWOHEADS 232 +#define TAG_GET_TWOHEADS_DATA 233 + +#define MASK ((((long long)1) << 32) - 1) +#define HIGHBIT (1 << 30) + +static long long *Head, *Next, *Tail; +static int *Len; +static int LocalLen; +static int count_cand, max_coll_candidates; + +static struct unbind_data *ud; + +/*! \brief Data structure for sorting density data. + */ +static struct sort_density_data +{ + MyFloat density; + int ngbcount; + long long index; /* this will store the task in the upper word */ + long long ngb_index1, ngb_index2; +} * sd; + +/*! \brief Processes a group collectively on all MPI tasks. 
+ * + * \param[in] nsubgroups_cat (unused) + * + * \return void + */ +void subfind_process_group_collectively(int nsubgroups_cat) +{ + int totgrouplen1, totgrouplen2; + + /* make a sanity check: We should have exactly 1 group, stored on the root of the processor subset */ + if(SubThisTask == 0) + { + if(Ngroups != 1) + terminate("Ngroups=%d != 1 SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask); + } + else + { + if(Ngroups != 0) + terminate("Ngroups=%d != 0 SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask); + } + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: Collectively doing halo %d of length %d on %d processors.\n", ThisTask, + Group[0].GrNr, Group[0].Len, SubNTask); + + GrNr = Group[0].GrNr; + totgrouplen2 = Group[0].Len; + for(int j = 0; j < 3; j++) + GrCM[j] = Group[0].CM[j]; + } + + /* tell everybody in the set the group number, the center of mass, and the grouplen */ + MPI_Bcast(&GrNr, 1, MPI_INT, 0, SubComm); + MPI_Bcast(&GrCM[0], 3 * sizeof(MyDouble), MPI_BYTE, 0, SubComm); + MPI_Bcast(&totgrouplen2, 1, MPI_INT, 0, SubComm); + + NumPartGroup = 0; + for(int i = 0; i < NumPart; i++) + if(PS[i].GrNr == GrNr) + NumPartGroup++; + + MPI_Allreduce(&NumPartGroup, &totgrouplen1, 1, MPI_INT, MPI_SUM, SubComm); + + /* sanity check that we actually have all the right particles on the processor subset */ + if(totgrouplen1 != totgrouplen2) + terminate("totgrouplen1=%d != totgrouplen2=%d", totgrouplen1, totgrouplen2); /* inconsistency */ + + /* do a domain decomposition just for this halo */ + subfind_coll_domain_decomposition(); + + /* copy over the domain dimensions to serial tree code, as this may be used in the collective unbinding */ + subfind_loctree_copyExtent(); + + /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other arrays! 
*/ + submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart); + for(int i = 0; i < NumPart; i++) + { + PS[i].SubNr = TotNgroups + 1; /* set a default that is larger than reasonable group number */ + PS[i].OldIndex = i; + submp[i].index = i; + submp[i].GrNr = PS[i].GrNr; + submp[i].DM_Density = PS[i].Density; + } + qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density); + subfind_reorder_according_to_submp(); + myfree(submp); + + /* note: now we have the particles of the group at the beginning, but SPH particles are not aligned. + They can however be accessed via SphP[PS[i].OldIndex] */ + + /* re-determine the number of local group particles, which has changed due to domain decomposition */ + NumPartGroup = 0; + for(int i = 0; i < NumPart; i++) + if(PS[i].GrNr == GrNr) + NumPartGroup++; + + /* allocate some storage for the halo */ + subfind_coll_treeallocate(NumPart, All.MaxPart); + + /* construct a tree for the halo */ + subfind_coll_treebuild(NumPartGroup, NULL); + +#ifdef SUBFIND_EXTENDED_PROPERTIES + // calculate binding energy of full fof group + { + struct unbind_data *ud = (struct unbind_data *)mymalloc_movable(&ud, "ud", NumPartGroup * sizeof(struct unbind_data)); + + NumPartGroup = 0; + for(int i = 0; i < NumPart; i++) + if(PS[i].GrNr == GrNr) + ud[NumPartGroup++].index = i; + + subfind_potential_compute(NumPartGroup, ud, 0, 0); + + double binding_energy_local = 0, binding_energy_global; + + for(int i = 0; i < NumPartGroup; i++) + binding_energy_local += 0.5 * P[ud[i].index].Mass * PS[ud[i].index].Potential; + + MPI_Allreduce(&binding_energy_local, &binding_energy_global, 1, MPI_DOUBLE, MPI_SUM, SubComm); + Group[0].Epot = binding_energy_global; + + myfree(ud); + ud = NULL; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + long long p; + int len; + int ncand, parent, totcand, nremaining; + int max_loc_length, max_length; + int count, countall, *countlist, *offset; + int i, j, k, nr, grindex 
= 0, nsubs, subnr; + int count_leaves, tot_count_leaves, master; + struct coll_cand_dat *tmp_coll_candidates = 0; + double t0, t1, tt0, tt1; + + /* determine the radius that encloses a certain number of link particles */ + subfind_find_linkngb(); + + sd = (struct sort_density_data *)mymalloc_movable(&sd, "sd", NumPartGroup * sizeof(struct sort_density_data)); + + /* determine the indices of the nearest two denser neighbours within the link region */ + NgbLoc = (struct nearest_ngb_data *)mymalloc("NgbLoc", NumPartGroup * sizeof(struct nearest_ngb_data)); + R2Loc = (struct nearest_r2_data *)mymalloc("R2Loc", NumPartGroup * sizeof(struct nearest_r2_data)); + + subfind_find_nearesttwo(); + + for(i = 0; i < NumPartGroup; i++) + { + sd[i].density = PS[i].Density; + sd[i].ngbcount = NgbLoc[i].count; + sd[i].index = (((long long)SubThisTask) << 32) + i; + sd[i].ngb_index1 = NgbLoc[i].index[0]; + sd[i].ngb_index2 = NgbLoc[i].index[1]; + } + myfree(R2Loc); + myfree(NgbLoc); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: before parallel sort of 'sd'.\n", ThisTask); + fflush(stdout); + } + + /* sort the densities */ + parallel_sort_comm(sd, NumPartGroup, sizeof(struct sort_density_data), subfind_compare_densities, SubComm); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: parallel sort of 'sd' done.\n", ThisTask); + fflush(stdout); + } + + /* allocate and initialize distributed link list */ + Head = (long long *)mymalloc_movable(&Head, "Head", NumPartGroup * sizeof(long long)); + Next = (long long *)mymalloc_movable(&Next, "Next", NumPartGroup * sizeof(long long)); + Tail = (long long *)mymalloc_movable(&Tail, "Tail", NumPartGroup * sizeof(long long)); + Len = (int *)mymalloc_movable(&Len, "Len", NumPartGroup * sizeof(int)); + + for(i = 0; i < NumPartGroup; i++) + { + Head[i] = Next[i] = Tail[i] = -1; + Len[i] = 0; + } + + /* allocate a list to store subhalo coll_candidates */ + max_coll_candidates = imax((NumPartGroup / 50), 
200); + coll_candidates = (struct coll_cand_dat *)mymalloc_movable(&coll_candidates, "coll_candidates", + max_coll_candidates * sizeof(struct coll_cand_dat)); + count_cand = 0; + + subfind_col_find_coll_candidates(totgrouplen1); + + /* establish total number of coll_candidates */ + MPI_Allreduce(&count_cand, &totcand, 1, MPI_INT, MPI_SUM, SubComm); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: total number of subhalo coll_candidates=%d\n", ThisTask, totcand); + fflush(stdout); + } + + nremaining = totcand; + + for(i = 0; i < NumPartGroup; i++) + Tail[i] = -1; + + for(i = 0; i < count_cand; i++) + coll_candidates[i].parent = 0; + + do + { + /* Let's see which coll_candidates can be unbound independent from each other. + We identify them with those coll_candidates that have no embedded other candidate */ + t0 = second(); + if(SubThisTask == 0) + tmp_coll_candidates = (struct coll_cand_dat *)mymalloc("tmp_coll_candidates", totcand * sizeof(struct coll_cand_dat)); + + count = count_cand; + count *= sizeof(struct coll_cand_dat); + + countlist = (int *)mymalloc("countlist", SubNTask * sizeof(int)); + offset = (int *)mymalloc("offset", SubNTask * sizeof(int)); + + MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm); + + for(i = 1, offset[0] = 0; i < SubNTask; i++) + offset[i] = offset[i - 1] + countlist[i - 1]; + + MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm); + + if(SubThisTask == 0) + { + for(k = 0; k < totcand; k++) + { + tmp_coll_candidates[k].nsub = k; + tmp_coll_candidates[k].subnr = k; + } + + qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_rank); + for(k = 0; k < totcand; k++) + { + if(tmp_coll_candidates[k].parent >= 0) + { + tmp_coll_candidates[k].parent = 0; + + for(j = k + 1; j < totcand; j++) + { + if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len) + 
break; + + if(tmp_coll_candidates[j].parent < 0) /* ignore these */ + continue; + + if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >= + tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len) + { + tmp_coll_candidates[k].parent++; /* we here count the number of subhalos that are enclosed */ + } + else + { + terminate("k=%d|%d has rank=%d and len=%d. j=%d has rank=%d and len=%d\n", k, totcand, + (int)tmp_coll_candidates[k].rank, (int)tmp_coll_candidates[k].len, j, + (int)tmp_coll_candidates[j].rank, (int)tmp_coll_candidates[j].len); + } + } + } + } + + qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_subnr); + } + + MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm); + + myfree(offset); + myfree(countlist); + + if(SubThisTask == 0) + myfree(tmp_coll_candidates); + + for(i = 0, count_leaves = 0, max_loc_length = 0; i < count_cand; i++) + if(coll_candidates[i].parent == 0) + { + if(coll_candidates[i].len > max_loc_length) + max_loc_length = coll_candidates[i].len; + + if(coll_candidates[i].len > 0.20 * All.TotNumPart / NTask) /* seems large, let's rather do it collectively */ + { + coll_candidates[i].parent++; /* this will ensure that it is not considered in this round */ + } + else + { + count_leaves++; + } + } + + MPI_Allreduce(&count_leaves, &tot_count_leaves, 1, MPI_INT, MPI_SUM, SubComm); + MPI_Allreduce(&max_loc_length, &max_length, 1, MPI_INT, MPI_MAX, SubComm); + + t1 = second(); + if(SubThisTask == 0) + printf( + "SUBFIND-COLLECTIVE, root-task=%d: number of subhalo coll_candidates that can be done independently=%d. 
(Largest size %d, " + "finding took %g sec)\n", + ThisTask, tot_count_leaves, max_length, timediff(t0, t1)); + + if(tot_count_leaves <= 0) /* if there are none left, we break and do the reset collectively */ + { + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: too few, I do the rest of %d collectively\n", ThisTask, nremaining); + break; + } + + nremaining -= tot_count_leaves; + + for(i = 0; i < NumPart; i++) + { + PS[i].origintask = PS[i].TargetTask = SubThisTask; + PS[i].originindex = i; + PS[i].submark = HIGHBIT; + if(i < NumPartGroup) + if(Tail[i] >= 0) /* this means this particle is already bound to a substructure */ + PS[i].origintask |= HIGHBIT; + } + + /* we now mark the particles that are in subhalo coll_candidates that can be processed independently in parallel */ + nsubs = 0; + t0 = second(); + for(master = 0; master < SubNTask; master++) + { + ncand = count_cand; + + MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm); + + for(k = 0; k < ncand; k++) + { + if(SubThisTask == master) + { + len = coll_candidates[k].len; + parent = coll_candidates[k].parent; /* this is here actually the daughter count */ + } + + MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm); + MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm); + MPI_Barrier(SubComm); + + if(parent == 0) + { + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(i = 0, p = coll_candidates[k].head; i < coll_candidates[k].len; i++) + { + subfind_distlinklist_mark_particle(p, master, nsubs); + + if(p < 0) + terminate("Bummer i=%d \n", i); + + p = subfind_distlinklist_get_next(p); + } + + /* now tell the others to stop polling */ + for(i = 0; i < SubNTask; i++) + if(i != SubThisTask) + MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm); + } + + MPI_Barrier(SubComm); + } + + nsubs++; + } + } + t1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: particles are marked (took %g)\n", ThisTask, timediff(t0, t1)); + 
fflush(stdout); + } + + for(i = 0; i < NumPart; i++) + PS[i].TargetIndex = PS[i].submark; /* this will make sure that the particles are grouped by submark on the target task */ + + t0 = second(); + subfind_distribute_particles(SubComm); /* assemble the particles on individual processors */ + t1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: distribution of independent ones took %g sec\n", ThisTask, timediff(t0, t1)); + fflush(stdout); + } + + MPI_Barrier(SubComm); + t0 = second(); + + subfind_unbind_independent_ones(count_cand); + + MPI_Barrier(SubComm); + t1 = second(); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: unbinding of independent ones took %g sec\n", ThisTask, timediff(t0, t1)); + fflush(stdout); + } + + for(i = 0; i < NumPart; i++) + { + PS[i].origintask &= (HIGHBIT - 1); /* clear high bit if set */ + PS[i].TargetTask = PS[i].origintask; + PS[i].TargetIndex = PS[i].originindex; + } + + t0 = second(); + subfind_distribute_particles(SubComm); /* bring them back to their original processor */ + + t1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: bringing the independent ones back took %g sec\n", ThisTask, timediff(t0, t1)); + fflush(stdout); + } + + /* now mark the bound particles */ + for(i = 0; i < NumPartGroup; i++) + if(PS[i].submark >= 0 && PS[i].submark < nsubs) + Tail[i] = PS[i].submark; /* we use this to flag bound parts of substructures */ + + for(i = 0; i < count_cand; i++) + if(coll_candidates[i].parent == 0) + coll_candidates[i].parent = -1; + } + while(tot_count_leaves > 0); + + /**** now we do the collective unbinding of the subhalo coll_candidates that contain other subhalo coll_candidates ****/ + ud = (struct unbind_data *)mymalloc_movable(&ud, "ud", NumPartGroup * sizeof(struct unbind_data)); + + t0 = second(); + for(master = 0, nr = 0; master < SubNTask; master++) + { + ncand = count_cand; + + MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, 
master, SubComm); + + for(k = 0; k < ncand; k++) + { + if(SubThisTask == master) + { + len = coll_candidates[k].len; + nsubs = coll_candidates[k].nsub; + parent = coll_candidates[k].parent; /* this is here actually the daughter count */ + } + + MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm); + MPI_Barrier(SubComm); + + if(parent >= 0) + { + MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm); + MPI_Bcast(&nsubs, sizeof(nsubs), MPI_BYTE, master, SubComm); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: collective unbinding of nr=%d (%d) of length=%d\n", ThisTask, nr, + nremaining, (int)len); + fflush(stdout); + } + + nr++; + + LocalLen = 0; + + tt0 = second(); + + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(i = 0, p = coll_candidates[k].head; i < coll_candidates[k].len; i++) + { + subfind_distlinklist_add_particle(p); + if(p < 0) + terminate("Bummer i=%d \n", i); + + p = subfind_distlinklist_get_next(p); + } + + /* now tell the others to stop polling */ + for(i = 0; i < SubNTask; i++) + if(i != SubThisTask) + MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm); + } + + int LocalNonGasLen; + + LocalLen = subfind_col_unbind(ud, LocalLen, &LocalNonGasLen); + + tt1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: took %g sec\n", ThisTask, timediff(tt0, tt1)); + fflush(stdout); + } + + MPI_Allreduce(&LocalLen, &len, 1, MPI_INT, MPI_SUM, SubComm); + + if(len >= All.DesLinkNgb) + { + /* ok, we found a substructure */ + + for(i = 0; i < LocalLen; i++) + Tail[ud[i].index] = nsubs; /* we use this to flag the substructures */ + + if(SubThisTask == master) + { + coll_candidates[k].bound_length = len; + } + } + else + { + if(SubThisTask == master) + { + coll_candidates[k].bound_length = 0; + } + } + } + } + } + t1 = second(); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: the collective unbinding of remaining halos took %g sec\n", ThisTask, + 
timediff(t0, t1)); + fflush(stdout); + } + + for(k = 0, count = 0; k < count_cand; k++) + if(coll_candidates[k].bound_length >= All.DesLinkNgb) + { + if(coll_candidates[k].len < All.DesLinkNgb) + terminate("coll_candidates[k=%d].len=%d bound=%d\n", k, coll_candidates[k].len, coll_candidates[k].bound_length); + + count++; + } + + MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, SubComm); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: found %d bound substructures in FoF group of length %d\n", ThisTask, countall, + totgrouplen1); + fflush(stdout); + } + + /* now determine the parent subhalo for each candidate */ + t0 = second(); + parallel_sort_comm(coll_candidates, count_cand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_boundlength, SubComm); + + if(SubThisTask == 0) + tmp_coll_candidates = (struct coll_cand_dat *)mymalloc("tmp_coll_candidates", totcand * sizeof(struct coll_cand_dat)); + + count = count_cand; + count *= sizeof(struct coll_cand_dat); + + countlist = (int *)mymalloc("countlist", SubNTask * sizeof(int)); + offset = (int *)mymalloc("offset", SubNTask * sizeof(int)); + + MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm); + + for(i = 1, offset[0] = 0; i < SubNTask; i++) + offset[i] = offset[i - 1] + countlist[i - 1]; + + MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm); + + if(SubThisTask == 0) + { + for(k = 0; k < totcand; k++) + { + tmp_coll_candidates[k].subnr = k; + tmp_coll_candidates[k].parent = 0; + } + + qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_rank); + + for(k = 0; k < totcand; k++) + { + for(j = k + 1; j < totcand; j++) + { + if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len) + break; + + if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >= tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len) + { + 
if(tmp_coll_candidates[k].bound_length >= All.DesLinkNgb) + tmp_coll_candidates[j].parent = tmp_coll_candidates[k].subnr; + } + else + { + terminate("k=%d|%d has rank=%d and len=%d. j=%d has rank=%d and len=%d bound=%d\n", k, countall, + (int)tmp_coll_candidates[k].rank, (int)tmp_coll_candidates[k].len, + (int)tmp_coll_candidates[k].bound_length, (int)tmp_coll_candidates[j].rank, + (int)tmp_coll_candidates[j].len, (int)tmp_coll_candidates[j].bound_length); + } + } + } + + qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_subnr); + } + + MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm); + + myfree(offset); + myfree(countlist); + + if(SubThisTask == 0) + myfree(tmp_coll_candidates); + + t1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: determination of parent subhalo took %g sec (presently allocated %g MB)\n", ThisTask, + timediff(t0, t1), AllocatedBytes / (1024.0 * 1024.0)); + fflush(stdout); + } + + /* Now let's save some properties of the substructures */ + if(SubThisTask == 0) + Group[0].Nsubs = countall; + + t0 = second(); + for(master = 0, subnr = 0; master < SubNTask; master++) + { + ncand = count_cand; + MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm); + + for(k = 0; k < ncand; k++) + { + if(SubThisTask == master) + { + len = coll_candidates[k].bound_length; + nsubs = coll_candidates[k].nsub; + parent = coll_candidates[k].parent; + } + + MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm); + MPI_Barrier(SubComm); + + if(len > 0) + { + MPI_Bcast(&nsubs, sizeof(nsubs), MPI_BYTE, master, SubComm); + MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm); + + LocalLen = 0; + + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(i = 0, p = coll_candidates[k].head; i < coll_candidates[k].len; i++) + { + subfind_distlinklist_add_bound_particles(p, nsubs); + p = 
subfind_distlinklist_get_next(p); + } + + /* now tell the others to stop polling */ + for(i = 0; i < SubNTask; i++) + if(i != SubThisTask) + MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm); + } + + MPI_Barrier(SubComm); + + if(SubThisTask == 0) + { + if(Nsubgroups >= MaxNsubgroups) + terminate("Nsubgroups=%d >= MaxNsubgroups=%d", Nsubgroups, MaxNsubgroups); + } + + tt0 = second(); + subfind_determine_sub_halo_properties(ud, LocalLen, &SubGroup[Nsubgroups], GrNr, subnr, 1, nsubgroups_cat); + tt1 = second(); + + /* we have filled into ud the binding energy and the particle ID return */ + + if(SubThisTask == 0) + { + if(Nsubgroups >= MaxNsubgroups) + terminate("Nsubgroups >= MaxNsubgroups"); + + if(subnr == 0) + { + for(j = 0; j < 3; j++) + Group[grindex].Pos[j] = SubGroup[Nsubgroups].Pos[j]; + } + + SubGroup[Nsubgroups].GrNr = GrNr; + SubGroup[Nsubgroups].SubNr = subnr; + SubGroup[Nsubgroups].SubParent = parent; + + Nsubgroups++; + } + + /* Let's now assign the subgroup number */ + for(i = 0; i < LocalLen; i++) + PS[ud[i].index].SubNr = subnr; + + subnr++; + } + } + } + + t1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: determining substructure properties took %g sec (presently allocated %g MB)\n", + ThisTask, timediff(t0, t1), AllocatedBytes / (1024.0 * 1024.0)); + fflush(stdout); + } + + myfree(ud); + ud = NULL; + myfree(coll_candidates); + myfree(Len); + myfree(Tail); + myfree(Next); + myfree(Head); + myfree(sd); + + subfind_coll_treefree(); + subfind_coll_domain_free(); + + /* undo local rearrangement that made group consecutive. 
After that, the association of SphP[] will be correct again */ + submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart); + for(int i = 0; i < NumPart; i++) + { + submp[i].index = i; + submp[i].OldIndex = PS[i].OldIndex; + } + qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex); + subfind_reorder_according_to_submp(); + myfree(submp); +} + +#ifdef SUBFIND_EXTENDED_PROPERTIES +/*! \brief Calculates angualar momentum collectively on all MPI tasks. + * + * \param[in] snapnr (unused) + * \param[in] ngroups_cat (unused) + * + * \return void + */ +void subfind_fof_calc_am_collective(int snapnr, int ngroups_cat) +{ + int len, totgrouplen1, totgrouplen2; + long long index; + + int grindex = 0, i, k, ptype; + double Pos_pbc[3], Vel_tot[3], gr_pos[3], gr_vel[3]; + double gr_Jtot[3], gr_Jdm[3], gr_Jgas[3], gr_Jstars[3], jpart[3]; + double gr_CMFrac, gr_CMFracType[NTYPES]; + int gr_len_dm; + double gr_mass, gr_mass_gas, gr_mass_stars; // gr_mass_dm, + double gr_Ekin, gr_Ethr; + + /* make a sanity check: We should have exactly 1 group, stored on the root of the processor subset */ + if(SubThisTask == 0) + { + if(Ngroups != 1) + terminate("Ngroups=%d != 1 SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask); + } + else + { + if(Ngroups != 0) + terminate("Ngroups=%d != 0 SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask); + } + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: Collectively doing AM of halo %d of length %d on %d processors.\n", ThisTask, + Group[0].GrNr, Group[0].Len, SubNTask); + + totgrouplen2 = Group[0].Len; + } + + /* tell everybody in the set the group number and the grouplen */ + MPI_Bcast(&GrNr, 1, MPI_INT, 0, SubComm); + MPI_Bcast(&totgrouplen2, 1, MPI_INT, 0, SubComm); + + for(i = 0, NumPartGroup = 0; i < NumPart; i++) + if(PS[i].GrNr == GrNr) + NumPartGroup++; + + MPI_Allreduce(&NumPartGroup, &totgrouplen1, 1, MPI_INT, MPI_SUM, SubComm); + + /* sanity check 
that we actually have all the right particles on the processor subset */ + if(totgrouplen1 != totgrouplen2) + terminate("totgrouplen1 != totgrouplen2"); /* inconsistency */ + + /* do a domain decomposition just for this halo */ + subfind_coll_domain_decomposition(); + + /* copy over the domain dimensions to serial tree code, as this may be used in the collective unbinding */ + subfind_loctree_copyExtent(); + + /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other arrays! */ + submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart); + for(i = 0; i < NumPart; i++) + { + PS[i].OldIndex = i; + submp[i].index = i; + submp[i].GrNr = PS[i].GrNr; + submp[i].DM_Density = PS[i].Density; + } + qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density); + subfind_reorder_according_to_submp(); + myfree(submp); + + /* note: now we have the particles of the group at the beginning, but SPH particles are not aligned. 
+ They can however be accessed via SphP[PS[i].OldIndex] */ + + /* re-determine the number of local group particles, which has changed due to domain decomposition */ + for(i = 0, NumPartGroup = 0; i < NumPart; i++) + if(PS[i].GrNr == GrNr) + NumPartGroup++; + + ud = (struct unbind_data *)mymalloc("ud", NumPartGroup * sizeof(struct unbind_data)); + len = NumPartGroup; + + // pick my particles + for(i = 0; i < len; i++) + ud[i].index = i; + + // initialize + gr_CMFrac = 0; + gr_Ekin = 0; + gr_Ethr = 0; + for(k = 0; k < 3; k++) + { + gr_Jtot[k] = 0; + gr_Jdm[k] = 0; + gr_Jgas[k] = 0; + gr_Jstars[k] = 0; + } + for(k = 0; k < NTYPES; k++) + { + gr_CMFracType[k] = 0; + } + + if(SubThisTask == 0) + { + for(k = 0; k < 3; k++) + { + gr_pos[k] = Group[grindex].Pos[k]; + gr_vel[k] = Group[grindex].Vel[k]; + } + } + + // send group properties stored only on root task to all participating tasks + MPI_Bcast(gr_pos, 3, MPI_DOUBLE, 0, SubComm); + MPI_Bcast(gr_vel, 3, MPI_DOUBLE, 0, SubComm); + + for(k = 0; k < len; k++) + { + index = ud[k].index; + ptype = P[index].Type; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = P[index].Pos[i] - gr_pos[i]; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = fof_periodic(Pos_pbc[i]); + + for(i = 0; i < 3; i++) + Pos_pbc[i] = Pos_pbc[i] * All.cf_atime; /* convert to physical length */ + + for(i = 0; i < 3; i++) + Vel_tot[i] = P[index].Vel[i] / All.cf_atime - gr_vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i]; + + gr_Ekin += (P[index].Mass / 2) * (Vel_tot[0] * Vel_tot[0] + Vel_tot[1] * Vel_tot[1] + Vel_tot[2] * Vel_tot[2]); + if(P[index].Type == 0) + gr_Ethr += P[index].Mass * SphP[PS[index].OldIndex].Utherm; + + gr_Jtot[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jtot[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jtot[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + if(ptype == 1) // dm illustris + { + gr_Jdm[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - 
Pos_pbc[2] * Vel_tot[1]); + gr_Jdm[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jdm[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 0) // gas (incl. winds) + { + gr_Jgas[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jgas[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jgas[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 4) // stars + { + gr_Jstars[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jstars[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jstars[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + } + + MPI_Allreduce(MPI_IN_PLACE, gr_Jtot, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, gr_Jdm, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, gr_Jgas, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, gr_Jstars, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_Ekin, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_Ethr, 1, MPI_DOUBLE, MPI_SUM, SubComm); + + // save the properties + if(SubThisTask == 0) + { + Group[grindex].Ekin = gr_Ekin; + Group[grindex].Ethr = gr_Ethr; + for(i = 0; i < 3; i++) + { + Group[grindex].J[i] = gr_Jtot[i]; + Group[grindex].JDM[i] = gr_Jdm[i]; + Group[grindex].JGas[i] = gr_Jgas[i]; + Group[grindex].JStars[i] = gr_Jstars[i]; + } + } + + // calculate counter-rotating fractions + gr_len_dm = 0; + gr_mass = gr_mass_gas = gr_mass_stars = 0; + + for(k = 0; k < len; k++) + { + index = ud[k].index; + ptype = P[index].Type; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = P[index].Pos[i] - gr_pos[i]; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = fof_periodic(Pos_pbc[i]); + + for(i = 0; i < 3; i++) + Pos_pbc[i] = Pos_pbc[i] * All.cf_atime; // units: phys kpc/h + + for(i = 0; i < 3; i++) + 
Vel_tot[i] = P[index].Vel[i] / All.cf_atime - gr_vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i]; + + jpart[0] = P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + jpart[1] = P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + jpart[2] = P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + gr_mass += P[index].Mass; + if((gr_Jtot[0] * jpart[0] + gr_Jtot[1] * jpart[1] + gr_Jtot[2] * jpart[2]) < 0.) + gr_CMFrac += P[index].Mass; // / gr_mass; + + if(ptype == 1) // dm illustris + { + gr_len_dm++; + if((gr_Jdm[0] * jpart[0] + gr_Jdm[1] * jpart[1] + gr_Jdm[2] * jpart[2]) < 0.) + gr_CMFracType[1]++; //= P[index].Mass / gr_mass_dm; + } + if(ptype == 0) // gas (incl. winds) + { + gr_mass_gas += P[index].Mass; + if((gr_Jgas[0] * jpart[0] + gr_Jgas[1] * jpart[1] + gr_Jgas[2] * jpart[2]) < 0.) + gr_CMFracType[0] += P[index].Mass; // / gr_mass_gas; + } + if(ptype == 4) // stars + { + gr_mass_stars += P[index].Mass; + if((gr_Jstars[0] * jpart[0] + gr_Jstars[1] * jpart[1] + gr_Jstars[2] * jpart[2]) < 0.) 
+ gr_CMFracType[4] += P[index].Mass; // / gr_mass_stars; + } + } + + MPI_Allreduce(MPI_IN_PLACE, &gr_mass, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_len_dm, 1, MPI_INT, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_mass_gas, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_mass_stars, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_CMFrac, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, gr_CMFracType, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + + // save the properties + if(SubThisTask == 0) + { + gr_CMFrac /= gr_mass; + gr_CMFracType[1] /= gr_len_dm; + gr_CMFracType[0] /= gr_mass_gas; + gr_CMFracType[4] /= gr_mass_stars; + + Group[grindex].CMFrac = gr_CMFrac; + for(i = 0; i < NTYPES; i++) + Group[grindex].CMFracType[i] = gr_CMFracType[i]; + } + + myfree(ud); + + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE: root-task = %d AM done.\n", ThisTask); + + subfind_coll_domain_free(); + + /* undo local rearrangement that made group consecutive. After that, the association of SphP[] will be correct again */ + submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart); + for(i = 0; i < NumPart; i++) + { + submp[i].index = i; + submp[i].OldIndex = PS[i].OldIndex; + } + qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex); + subfind_reorder_according_to_submp(); + myfree(submp); +} +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +/*! \brief Finds candidates for subfind collective. + * + * \param[in] totgrouplen Length of group. + * + * \return void + */ +void subfind_col_find_coll_candidates(int totgrouplen) +{ + int ngbcount, retcode, len_attach; + int i, k, len, master; + long long prev, tail, tail_attach, tmp, next, index; + long long p, ss, head, head_attach, ngb_index1, ngb_index2, rank; + double t0, t1, tt0, tt1; + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: building distributed linked list. 
(presently allocated %g MB)\n", ThisTask, + AllocatedBytes / (1024.0 * 1024.0)); + fflush(stdout); + } + + /* now find the subhalo coll_candidates by building up link lists from high density to low density */ + t0 = second(); + for(master = 0; master < SubNTask; master++) + { + tt0 = second(); + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(k = 0; k < NumPartGroup; k++) + { + ngbcount = sd[k].ngbcount; + ngb_index1 = sd[k].ngb_index1; + ngb_index2 = sd[k].ngb_index2; + + switch(ngbcount) /* treat the different possible cases */ + { + case 0: /* this appears to be a lonely maximum -> new group */ + subfind_distlinklist_set_all(sd[k].index, sd[k].index, sd[k].index, 1, -1); + break; + + case 1: /* the particle is attached to exactly one group */ + head = subfind_distlinklist_get_head(ngb_index1); + + if(head == -1) + terminate("We have a problem! head=%d/%d for k=%d on task=%d\n", (int)(head >> 32), (int)head, k, SubThisTask); + + retcode = subfind_distlinklist_get_tail_set_tail_increaselen(head, &tail, sd[k].index); + + if(!(retcode & 1)) + subfind_distlinklist_set_headandnext(sd[k].index, head, -1); + if(!(retcode & 2)) + subfind_distlinklist_set_next(tail, sd[k].index); + break; + + case 2: /* the particle merges two groups together */ + if((ngb_index1 >> 32) == (ngb_index2 >> 32)) + { + subfind_distlinklist_get_two_heads(ngb_index1, ngb_index2, &head, &head_attach); + } + else + { + head = subfind_distlinklist_get_head(ngb_index1); + head_attach = subfind_distlinklist_get_head(ngb_index2); + } + + if(head == -1 || head_attach == -1) + terminate("We have a problem! 
head=%d/%d head_attach=%d/%d for k=%d on task=%d\n", (int)(head >> 32), (int)head, + (int)(head_attach >> 32), (int)head_attach, k, SubThisTask); + + if(head != head_attach) + { + subfind_distlinklist_get_tailandlen(head, &tail, &len); + subfind_distlinklist_get_tailandlen(head_attach, &tail_attach, &len_attach); + + if(len_attach > len || + (len_attach == len && + head_attach < head)) /* other group is longer, swap them. for equal length, take the larger head value */ + { + tmp = head; + head = head_attach; + head_attach = tmp; + tmp = tail; + tail = tail_attach; + tail_attach = tmp; + tmp = len; + len = len_attach; + len_attach = tmp; + } + + /* only in case the attached group is long enough we bother to register it + as a subhalo candidate */ + + if(len_attach >= All.DesLinkNgb) + { + if(count_cand < max_coll_candidates) + { + coll_candidates[count_cand].len = len_attach; + coll_candidates[count_cand].head = head_attach; + count_cand++; + } + else + terminate("Task %d: count=%d, max=%d, npartgroup=%d\n", SubThisTask, count_cand, max_coll_candidates, + NumPartGroup); + } + + /* now join the two groups */ + subfind_distlinklist_set_tailandlen(head, tail_attach, len + len_attach); + subfind_distlinklist_set_next(tail, head_attach); + + ss = head_attach; + do + { + ss = subfind_distlinklist_set_head_get_next(ss, head); + } + while(ss >= 0); + } + + /* finally, attach the particle to 'head' */ + retcode = subfind_distlinklist_get_tail_set_tail_increaselen(head, &tail, sd[k].index); + + if(!(retcode & 1)) + subfind_distlinklist_set_headandnext(sd[k].index, head, -1); + if(!(retcode & 2)) + subfind_distlinklist_set_next(tail, sd[k].index); + break; + } + } + + fflush(stdout); + + /* now tell the others to stop polling */ + for(k = 0; k < SubNTask; k++) + if(k != SubThisTask) + MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm); + } + + MPI_Barrier(SubComm); + tt1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: ma=%d/%d took %g 
sec\n", ThisTask, master, SubNTask, timediff(tt0, tt1)); + fflush(stdout); + } + } + t1 = second(); + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: identification of primary coll_candidates took %g sec\n", ThisTask, timediff(t0, t1)); + + /* add the full thing as a subhalo candidate */ + t0 = second(); + for(master = 0, head = -1, prev = -1; master < SubNTask; master++) + { + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(i = 0; i < NumPartGroup; i++) + { + index = (((long long)SubThisTask) << 32) + i; + + if(Head[i] == index) + { + subfind_distlinklist_get_tailandlen(Head[i], &tail, &len); + next = subfind_distlinklist_get_next(tail); + if(next == -1) + { + if(prev < 0) + head = index; + + if(prev >= 0) + subfind_distlinklist_set_next(prev, index); + + prev = tail; + } + } + } + + /* now tell the others to stop polling */ + for(k = 0; k < SubNTask; k++) + if(k != SubThisTask) + MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm); + } + + MPI_Barrier(SubComm); + MPI_Bcast(&head, sizeof(head), MPI_BYTE, master, SubComm); + MPI_Bcast(&prev, sizeof(prev), MPI_BYTE, master, SubComm); + } + + if(SubThisTask == SubNTask - 1) + { + if(count_cand < max_coll_candidates) + { + coll_candidates[count_cand].len = totgrouplen; + coll_candidates[count_cand].head = head; + count_cand++; + } + else + terminate("count_cand=%d >= max_coll_candidates=%d", count_cand, max_coll_candidates); + } + t1 = second(); + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: adding background as candidate took %g sec\n", ThisTask, timediff(t0, t1)); + + /* go through the whole chain once to establish a rank order. 
For the rank we use Len[] */ + t0 = second(); + + master = (head >> 32); + + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + p = head; + rank = 0; + + while(p >= 0) + { + p = subfind_distlinklist_setrank_and_get_next(p, &rank); + } + + /* now tell the others to stop polling */ + for(i = 0; i < SubNTask; i++) + if(i != master) + MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm); + } + + MPI_Barrier(SubComm); + MPI_Bcast(&rank, sizeof(rank), MPI_BYTE, master, SubComm); /* just for testing */ + + /* for each candidate, we now pull out the rank of its head */ + for(master = 0; master < SubNTask; master++) + { + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(k = 0; k < count_cand; k++) + coll_candidates[k].rank = subfind_distlinklist_get_rank(coll_candidates[k].head); + + /* now tell the others to stop polling */ + for(i = 0; i < SubNTask; i++) + if(i != SubThisTask) + MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm); + } + } + MPI_Barrier(SubComm); + + t1 = second(); + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: establishing of rank order took %g sec (p=%d, grouplen=%d) presently allocated %g MB\n", + ThisTask, timediff(t0, t1), (int)rank, totgrouplen, AllocatedBytes / (1024.0 * 1024.0)); + + if(((int)rank) != totgrouplen) + terminate("mismatch\n"); +} + +/*! \brief Unbinding for independent subgroups. + * + * \param[in] cont_cand Number of subgroup candidates. 
+ * + * \return void + */ +void subfind_unbind_independent_ones(int count_cand) +{ + int i, j, k, len, nsubs, len_non_gas; + + ud = (struct unbind_data *)mymalloc("ud", NumPart * sizeof(struct unbind_data)); + + subfind_loctree_treeallocate(All.TreeAllocFactor * NumPart, NumPart); + + qsort(coll_candidates, count_cand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_nsubs); + + for(k = 0, i = 0; k < count_cand; k++) + if(coll_candidates[k].parent == 0) + { + while(PS[i].submark < coll_candidates[k].nsub) + { + i++; + if(i >= NumPart) + terminate("i >= NumPart"); + } + + if(PS[i].submark >= 0 && PS[i].submark < HIGHBIT) + { + len = 0; + nsubs = PS[i].submark; + + if(nsubs != coll_candidates[k].nsub) + { + terminate("TASK=%d i=%d k=%d nsubs=%d coll_candidates[k].nsub=%d\n", SubThisTask, i, k, nsubs, + coll_candidates[k].nsub); + } + + while(i < NumPart) + { + if(PS[i].submark == nsubs) + { + PS[i].submark = HIGHBIT; + if((PS[i].origintask & HIGHBIT) == 0) + { + ud[len].index = i; + len++; + } + i++; + } + else + break; + } + + /* call the serial unbind function */ + len = subfind_unbind(ud, len, &len_non_gas); + + if(len >= All.DesLinkNgb) + { + /* ok, we found a substructure */ + coll_candidates[k].bound_length = len; + + for(j = 0; j < len; j++) + PS[ud[j].index].submark = nsubs; /* we use this to flag the substructures */ + } + else + coll_candidates[k].bound_length = 0; + } + } + + subfind_loctree_treefree(); + + myfree(ud); +} + +/*! \brief Unbinding for subfind collective. + * + * \param[in] d Unbind data. + * \param[in] num Number of particles in subgroup. + * \param[out] num_non_gas Number of particles which are not gas cells. 
+ * + * \return + */ +int subfind_col_unbind(struct unbind_data *d, int num, int *num_non_gas) +{ + int iter = 0; + int i, j, p, part_index, minindex, task; + int unbound, totunbound, numleft, mincpu; + int *npart, *offset, *nbu_count, count_bound_unbound, phaseflag; + double s[3], dx[3], ddxx, v[3], dv[3], sloc[3], vloc[3], pos[3]; + double vel_to_phys, atime; + MyFloat minpot, *potlist; + double boxsize, xtmp; + double mass, massloc; + double *bnd_energy, energy_limit, energy_limit_local, weakly_bound_limit_local, weakly_bound_limit = 0; + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: beginning of subfind_col_unbind()\n", ThisTask); + fflush(stdout); + } + + boxsize = All.BoxSize; + + vel_to_phys = 1.0 / All.cf_atime; + atime = All.cf_atime; + + phaseflag = 0; /* this means we will recompute the potential for all particles */ + + do + { + subfind_coll_treebuild(num, d); + + /* let's compute the potential energy */ + + subfind_potential_compute(num, d, phaseflag, weakly_bound_limit); + + if(phaseflag == 0) + { + potlist = (MyFloat *)mymalloc("potlist", SubNTask * sizeof(MyFloat)); + + for(i = 0, minindex = -1, minpot = 1.0e30; i < num; i++) + { + if(gsl_isnan(PS[d[i].index].Potential)) + terminate("pot is nan"); + + if(PS[d[i].index].Potential < minpot || minindex == -1) + { + minpot = PS[d[i].index].Potential; + minindex = d[i].index; + } + } + + MPI_Allgather(&minpot, sizeof(MyFloat), MPI_BYTE, potlist, sizeof(MyFloat), MPI_BYTE, SubComm); + + for(i = 0, mincpu = -1, minpot = 1.0e30; i < SubNTask; i++) + if(potlist[i] < minpot) + { + mincpu = i; + minpot = potlist[i]; + } + + if(mincpu < 0) + terminate("mincpu < 0"); + + myfree(potlist); + + if(SubThisTask == mincpu) + { +#ifdef CELL_CENTER_GRAVITY + if(P[minindex].Type == 0) + { + for(j = 0; j < 3; j++) + pos[j] = PS[minindex].Center[j]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + for(j = 0; j < 3; j++) + pos[j] = P[minindex].Pos[j]; + } + } + + MPI_Bcast(&pos[0], 3, 
MPI_DOUBLE, mincpu, SubComm); + /* pos[] now holds the position of minimum potential */ + /* we take that as the center */ + } + + /* let's get bulk velocity and the center-of-mass */ + + for(j = 0; j < 3; j++) + sloc[j] = vloc[j] = 0; + + for(i = 0, massloc = 0; i < num; i++) + { + part_index = d[i].index; + + for(j = 0; j < 3; j++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[part_index].Type == 0) + ddxx = GRAVITY_NEAREST_X(PS[part_index].Center[j] - pos[j]); + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + ddxx = GRAVITY_NEAREST_X(P[part_index].Pos[j] - pos[j]); + + sloc[j] += P[part_index].Mass * ddxx; + vloc[j] += P[part_index].Mass * P[part_index].Vel[j]; + } + massloc += P[part_index].Mass; + } + + MPI_Allreduce(sloc, s, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(vloc, v, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&massloc, &mass, 1, MPI_DOUBLE, MPI_SUM, SubComm); + + for(j = 0; j < 3; j++) + { + s[j] /= mass; /* center of mass */ + v[j] /= mass; + + s[j] += pos[j]; + + while(s[j] < 0) + s[j] += boxsize; + while(s[j] >= boxsize) + s[j] -= boxsize; + } + + bnd_energy = (double *)mymalloc("bnd_energy", num * sizeof(double)); + + for(i = 0; i < num; i++) + { + part_index = d[i].index; + + for(j = 0; j < 3; j++) + { + dv[j] = vel_to_phys * (P[part_index].Vel[j] - v[j]); + +#ifdef CELL_CENTER_GRAVITY + if(P[part_index].Type == 0) + dx[j] = atime * GRAVITY_NEAREST_X(PS[part_index].Center[j] - s[j]); + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + dx[j] = atime * GRAVITY_NEAREST_X(P[part_index].Pos[j] - s[j]); + + dv[j] += All.cf_Hrate * dx[j]; + } + + PS[part_index].BindingEnergy = PS[part_index].Potential + 0.5 * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]); + PS[part_index].BindingEnergy += All.G / All.cf_atime * P[part_index].Mass / + (All.ForceSoftening[P[part_index].SofteningType] / 2.8); /* add self-energy */ + + if(P[part_index].Type == 0) + PS[part_index].BindingEnergy += PS[part_index].Utherm; + + bnd_energy[i] = 
PS[part_index].BindingEnergy; + } + + parallel_sort_comm(bnd_energy, num, sizeof(double), subfind_compare_binding_energy, SubComm); + + npart = (int *)mymalloc("npart", SubNTask * sizeof(int)); + nbu_count = (int *)mymalloc("nbu_count", SubNTask * sizeof(int)); + offset = (int *)mymalloc("offset", SubNTask * sizeof(int)); + + MPI_Allgather(&num, 1, MPI_INT, npart, 1, MPI_INT, SubComm); + MPI_Allreduce(&num, &numleft, 1, MPI_INT, MPI_SUM, SubComm); + + for(i = 1, offset[0] = 0; i < SubNTask; i++) + offset[i] = offset[i - 1] + npart[i - 1]; + + j = (int)(0.25 * numleft); /* index of limiting energy value */ + + task = 0; + while(j >= npart[task]) + { + j -= npart[task]; + task++; + } + + if(SubThisTask == task) + energy_limit_local = bnd_energy[j]; + else + energy_limit_local = 1.0e30; + + MPI_Allreduce(&energy_limit_local, &energy_limit, 1, MPI_DOUBLE, MPI_MIN, SubComm); + + for(i = 0, count_bound_unbound = 0; i < num; i++) + { + if(bnd_energy[i] > 0) + count_bound_unbound++; + else + count_bound_unbound--; + } + + MPI_Allgather(&count_bound_unbound, 1, MPI_INT, nbu_count, 1, MPI_INT, SubComm); + + for(i = 0, count_bound_unbound = 0; i < SubThisTask; i++) + count_bound_unbound += nbu_count[i]; + + for(i = 0; i < num - 1; i++) + { + if(bnd_energy[i] > 0) + count_bound_unbound++; + else + count_bound_unbound--; + if(count_bound_unbound <= 0) + break; + } + + if(num > 0 && count_bound_unbound <= 0) + weakly_bound_limit_local = bnd_energy[i]; + else + weakly_bound_limit_local = -1.0e30; + + MPI_Allreduce(&weakly_bound_limit_local, &weakly_bound_limit, 1, MPI_DOUBLE, MPI_MAX, SubComm); + + for(i = 0, unbound = 0; i < num; i++) + { + p = d[i].index; + + if(PS[p].BindingEnergy > 0 && PS[p].BindingEnergy > energy_limit) + { + unbound++; + + d[i] = d[num - 1]; + num--; + i--; + } + else if(P[p].Type != 0) + (*num_non_gas)++; + } + + myfree(offset); + myfree(nbu_count); + myfree(npart); + myfree(bnd_energy); + + MPI_Allreduce(&unbound, &totunbound, 1, MPI_INT, MPI_SUM, 
SubComm); + MPI_Allreduce(&num, &numleft, 1, MPI_INT, MPI_SUM, SubComm); + + if(phaseflag == 0) + { + if(totunbound > 0) + phaseflag = 1; + } + else + { + if(totunbound == 0) + { + phaseflag = 0; /* this will make us repeat everything once more for all particles */ + totunbound = 1; + } + } + + iter++; + } + while(totunbound > 0 && numleft >= All.DesLinkNgb); + + return num; +} + +/*! \brief Gets new request from other task. + * + * \return void + */ +void subfind_poll_for_requests(void) +{ + int index, nsub, source, tag, ibuf[3], target, submark, task; + long long head, next, rank, buf[5]; + long long oldtail, newtail; + int task_newtail, i_newtail, task_oldtail, i_oldtail; + char msg[200]; + MPI_Status status; + + do + { + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, SubComm, &status); + + source = status.MPI_SOURCE; + tag = status.MPI_TAG; + + /* MPI_Get_count(&status, MPI_BYTE, &count); */ + switch(tag) + { + case TAG_GET_TWOHEADS: + MPI_Recv(ibuf, 2, MPI_INT, source, TAG_GET_TWOHEADS, SubComm, MPI_STATUS_IGNORE); + buf[0] = Head[ibuf[0]]; + buf[1] = Head[ibuf[1]]; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_GET_TWOHEADS_DATA, SubComm); + break; + case TAG_SET_NEWTAIL: + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SET_NEWTAIL, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + newtail = buf[1]; + oldtail = Tail[index]; /* return old tail */ + Tail[index] = newtail; + Len[index]++; + + task_newtail = (newtail >> 32); + if(task_newtail == SubThisTask) + { + i_newtail = (newtail & MASK); + Head[i_newtail] = (((long long)SubThisTask) << 32) + index; + Next[i_newtail] = -1; + } + task_oldtail = (oldtail >> 32); + if(task_oldtail == SubThisTask) + { + i_oldtail = (oldtail & MASK); + Next[i_oldtail] = newtail; + } + + buf[0] = oldtail; + MPI_Send(buf, 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_OLDTAIL, SubComm); + break; + case TAG_SET_ALL: + MPI_Recv(buf, 5 * sizeof(long long), MPI_BYTE, source, TAG_SET_ALL, SubComm, MPI_STATUS_IGNORE); 
+ index = buf[0]; + Head[index] = buf[1]; + Tail[index] = buf[2]; + Len[index] = buf[3]; + Next[index] = buf[4]; + break; + case TAG_GET_TAILANDLEN: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + buf[0] = Tail[index]; + buf[1] = Len[index]; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_GET_TAILANDLEN_DATA, SubComm); + break; + case TAG_SET_TAILANDLEN: + MPI_Recv(buf, 3 * sizeof(long long), MPI_BYTE, source, TAG_SET_TAILANDLEN, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + Tail[index] = buf[1]; + Len[index] = buf[2]; + break; + case TAG_SET_HEADANDNEXT: + MPI_Recv(buf, 3 * sizeof(long long), MPI_BYTE, source, TAG_SET_HEADANDNEXT, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + Head[index] = buf[1]; + Next[index] = buf[2]; + break; + case TAG_SET_NEXT: + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SET_NEXT, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + Next[index] = buf[1]; + break; + case TAG_SETHEADGETNEXT: + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SETHEADGETNEXT, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + head = buf[1]; + do + { + Head[index] = head; + next = Next[index]; + task = (next >> 32); + index = (next & MASK); + } + while(next >= 0 && task == SubThisTask); + MPI_Send(&next, 1 * sizeof(long long), MPI_BYTE, source, TAG_SETHEADGETNEXT_DATA, SubComm); + break; + case TAG_GET_NEXT: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + MPI_Send(&Next[index], 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_NEXT_DATA, SubComm); + break; + case TAG_GET_HEAD: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + MPI_Send(&Head[index], 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_HEAD_DATA, SubComm); + break; + case TAG_ADD_PARTICLE: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + if(Tail[index] < 0) /* consider only particles not already in substructures */ + { + ud[LocalLen].index = index; + if(index >= NumPartGroup) + { + sprintf(msg, 
"What: index=%d NumPartGroup=%d\n", index, NumPartGroup); + terminate(msg); + } + LocalLen++; + } + break; + case TAG_MARK_PARTICLE: + MPI_Recv(ibuf, 3, MPI_INT, source, TAG_MARK_PARTICLE, SubComm, MPI_STATUS_IGNORE); + index = ibuf[0]; + target = ibuf[1]; + submark = ibuf[2]; + + if(PS[index].submark != HIGHBIT) + terminate("TasK=%d i=%d P[i].submark=%d?\n", SubThisTask, index, PS[index].submark); + + PS[index].TargetTask = target; + PS[index].submark = submark; + break; + case TAG_ADDBOUND: + MPI_Recv(ibuf, 2, MPI_INT, source, TAG_ADDBOUND, SubComm, &status); + index = ibuf[0]; + nsub = ibuf[1]; + if(Tail[index] == nsub) /* consider only particles in this substructure */ + { + ud[LocalLen].index = index; + LocalLen++; + } + break; + case TAG_SETRANK: + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SETRANK, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + rank = buf[1]; + do + { + Len[index] = rank++; + next = Next[index]; + if(next < 0) + break; + index = (next & MASK); + } + while((next >> 32) == SubThisTask); + buf[0] = next; + buf[1] = rank; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SETRANK_OUT, SubComm); + break; + case TAG_GET_RANK: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + rank = Len[index]; + MPI_Send(&rank, 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_RANK_DATA, SubComm); + break; + + case TAG_POLLING_DONE: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + break; + + default: + terminate("tag not present in the switch"); + break; + } + } + while(tag != TAG_POLLING_DONE); +} + +/*! \brief Sets rank in global linked list and gets next entry. + * + * \param[in] index Index in global linked list. + * \param[in, out] rank Rank to be set in linked list. 
+ * + * \return Next entry + */ +long long subfind_distlinklist_setrank_and_get_next(long long index, long long *rank) +{ + int task, i; + long long next; + long long buf[2]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Len[i] = *rank; + *rank = *rank + 1; + next = Next[i]; + } + else + { + buf[0] = i; + buf[1] = *rank; + + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_SETRANK, SubComm); + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_SETRANK_OUT, SubComm, MPI_STATUS_IGNORE); + next = buf[0]; + *rank = buf[1]; + } + return next; +} + +/*! \brief Sets head in global linked list and gets next + * + * \param[in] index Index in global linked list. + * \param[in] head Head value to be set. + * + * \return Next value. + */ +long long subfind_distlinklist_set_head_get_next(long long index, long long head) +{ + int task, i; + long long buf[2]; + long long next; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Head[i] = head; + next = Next[i]; + } + else + { + buf[0] = i; + buf[1] = head; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_SETHEADGETNEXT, SubComm); + MPI_Recv(&next, 1 * sizeof(long long), MPI_BYTE, task, TAG_SETHEADGETNEXT_DATA, SubComm, MPI_STATUS_IGNORE); + } + + return next; +} + +/*! \brief Sets next value in global linked list. + * + * \param[in] index Index in global linked list. + * \param[in] next Next value to be set. + * + * \return void + */ +void subfind_distlinklist_set_next(long long index, long long next) +{ + int task, i; + long long buf[2]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Next[i] = next; + } + else + { + buf[0] = i; + buf[1] = next; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_SET_NEXT, SubComm); + } +} + +/*! \brief Adds particle to 'ud' list if not already in substructure. + * + * \param[in] index Index in global linked list. 
+ * + * \return void + */ +void subfind_distlinklist_add_particle(long long index) +{ + int task, i; + char msg[200]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + if(Tail[i] < 0) /* consider only particles not already in substructures */ + { + ud[LocalLen].index = i; + if(i >= NumPartGroup) + { + sprintf(msg, "What: index=%d NumPartGroup=%d\n", i, NumPartGroup); + terminate(msg); + } + + LocalLen++; + } + } + else + { + MPI_Send(&i, 1, MPI_INT, task, TAG_ADD_PARTICLE, SubComm); + } +} + +/*! \brief Sets target task and submark field in 'PS' structure. + * + * \param[in] index Index in global linked list + * \param[in] target Value for TargetTask field. + * \param[in] submark Value for submark field. + * + * \return void + */ +void subfind_distlinklist_mark_particle(long long index, int target, int submark) +{ + int task, i, ibuf[3]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + if(PS[i].submark != HIGHBIT) + terminate("Tas=%d i=%d P[i].submark=%d?\n", SubThisTask, i, PS[i].submark); + + PS[i].TargetTask = target; + PS[i].submark = submark; + } + else + { + ibuf[0] = i; + ibuf[1] = target; + ibuf[2] = submark; + MPI_Send(ibuf, 3, MPI_INT, task, TAG_MARK_PARTICLE, SubComm); + } +} + +/*! \brief Add bound particle to 'ud' array. + * + * \param[in] index Index in global linked list. + * \param[in] nsub Number of subgroups (i.e. if Tail index the same, not yet + * in a substructrue). + * + * \return void + */ +void subfind_distlinklist_add_bound_particles(long long index, int nsub) +{ + int task, i, ibuf[2]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + if(Tail[i] == nsub) /* consider only particles not already in substructures */ + { + ud[LocalLen].index = i; + LocalLen++; + } + } + else + { + ibuf[0] = i; + ibuf[1] = nsub; + MPI_Send(ibuf, 2, MPI_INT, task, TAG_ADDBOUND, SubComm); + } +} + +/*! \brief Get Next value from global linked list. 
+ * + * \param[in] index Index in global linked list. + * + * \return + */ +long long subfind_distlinklist_get_next(long long index) +{ + int task, i; + long long next; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + next = Next[i]; + } + else + { + MPI_Send(&i, 1, MPI_INT, task, TAG_GET_NEXT, SubComm); + MPI_Recv(&next, 1 * sizeof(long long), MPI_BYTE, task, TAG_GET_NEXT_DATA, SubComm, MPI_STATUS_IGNORE); + } + + return next; +} + +/*! \brief Get rank value from global linked list. + * + * \param[in] index Index in global linked list. + * + * \return Rank value. + */ +long long subfind_distlinklist_get_rank(long long index) +{ + int task, i; + long long rank; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + rank = Len[i]; + } + else + { + MPI_Send(&i, 1, MPI_INT, task, TAG_GET_RANK, SubComm); + MPI_Recv(&rank, 1 * sizeof(long long), MPI_BYTE, task, TAG_GET_RANK_DATA, SubComm, MPI_STATUS_IGNORE); + } + + return rank; +} + +/*! \brief Get the head value of global linked list. + * + * \param[in] index Index in the global linked list. + * + * \return Head value. + */ +long long subfind_distlinklist_get_head(long long index) +{ + int task, i; + long long head; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + head = Head[i]; + } + else + { + MPI_Send(&i, 1, MPI_INT, task, TAG_GET_HEAD, SubComm); + MPI_Recv(&head, 1 * sizeof(long long), MPI_BYTE, task, TAG_GET_HEAD_DATA, SubComm, MPI_STATUS_IGNORE); + } + + return head; +} + +/*! \brief Gets the head value of two entries in linked list. + * + * \param[in] ngb_index1 Index of first subgroup. + * \param[in] ngb_index2 Index of second subgroup. + * \param[out] head Head value of first subgroup. + * \param[out] head_attach head value of second subgroup. 
+ * + * \return void + */ +void subfind_distlinklist_get_two_heads(long long ngb_index1, long long ngb_index2, long long *head, long long *head_attach) +{ + int task, i1, i2, ibuf[2]; + long long buf[2]; + + task = (ngb_index1 >> 32); + i1 = (ngb_index1 & MASK); + i2 = (ngb_index2 & MASK); + + if(SubThisTask == task) + { + *head = Head[i1]; + *head_attach = Head[i2]; + } + else + { + ibuf[0] = i1; + ibuf[1] = i2; + MPI_Send(ibuf, 2, MPI_INT, task, TAG_GET_TWOHEADS, SubComm); + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_GET_TWOHEADS_DATA, SubComm, MPI_STATUS_IGNORE); + *head = buf[0]; + *head_attach = buf[1]; + } +} + +/*! \brief Sets Head and Next entries in global linked list. + * + * \param[in] index Index in global linked list. + * \param[in] head Value for Head. + * \param[in] next Value for Next. + * + * \return void + */ +void subfind_distlinklist_set_headandnext(long long index, long long head, long long next) +{ + int task, i; + long long buf[3]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Head[i] = head; + Next[i] = next; + } + else + { + buf[0] = i; + buf[1] = head; + buf[2] = next; + MPI_Send(buf, 3 * sizeof(long long), MPI_BYTE, task, TAG_SET_HEADANDNEXT, SubComm); + } +} + +/*! \brief Returns old tail, sets a new tail, increases length of linked list. + * + * \param[in] index Index of the subgroup. + * \param[out] tail Old value for tail. + * \param[in] newtail New value for tail. 
+ * + * \return return code + */ +int subfind_distlinklist_get_tail_set_tail_increaselen(long long index, long long *tail, long long newtail) +{ + int task, i, task_newtail, i_newtail, task_oldtail, i_oldtail, retcode; + long long oldtail; + long long buf[2]; + + task = (index >> 32); + i = (index & MASK); + + retcode = 0; + + if(SubThisTask == task) + { + oldtail = Tail[i]; + Tail[i] = newtail; + Len[i]++; + *tail = oldtail; + + task_newtail = (newtail >> 32); + if(task_newtail == SubThisTask) + { + i_newtail = (newtail & MASK); + Head[i_newtail] = index; + Next[i_newtail] = -1; + retcode |= 1; + } + task_oldtail = (oldtail >> 32); + if(task_oldtail == SubThisTask) + { + i_oldtail = (oldtail & MASK); + Next[i_oldtail] = newtail; + retcode |= 2; + } + } + else + { + buf[0] = i; + buf[1] = newtail; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_SET_NEWTAIL, SubComm); + MPI_Recv(&oldtail, 1 * sizeof(long long), MPI_BYTE, task, TAG_GET_OLDTAIL, SubComm, MPI_STATUS_IGNORE); + *tail = oldtail; + + if((newtail >> 32) == task) + retcode |= 1; + if((oldtail >> 32) == task) + retcode |= 2; + } + + return retcode; +} + +/*! \brief Set tail and len in global linked list. + * + * \param[in] index Index in global linked list. + * \param[in] tail Value to be set in 'Tail'. + * \param[in] len Value to be set in 'Len'. + * + * \return void + */ +void subfind_distlinklist_set_tailandlen(long long index, long long tail, int len) +{ + int task, i; + long long buf[3]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Tail[i] = tail; + Len[i] = len; + } + else + { + buf[0] = i; + buf[1] = tail; + buf[2] = len; + MPI_Send(buf, 3 * sizeof(long long), MPI_BYTE, task, TAG_SET_TAILANDLEN, SubComm); + } +} + +/*! \brief Get tail and len in global linked list. + * + * \param[in] index Index in global linked list. + * \param[out] tail 'Tail' value. + * \param[out] len 'Len' value. 
+ *
+ *  A remote owner is queried with TAG_GET_TAILANDLEN and answers with both
+ *  values in one message.
+ *
+ *  \return void
+ */
+void subfind_distlinklist_get_tailandlen(long long index, long long *tail, int *len)
+{
+  int owner = (int)(index >> 32);
+  int slot  = (int)(index & MASK);
+  long long reply[2];
+
+  if(owner == SubThisTask)
+    {
+      *tail = Tail[slot];
+      *len  = Len[slot];
+      return;
+    }
+
+  MPI_Send(&slot, 1, MPI_INT, owner, TAG_GET_TAILANDLEN, SubComm);
+  MPI_Recv(reply, 2 * sizeof(long long), MPI_BYTE, owner, TAG_GET_TAILANDLEN_DATA, SubComm, MPI_STATUS_IGNORE);
+  *tail = reply[0];
+  *len  = reply[1];
+}
+
+/*! \brief Writes Head, Tail, Len and Next of one entry in a single step.
+ *
+ *  A remote owner receives all five numbers (slot first) under TAG_SET_ALL;
+ *  no reply is awaited.
+ *
+ *  \param[in] index Index in global linked list.
+ *  \param[in] head Value for 'Head'.
+ *  \param[in] tail Value for 'Tail'.
+ *  \param[in] len Value for 'Len'.
+ *  \param[in] next Value for 'Next'.
+ *
+ *  \return void
+ */
+void subfind_distlinklist_set_all(long long index, long long head, long long tail, int len, long long next)
+{
+  int owner = (int)(index >> 32);
+  int slot  = (int)(index & MASK);
+  long long msg[5];
+
+  if(owner != SubThisTask)
+    {
+      msg[0] = slot;
+      msg[1] = head;
+      msg[2] = tail;
+      msg[3] = len;
+      msg[4] = next;
+      MPI_Send(msg, 5 * sizeof(long long), MPI_BYTE, owner, TAG_SET_ALL, SubComm);
+      return;
+    }
+
+  Head[slot] = head;
+  Tail[slot] = tail;
+  Len[slot]  = len;
+  Next[slot] = next;
+}
+
+/*! \brief Comparison function of sort_density_data objects.
+ *
+ *  Compares element density.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ * + * \return (-1,0,1); -1 if a > b + */ +int subfind_compare_densities(const void *a, const void *b) /* largest density first */ +{ + if(((struct sort_density_data *)a)->density > (((struct sort_density_data *)b)->density)) + return -1; + + if(((struct sort_density_data *)a)->density < (((struct sort_density_data *)b)->density)) + return +1; + + return 0; +} + +#endif diff --git a/src/amuse/community/arepo/src/subfind/subfind_density.c b/src/amuse/community/arepo/src/subfind/subfind_density.c new file mode 100644 index 0000000000..0b61aa9d97 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_density.c @@ -0,0 +1,662 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_density.c + * \date 05/2018 + * \brief Smoothing length and density calculation for particles. 
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                double subfind_density(int mode)
+ *                static int subfind_density_evaluate(int target, int mode,
+ *                  int threadid)
+ *                void subfind_density_hsml_guess(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 15.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include /* NOTE(review): the header names of these eight system includes are
+          * missing (angle-bracket text apparently stripped in extraction);
+          * restore them from the upstream file before building */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+
+#include "../fof/fof.h"
+#include "subfind.h"
+
+static char *Todo;     /* per-particle flag, non-zero while the particle still has to be (re)evaluated */
+static int *DM_NumNgb; /* per-particle neighbour count, filled by out2particle() */
+#ifdef SUBFIND_CALC_MORE
+static MyFloat *Vx, *Vy, *Vz; /* per-particle velocity sums; subfind_density() divides them by DM_NumNgb */
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+static int subfind_density_evaluate(int target, int mode, int threadid);
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* search centre, as filled in by particle2in() */
+  MyFloat Hsml;    /* search radius, copied from PS[i].Hsml */
+
+  int Firstnode; /* the firstnode argument of particle2in(), stored unchanged */
+} data_in;
+
+static data_in *DataIn, *DataGet; /* DataGet[] is what subfind_density_evaluate() reads for imported particles */
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ *         structure defined above. Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of particle in P and SphP arrays.
+ *  \param[in] firstnode First node of communication.
+ * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + in->Pos[0] = SphP[i].Center[0]; + in->Pos[1] = SphP[i].Center[1]; + in->Pos[2] = SphP[i].Center[2]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + } + in->Hsml = PS[i].Hsml; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + int Ngb; + MyFloat Rho; +#ifdef SUBFIND_CALC_MORE + MyFloat VelDisp, Vx, Vy, Vz, RhoDM; +#endif /* #ifdef SUBFIND_CALC_MORE */ +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + DM_NumNgb[i] = out->Ngb; + PS[i].Density = out->Rho; +#ifdef SUBFIND_CALC_MORE + Vx[i] = out->Vx; + Vy[i] = out->Vy; + Vz[i] = out->Vz; + PS[i].SubfindVelDisp = out->VelDisp; + PS[i].SubfindDMDensity = out->RhoDM; +#endif /* #ifdef SUBFIND_CALC_MORE */ + } + else /* combine */ + { + DM_NumNgb[i] += out->Ngb; + PS[i].Density += out->Rho; +#ifdef SUBFIND_CALC_MORE + Vx[i] += out->Vx; + Vy[i] += out->Vy; + Vz[i] += out->Vz; + PS[i].SubfindVelDisp += out->VelDisp; + PS[i].SubfindDMDensity += out->RhoDM; +#endif /* #ifdef SUBFIND_CALC_MORE */ + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int i; + + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= NumPart) + break; + + if(Todo[i]) + subfind_density_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + subfind_density_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Calculates smoothing length or density via neighbor search. + * + * \param[in] mode Mode if the function: FIND_SMOOTHING_LENGTHS, or to + * calculate densities. + * + * \return Time spent in this routine. 
+ */
+double subfind_density(int mode)
+{
+  long long ntot;        /* global number of particles that must be redone */
+  int i, npleft, iter;   /* npleft: local count of particles to redo; iter: bisection pass counter */
+  MyFloat *Left, *Right; /* per-particle lower/upper bracket for Hsml; 0 means "not bracketed yet" */
+  double t0, t1, tstart, tend;
+
+  if(mode == FIND_SMOOTHING_LENGTHS)
+    mpi_printf("SUBFIND: finding smoothing length for all particles\n");
+  else
+    mpi_printf("SUBFIND: finding total densities around all particles\n");
+
+  tstart = second();
+
+  int HsmlFlag = 0;
+
+#ifdef SUBFIND_CALC_MORE
+  HsmlFlag = 1; /* in this case, calculate densities for all particles, not only those in groups */
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+  DM_NumNgb = (int *)mymalloc_movable(&DM_NumNgb, "DM_NumNgb", sizeof(int) * NumPart);
+  Left      = (MyFloat *)mymalloc_movable(&Left, "Left", sizeof(MyFloat) * NumPart);
+  Right     = (MyFloat *)mymalloc_movable(&Right, "Right", sizeof(MyFloat) * NumPart);
+  Todo      = (char *)mymalloc_movable(&Todo, "Todo", sizeof(char) * NumPart);
+
+#ifdef SUBFIND_CALC_MORE
+  Vx = (MyFloat *)mymalloc("Vx", sizeof(MyFloat) * NumPart); /* NOTE(review): allocated with mymalloc() but released with
+                                                              * myfree_movable() below - confirm this pairing is intended */
+  Vy = (MyFloat *)mymalloc("Vy", sizeof(MyFloat) * NumPart);
+  Vz = (MyFloat *)mymalloc("Vz", sizeof(MyFloat) * NumPart);
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+  generic_set_MaxNexport();
+
+  for(i = 0; i < NumPart; i++) /* reset brackets and results, decide which particles take part */
+    {
+      Left[i] = Right[i] = 0;
+      DM_NumNgb[i]       = 0;
+      Todo[i]            = 1;
+      if((PS[i].GrNr >= TotNgroups) && (HsmlFlag == 0)) // particle not in groups
+        Todo[i] = 0;
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+      if((PS[i].GrNr >= TotNgroups) && (P[i].Type == 4 || P[i].Type == 5)) // particle of type 4 or 5 but not in group
+        Todo[i] = 0;
+
+      if(P[i].Type != 0 && P[i].Type != 1 && P[i].Type != 4 && P[i].Type != 5)
+        Todo[i] = 0;
+      if(P[i].Type == 0)
+        if(SphP[i].AllowRefinement == 0)
+          Todo[i] = 0;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+      PS[i].Density = 0;
+#ifdef SUBFIND_CALC_MORE
+      PS[i].SubfindHsml      = 0;
+      PS[i].SubfindDensity   = 0;
+      PS[i].SubfindDMDensity = 0;
+      PS[i].SubfindVelDisp   = 0;
+#endif /* #ifdef SUBFIND_CALC_MORE */
+    }
+
+  iter = 0;
+
+  /* we will repeat the whole thing for those particles where we didn't find enough neighbours;
+   * in density mode npleft stays 0, so the loop body runs exactly once */
+  do
+    {
+      t0 = second();
+
+      generic_comm_pattern(NumPart, kernel_local, kernel_imported);
+
+      /* do final operations on results */
+      for(i = 0, npleft = 0; i < NumPart; i++)
+        {
+          /* now check whether we had enough neighbours */
+
+          if(Todo[i] && mode == FIND_SMOOTHING_LENGTHS)
+            {
+              if(abs(DM_NumNgb[i] - All.DesNumNgb) > All.MaxNumNgbDeviation &&
+                 ((Right[i] - Left[i]) > 1.0e-4 * Left[i] || Left[i] == 0 || Right[i] == 0)) /* count off target AND bracket still open or wide */
+                {
+                  /* need to redo this particle */
+                  npleft++;
+
+                  if(DM_NumNgb[i] < All.DesNumNgb)
+                    Left[i] = (MyFloat)dmax(PS[i].Hsml, Left[i]); /* too few neighbours: current Hsml becomes a lower bound */
+                  else
+                    {
+                      if(Right[i] != 0) /* too many neighbours: tighten (or set) the upper bound */
+                        {
+                          if(PS[i].Hsml < Right[i])
+                            Right[i] = PS[i].Hsml;
+                        }
+                      else
+                        Right[i] = PS[i].Hsml;
+                    }
+
+                  if(iter >= MAXITER - 10) /* diagnostics for the last passes before giving up */
+                    {
+                      printf("SUBFIND: i=%d task=%d ID=%d Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i,
+                             ThisTask, (int)P[i].ID, PS[i].Hsml, Left[i], Right[i], (double)DM_NumNgb[i], Right[i] - Left[i],
+                             P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
+                      myflush(stdout);
+                    }
+
+                  if(Right[i] > 0 && Left[i] > 0)
+                    PS[i].Hsml = (MyFloat)pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3); /* bisect in Hsml^3 */
+                  else
+                    {
+                      if(Right[i] == 0 && Left[i] == 0)
+                        terminate("can't occur");
+
+                      if(Right[i] == 0 && Left[i] > 0)
+                        PS[i].Hsml *= 1.26; /* no upper bound yet: grow (1.26^3 is about 2) */
+
+                      if(Right[i] > 0 && Left[i] == 0)
+                        PS[i].Hsml /= 1.26; /* no lower bound yet: shrink by the same factor */
+                    }
+                }
+              else
+                Todo[i] = 0; /* done with this particle */
+            }
+        }
+
+      sumup_large_ints(1, &npleft, &ntot);
+
+      t1 = second();
+
+      if(ntot > 0 && mode == FIND_SMOOTHING_LENGTHS)
+        {
+          iter++;
+
+          if(iter > 0) /* NOTE(review): always true right after iter++ */
+            mpi_printf("SUBFIND: ngb iteration %2d: need to repeat for %15lld particles. (took %g sec)\n", iter, ntot,
+                       timediff(t0, t1));
+
+          if(iter > MAXITER)
+            terminate("failed to converge in neighbour iteration in density()\n");
+        }
+    }
+  while(ntot > 0);
+
+#ifdef SUBFIND_CALC_MORE
+  double vel_to_phys;
+
+  vel_to_phys = 1.0 / All.cf_atime;
+
+  for(i = 0; i < NumPart; i++) /* turn the accumulated sums into mean velocity and velocity dispersion */
+    {
+      Vx[i] /= DM_NumNgb[i]; /* NOTE(review): DM_NumNgb[i] is not checked for zero here - confirm it cannot be */
+      Vy[i] /= DM_NumNgb[i];
+      Vz[i] /= DM_NumNgb[i];
+      PS[i].SubfindVelDisp /= DM_NumNgb[i];
+      PS[i].SubfindVelDisp = vel_to_phys * sqrt(PS[i].SubfindVelDisp - Vx[i] * Vx[i] - Vy[i] * Vy[i] - Vz[i] * Vz[i]);
+    }
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+#ifdef SUBFIND_CALC_MORE
+  myfree_movable(Vz); /* buffers are released in reverse order of allocation */
+  myfree_movable(Vy);
+  myfree_movable(Vx);
+#endif /* #ifdef SUBFIND_CALC_MORE */
+  myfree_movable(Todo);
+  myfree_movable(Right);
+  myfree_movable(Left);
+  myfree_movable(DM_NumNgb);
+
+#ifdef SUBFIND_CALC_MORE
+  for(i = 0; i < NumPart; i++)
+    {
+      PS[i].SubfindHsml    = PS[i].Hsml;
+      PS[i].SubfindDensity = PS[i].Density;
+    }
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+  tend = second();
+  return timediff(tstart, tend);
+}
+
+/*! \brief Evaluate function of subfind density calculation.
+ *
+ *  \param[in] target Index of particle of interest
+ *  \param[in] mode Local or imported particles?
+ *  \param[in] threadid ID of thread.
+ *
+ *  \return 0
+ */
+static int subfind_density_evaluate(int target, int mode, int threadid)
+{
+  int k, numnodes, *firstnode, type;
+  double hsml;
+  double rhosum = 0; /* kernel-weighted mass sum */
+  MyDouble *pos;
+  int numngb = 0, no, p; /* numngb counts primary-link-type neighbours only */
+  struct NODE *current;
+  double dx, dy, dz, r2, mass;
+  double h2, hinv, hinv3, r, u, wk;
+  MyDouble xtmp, ytmp, ztmp; /* presumably scratch variables for the FOF_NEAREST_LONG_* macros - confirm */
+#ifdef SUBFIND_CALC_MORE
+  double vxsum = 0, vysum = 0, vzsum = 0, v2sum = 0, rhodmsum = 0;
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+  data_in local, *target_data;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      numnodes  = 1; /* a local search has a single start point, the root (see below) */
+      firstnode = NULL;
+    }
+  else
+    {
+      target_data = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos  = target_data->Pos;
+  hsml = target_data->Hsml;
+
+  h2    = hsml * hsml;
+  hinv  = 1.0 / hsml;
+  hinv3 = hinv * hinv * hinv;
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = Tree_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0) /* the four branches below are selected purely by the numeric range of no */
+        {
+          if(no < Tree_MaxPart) /* single particle */
+            {
+              p  = no;
+              no = Nextnode[no];
+
+              dx = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - pos[0]); /* per-axis rejects first, full distance last */
+              if(dx > hsml)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - pos[1]);
+              if(dy > hsml)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - pos[2]);
+              if(dz > hsml)
+                continue;
+
+              if((r2 = (dx * dx + dy * dy + dz * dz)) > hsml * hsml)
+                continue;
+
+              mass = P[p].Mass;
+              type = P[p].Type;
+            }
+          else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no <
+                     Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    break;
+                }
+
+              current = &Nodes[no];
+
+              no = current->u.d.sibling; /* in case the node can be discarded */
+
+              double dist = hsml + 0.5 * current->len;
+
+              dx = (MyFloat)FOF_NEAREST_LONG_X(current->center[0] - pos[0]);
+              if(dx > dist)
+                continue;
+              dy = (MyFloat)FOF_NEAREST_LONG_Y(current->center[1] - pos[1]);
+              if(dy > dist)
+                continue;
+              dz = (MyFloat)FOF_NEAREST_LONG_Z(current->center[2] - pos[2]);
+              if(dz > dist)
+                continue;
+              /* now test against the minimal sphere enclosing everything */
+              dist += FACT1 * current->len;
+              if(dx * dx + dy * dy + dz * dz > dist * dist)
+                continue;
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+              continue;
+            }
+          else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              int n = no - Tree_ImportedNodeOffset;
+              no    = Nextnode[no - Tree_MaxNodes];
+
+              dx = FOF_NEAREST_LONG_X(Tree_Points[n].Pos[0] - pos[0]);
+              if(dx > hsml)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(Tree_Points[n].Pos[1] - pos[1]);
+              if(dy > hsml)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(Tree_Points[n].Pos[2] - pos[2]);
+              if(dz > hsml)
+                continue;
+
+              if((r2 = (dx * dx + dy * dy + dz * dz)) > hsml * hsml)
+                continue;
+
+              mass = Tree_Points[n].Mass;
+              type = Tree_Points[n].Type;
+
+              p = -1; /* no local P[] entry exists for an imported point */
+            }
+          else /* pseudo particle */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("can't be");
+
+              if(target >= 0) /* if no target is given, export will not occur */
+                tree_treefind_export_node_threads(no, target, threadid);
+
+              no = Nextnode[no - Tree_MaxNodes];
+              continue;
+            }
+
+          if((1 << type) & (FOF_PRIMARY_LINK_TYPES))
+            {
+              numngb++;
+
+#ifdef SUBFIND_CALC_MORE
+              if(p < 0) /* velocities are read from P[p], which imported points (p == -1) do not have */
+                terminate("this should not occur");
+
+              vxsum += P[p].Vel[0];
+              vysum += P[p].Vel[1];
+              vzsum += P[p].Vel[2];
+              v2sum += P[p].Vel[0] * P[p].Vel[0] + P[p].Vel[1] * P[p].Vel[1] + P[p].Vel[2] * P[p].Vel[2];
+#endif /* #ifdef SUBFIND_CALC_MORE */
+            }
+
+          if(((1 << type) & (FOF_PRIMARY_LINK_TYPES)) || ((1 << type) & (FOF_SECONDARY_LINK_TYPES)))
+            if(r2 < h2)
+              {
+                r = sqrt(r2);
+
+                u = r * hinv;
+
+                if(u < 0.5) /* two-branch kernel in u = r / hsml */
+                  wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
+                else
+                  wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
+
+                rhosum += mass * wk;
+
+#ifdef SUBFIND_CALC_MORE
+                if((1 << type) & (FOF_PRIMARY_LINK_TYPES))
+                  rhodmsum += mass * wk;
+#endif /* #ifdef SUBFIND_CALC_MORE */
+              }
+        }
+    }
+
+  out.Ngb = numngb;
+  out.Rho = rhosum;
+#ifdef SUBFIND_CALC_MORE
+  out.Vx      = vxsum;
+  out.Vy      = vysum;
+  out.Vz      = vzsum;
+  out.VelDisp = v2sum; /* raw sum of v^2; subfind_density() converts it into a dispersion */
+  out.RhoDM   = rhodmsum;
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+/*! \brief Sets Hsml to an initial guess to reduce the number of iterations
+ *         needed to get the final smoothing length (Hsml).
+ *
+ *  Particles of a primary link type get a guess derived from the mass and
+ *  side length of a tree node above them; all other particles reuse the most
+ *  recent such guess, or their softening-table entry if none exists yet.
+ *
+ *  \return void
+ */
+void subfind_density_hsml_guess(void)
+{
+  int i;
+  double hsml_prev = 0; /* last guess computed for a primary-type particle; 0 = none so far */
+
+  for(i = 0; i < NumPart; i++)
+    {
+      int no, p;
+
+      if((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES))
+        {
+          no = Father[i];
+
+          while(8 * All.DesNumNgb * P[i].Mass > Nodes[no].u.d.mass && Nodes[no].len == 0) /* climb towards the root while both conditions hold */
+            {
+              p = Nodes[no].u.d.father;
+
+              if(p < 0)
+                break;
+
+              no = p;
+            }
+
+          PS[i].Hsml = hsml_prev = (pow(3.0 / (4 * M_PI) * All.DesNumNgb * P[i].Mass / Nodes[no].u.d.mass, 1.0 / 3) * Nodes[no].len);
+
+          if(PS[i].Hsml == 0)
+            {
+              printf("Hsml=0 task=%d i=%d no=%d Nodes[no].len=%g Nodes[no].u.d.mass=%g P[i].Mass=%g type=%d ID=%llu pos=(%g|%g|%g)\n",
+                     ThisTask, i, no, Nodes[no].len, Nodes[no].u.d.mass, P[i].Mass, P[i].Type, (long long)P[i].ID, P[i].Pos[0],
+                     P[i].Pos[1], P[i].Pos[2]); /* NOTE(review): %llu is paired with a signed (long long) cast */
+              terminate("zero hsml guess\n");
+            }
+        }
+      else
+        {
+          if(hsml_prev)
+            PS[i].Hsml = hsml_prev;
+          else
+            PS[i].Hsml = All.SofteningTable[P[i].SofteningType];
+        }
+    }
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_distribute.c b/src/amuse/community/arepo/src/subfind/subfind_distribute.c
new file mode 100644
index 0000000000..80b492193c
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_distribute.c
@@ -0,0 +1,421 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_distribute.c + * \date 05/2018 + * \brief Moves grops and particles across MPI tasks form their + * simulation ordering to a subfind ordering. + * \details contains functions: + * void subfind_distribute_groups(void) + * void subfind_distribute_particles(MPI_Comm Communicator) + * void subfind_reorder_P(int *Id, int Nstart, int N) + * void subfind_reorder_PS(int *Id, int Nstart, int N) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../fof/fof.h" +#include "subfind.h" + +#ifdef SUBFIND +static struct group_properties *send_Group; + +/*! \brief Distributes groups equally on MPI tasks. 
+ * + * \return void + */ +void subfind_distribute_groups(void) +{ + int i, nexport, nimport, target, ngrp, recvTask; + + /* count how many we have of each task */ + for(i = 0; i < NTask; i++) + Send_count[i] = 0; + + for(i = 0; i < Ngroups; i++) + { + target = Group[i].TargetTask; + + if(target < 0 || target >= NTask) + terminate("target < 0 || target >= NTask"); + + if(target != ThisTask) + Send_count[target]++; + } + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(i = 0, nexport = 0, nimport = 0, Recv_offset[0] = Send_offset[0] = 0; i < NTask; i++) + { + nimport += Recv_count[i]; + nexport += Send_count[i]; + + if(i > 0) + { + Send_offset[i] = Send_offset[i - 1] + Send_count[i - 1]; + Recv_offset[i] = Recv_offset[i - 1] + Recv_count[i - 1]; + } + } + + send_Group = (struct group_properties *)mymalloc_movable(&send_Group, "send_Group", nexport * sizeof(struct group_properties)); + + for(i = 0; i < NTask; i++) + Send_count[i] = 0; + + for(i = 0; i < Ngroups; i++) + { + target = Group[i].TargetTask; + + if(target != ThisTask) + { + send_Group[Send_offset[target] + Send_count[target]] = Group[i]; + Send_count[target]++; + + Group[i] = Group[Ngroups - 1]; + Ngroups--; + i--; + } + } + + if(Ngroups + nimport > MaxNgroups) + { +#ifdef VERBOSE + printf("SUBFIND: Task=%d: (Ngroups=%d) + (nimport=%d) > (MaxNgroups=%d). 
Will increase MaxNgroups.\n", ThisTask, Ngroups, + nimport, MaxNgroups); +#endif /* #ifdef VERBOSE */ + MaxNgroups = Ngroups + nimport; + Group = (struct group_properties *)myrealloc_movable(Group, sizeof(struct group_properties) * MaxNgroups); + } + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the group info */ + MPI_Sendrecv(&send_Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct group_properties), MPI_BYTE, + recvTask, TAG_DENS_A, &Group[Ngroups + Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct group_properties), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + Ngroups += nimport; + + myfree_movable(send_Group); +} + +static struct particle_data *partBuf; +static struct subfind_data *subBuf; + +/* \brief Distributes particles on MPI tasks. + * + * This function redistributes the particles in P[] and PS[] according to what + * is stored in PS[].TargetTask, and PS[].TargetIndex. NOTE: The associated + * SphP[] is not moved, i.e. the association is broken until the particles are + * moved back into the original order! + * + * \param[in] Communicator MPI communicator. 
+ * + * \return void + */ +void subfind_distribute_particles(MPI_Comm Communicator) +{ + int nimport, nexport; + int i, j, n, ngrp, target; + int max_load, load; + int CommThisTask, CommNTask; + + MPI_Comm_size(Communicator, &CommNTask); + MPI_Comm_rank(Communicator, &CommThisTask); + + for(n = 0; n < CommNTask; n++) + Send_count[n] = 0; + + for(n = 0; n < NumPart; n++) + { + target = PS[n].TargetTask; + + if(target != CommThisTask) + { + if(target < 0 || target >= CommNTask) + terminate("n=%d targettask=%d", n, target); + + Send_count[target]++; + } + } + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); + + for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < CommNTask; j++) + { + nexport += Send_count[j]; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + /* for resize */ + load = (NumPart + nimport - nexport); + MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator); + + partBuf = (struct particle_data *)mymalloc_movable(&partBuf, "partBuf", nexport * sizeof(struct particle_data)); + subBuf = (struct subfind_data *)mymalloc_movable(&subBuf, "subBuf", nexport * sizeof(struct subfind_data)); + + for(i = 0; i < CommNTask; i++) + Send_count[i] = 0; + + for(n = 0; n < NumPart; n++) + { + target = PS[n].TargetTask; + + if(target != CommThisTask) + { + partBuf[Send_offset[target] + Send_count[target]] = P[n]; + subBuf[Send_offset[target] + Send_count[target]] = PS[n]; + + P[n] = P[NumPart - 1]; + PS[n] = PS[NumPart - 1]; + + Send_count[target]++; + NumPart--; + n--; + } + } + + /* do resize */ + if(max_load > (1.0 - ALLOC_TOLERANCE) * All.MaxPart) + { + All.MaxPart = max_load / (1.0 - 2 * ALLOC_TOLERANCE); + reallocate_memory_maxpart(); + PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data)); + } + + for(i = 0; i < CommNTask; i++) + 
Recv_offset[i] += NumPart; + +#ifndef NO_ISEND_IRECV_IN_DOMAIN + + MPI_Request *requests = (MPI_Request *)mymalloc("requests", 8 * CommNTask * sizeof(MPI_Request)); + int n_requests = 0; + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + target = CommThisTask ^ ngrp; + + if(target < CommNTask) + { + if(Recv_count[target] > 0) + { + MPI_Irecv(P + Recv_offset[target], Recv_count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA, + Communicator, &requests[n_requests++]); + MPI_Irecv(PS + Recv_offset[target], Recv_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY, + Communicator, &requests[n_requests++]); + } + } + } + + MPI_Barrier(Communicator); /* not really necessary, but this will guarantee that all receives are + posted before the sends, which helps the stability of MPI on + bluegene, and perhaps some mpich1-clusters */ + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + target = CommThisTask ^ ngrp; + + if(target < CommNTask) + { + if(Send_count[target] > 0) + { + MPI_Isend(partBuf + Send_offset[target], Send_count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA, + Communicator, &requests[n_requests++]); + MPI_Isend(subBuf + Send_offset[target], Send_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY, + Communicator, &requests[n_requests++]); + } + } + } + + MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE); + myfree(requests); + +#else /* #ifndef NO_ISEND_IRECV_IN_DOMAIN */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + target = CommThisTask ^ ngrp; + + if(target < CommNTask) + { + if(Send_count[target] > 0 || Recv_count[target] > 0) + { + MPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(struct particle_data), MPI_BYTE, target, + TAG_PDATA, P + Recv_offset[target], Recv_count[target] * sizeof(struct particle_data), MPI_BYTE, target, + TAG_PDATA, Communicator, MPI_STATUS_IGNORE); + + MPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * 
sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY, + PS + Recv_offset[target], Recv_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY, + Communicator, MPI_STATUS_IGNORE); + } + } + } +#endif /* #ifndef NO_ISEND_IRECV_IN_DOMAIN #else */ + + NumPart += nimport; + myfree_movable(subBuf); + myfree_movable(partBuf); + + /* finally, let's also address the desired local order according to PS[].TargetIndex */ + + struct fof_local_sort_data *mp; + int *Id; + + mp = (struct fof_local_sort_data *)mymalloc("mp", sizeof(struct fof_local_sort_data) * (NumPart)); + Id = (int *)mymalloc("Id", sizeof(int) * (NumPart)); + + for(i = 0; i < NumPart; i++) + { + mp[i].index = i; + mp[i].targetindex = PS[i].TargetIndex; + } + + qsort(mp, NumPart, sizeof(struct fof_local_sort_data), fof_compare_local_sort_data_targetindex); + + for(i = 0; i < NumPart; i++) + Id[mp[i].index] = i; + + subfind_reorder_P(Id, 0, NumPart); + + for(i = 0; i < NumPart; i++) + Id[mp[i].index] = i; + + subfind_reorder_PS(Id, 0, NumPart); + + myfree(Id); + myfree(mp); +} + +/*! \brief Reorders elements in the P array. + * + * \param[in] Id Array containing ordering. + * \param[in] Nstart Start index (in Id and P). + * \param[in] N Final element index + 1. + * + * \return void + */ +void subfind_reorder_P(int *Id, int Nstart, int N) +{ + int i; + struct particle_data Psave, Psource; + int idsource, idsave, dest; + + for(i = Nstart; i < N; i++) + { + if(Id[i] != i) + { + Psource = P[i]; + idsource = Id[i]; + + dest = Id[i]; + + do + { + Psave = P[dest]; + idsave = Id[dest]; + + P[dest] = Psource; + Id[dest] = idsource; + + if(dest == i) + break; + + Psource = Psave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +/*! \brief Reorders elements in the PS array. + * + * \param[in] Id Array containing ordering. + * \param[in] Nstart Start index (in Id and P). + * \param[in] N Final element index + 1. 
+ * + * \return void + */ +void subfind_reorder_PS(int *Id, int Nstart, int N) +{ + int i; + struct subfind_data PSsave, PSsource; + int idsource, idsave, dest; + + for(i = Nstart; i < N; i++) + { + if(Id[i] != i) + { + PSsource = PS[i]; + + idsource = Id[i]; + dest = Id[i]; + + do + { + PSsave = PS[dest]; + idsave = Id[dest]; + + PS[dest] = PSsource; + Id[dest] = idsource; + + if(dest == i) + break; + + PSsource = PSsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c b/src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c new file mode 100644 index 0000000000..8faaba4542 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c @@ -0,0 +1,539 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_findlinkngb.c + * \date 05/2018 + * \brief Algorithm to find smoothing lengths of particles to get a + * desried number of neighbours. 
+ * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void subfind_find_linkngb(void) + * static int subfind_linkngb_evaluate(int target, int mode, + * int threadid) + * int subfind_treefind_collective_export_node_threads(int no, + * int i, int thread_id) + * static int subfind_ngb_compare_dist(const void *a, const + * void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "subfind.h" + +static int subfind_ngb_compare_dist(const void *a, const void *b); +static int subfind_linkngb_evaluate(int target, int mode, int threadid); + +static int *DM_NumNgb; +static double *Dist2list; +static int *Ngblist; +static MyFloat *Left, *Right; +static char *Todo; + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat DM_Hsml; + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. 
+ * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + in->Pos[0] = PS[i].Center[0]; + in->Pos[1] = PS[i].Center[1]; + in->Pos[2] = PS[i].Center[2]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + } + + in->DM_Hsml = PS[i].Hsml; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + int Ngb; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays. + * \param[in] i Index of particle. + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? + * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + DM_NumNgb[i] = out->Ngb; + } + else /* combine */ + { + DM_NumNgb[i] += out->Ngb; + } +} + +#define USE_SUBCOMM_COMMUNICATOR +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int i; + + { + int j, threadid = get_thread_num(); + + for(j = 0; j < SubNTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= NumPartGroup) + break; + + if(Todo[i]) + subfind_linkngb_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! 
\brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + subfind_linkngb_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Iteratvie search for particle smoothing length to enclose a given + * number of neighbours. + * + * \return void + */ +void subfind_find_linkngb(void) +{ + long long ntot; + int i, npleft, iter = 0; + double t0, t1; + + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: Start find_linkngb. (%d particles on root-task)\n", ThisTask, NumPartGroup); + + /* allocate buffers to arrange communication */ + + Ngblist = (int *)mymalloc("Ngblist", NumPartGroup * sizeof(int)); + Dist2list = (double *)mymalloc("Dist2list", NumPartGroup * sizeof(double)); + + generic_set_MaxNexport(); + + Left = (MyFloat *)mymalloc("Left", sizeof(MyFloat) * NumPartGroup); + Right = (MyFloat *)mymalloc("Right", sizeof(MyFloat) * NumPartGroup); + Todo = (char *)mymalloc("Todo", sizeof(char) * NumPartGroup); + DM_NumNgb = (int *)mymalloc_movable(&DM_NumNgb, "DM_NumNgb", sizeof(int) * NumPartGroup); + + for(i = 0; i < NumPartGroup; i++) + { + Left[i] = Right[i] = 0; + Todo[i] = 1; + } + + /* we will repeat the whole thing for those particles where we didn't find enough neighbours */ + do + { + t0 = second(); + + generic_comm_pattern(NumPartGroup, kernel_local, kernel_imported); + + /* do final operations on results */ + for(i = 0, npleft = 0; i < NumPartGroup; i++) + { + /* now check whether we had enough neighbours */ + if(Todo[i]) + { + if(DM_NumNgb[i] != All.DesLinkNgb && ((Right[i] - Left[i]) > 1.0e-6 * Left[i] || Left[i] == 0 || Right[i] == 0)) + { + /* need to redo this particle */ + npleft++; + + if(DM_NumNgb[i] < All.DesLinkNgb) + 
Left[i] = dmax(PS[i].Hsml, Left[i]); + else + { + if(Right[i] != 0) + { + if(PS[i].Hsml < Right[i]) + Right[i] = PS[i].Hsml; + } + else + Right[i] = PS[i].Hsml; + } + + if(iter >= MAXITER - 10) + { + printf("i=%d task=%d ID=%d DM_Hsml=%g Left=%g Right=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i, ThisTask, + (int)P[i].ID, PS[i].Hsml, Left[i], Right[i], (double)(Right[i] - Left[i]), P[i].Pos[0], P[i].Pos[1], + P[i].Pos[2]); + fflush(stdout); + } + + if(Right[i] > 0 && Left[i] > 0) + PS[i].Hsml = pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3); + else + { + if(Right[i] == 0 && Left[i] == 0) + terminate("can't occur"); + + if(Right[i] == 0 && Left[i] > 0) + PS[i].Hsml *= 1.26; + + if(Right[i] > 0 && Left[i] == 0) + PS[i].Hsml /= 1.26; + } + } + else + Todo[i] = 0; + } + } + + sumup_large_ints_comm(1, &npleft, &ntot, SubComm); + + t1 = second(); + + if(ntot > 0) + { + iter++; + + if(iter > 0 && SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: find linkngb iteration %d, need to repeat for %lld particles. (took %g sec)\n", + ThisTask, iter, ntot, timediff(t0, t1)); + fflush(stdout); + } + + if(iter > MAXITER) + terminate("failed to converge in neighbour iteration in density()\n"); + } + } + while(ntot > 0); + + myfree(DM_NumNgb); + myfree(Todo); + myfree(Right); + myfree(Left); + + myfree(Dist2list); + myfree(Ngblist); + + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: Done with find_linkngb\n", ThisTask); +} + +/*! \brief Evaluate function for the neighbor search algorithm. + * + * \param[in] target Index of particle of interest. + * \param[in] mode Local or imported particles? + * \param[in] treadid ID of thread. 
+ * + * \return 0 + */ +static int subfind_linkngb_evaluate(int target, int mode, int threadid) +{ + int no, numnodes, *firstnode, numngb; + double hsml; + MyDouble *pos; + int i, k, p, exported = 0; + struct NODE *current; + double dx, dy, dz, dist, r2; + MyDouble xtmp, ytmp, ztmp; + + data_in local, *in; + data_out out; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + in = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + in = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = in->Pos; + hsml = in->DM_Hsml; + + numngb = 0; + + for(k = 0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = SubTree_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = SubNodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + if(no < SubTree_MaxPart) /* single particle */ + { + p = no; + no = SubNextnode[no]; + + dist = hsml; + dx = FOF_NEAREST_LONG_X(SubTree_Pos_list[3 * p + 0] - pos[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(SubTree_Pos_list[3 * p + 1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(SubTree_Pos_list[3 * p + 2] - pos[2]); + if(dz > dist) + continue; + if((r2 = (dx * dx + dy * dy + dz * dz)) > dist * dist) + continue; + + Dist2list[numngb] = r2; + Ngblist[numngb++] = p; + } + else if(no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */ + { + if(mode == 1) + { + if(no < SubTree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the + branch */ + break; + } + + current = &SubNodes[no]; + + no = current->u.d.sibling; /* in case the node can be discarded */ + + dist = hsml + 0.5 * current->len; + dx = FOF_NEAREST_LONG_X(current->center[0] - pos[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]); + if(dz > dist) + continue; + /* now test against the 
minimal sphere enclosing everything */ + dist += FACT1 * current->len; + if(dx * dx + dy * dy + dz * dz > dist * dist) + continue; + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else + { /* pseudo particle */ + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == MODE_IMPORTED_PARTICLES"); + + if(target >= 0) /* if no target is given, export will not occur */ + { + exported = 1; + + if(mode == MODE_LOCAL_PARTICLES) + subfind_treefind_collective_export_node_threads(no, target, threadid); + } + + no = SubNextnode[no - SubTree_MaxNodes]; + } + } + } + + if(mode == MODE_LOCAL_PARTICLES) /* local particle */ + if(exported == 0) /* completely local */ + if(numngb >= All.DesLinkNgb) + { + R2list = (r2type *)mymalloc("R2list", sizeof(r2type) * numngb); + for(i = 0; i < numngb; i++) + { + R2list[i].index = Ngblist[i]; + R2list[i].r2 = Dist2list[i]; + } + + qsort(R2list, numngb, sizeof(r2type), subfind_ngb_compare_dist); + + PS[target].Hsml = sqrt(R2list[All.DesLinkNgb - 1].r2); + numngb = All.DesLinkNgb; + + for(i = 0; i < numngb; i++) + { + Ngblist[i] = R2list[i].index; + Dist2list[i] = R2list[i].r2; + } + + myfree(R2list); + } + + out.Ngb = numngb; + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +/*! \brief Prepares node export. + * + * \param[in] no Index of node. + * \param[in] i Index of particle. + * \param[in] thread_id Export thread. 
+ * + * \return 0 + */ +int subfind_treefind_collective_export_node_threads(int no, int i, int thread_id) +{ + /* The task indicated by the pseudoparticle node */ + int task = SubDomainTask[no - (SubTree_MaxPart + SubTree_MaxNodes)]; + + if(Thread[thread_id].Exportflag[task] != i) + { + Thread[thread_id].Exportflag[task] = i; + int nexp = Thread[thread_id].Nexport++; + Thread[thread_id].PartList[nexp].Task = task; + Thread[thread_id].PartList[nexp].Index = i; + Thread[thread_id].ExportSpace -= Thread[thread_id].ItemSize; + } + + int nexp = Thread[thread_id].NexportNodes++; + nexp = -1 - nexp; + struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[thread_id].PartList) + Thread[thread_id].InitialSpace); + nodelist[nexp].Task = task; + nodelist[nexp].Index = i; + nodelist[nexp].Node = SubDomainNodeIndex[no - (SubTree_MaxPart + SubTree_MaxNodes)]; + Thread[thread_id].ExportSpace -= sizeof(struct datanodelist) + sizeof(int); + return 0; +} + +/*! \brief Comparison function for r2type objects. + * + * Compares element r2. + * + * \param[in] a First object. + * \param[in] b Second object. + * + * \return (-1,0,1) -1 if a < b. + */ +static int subfind_ngb_compare_dist(const void *a, const void *b) +{ + if(((r2type *)a)->r2 < (((r2type *)b)->r2)) + return -1; + + if(((r2type *)a)->r2 > (((r2type *)b)->r2)) + return +1; + + return 0; +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_io.c b/src/amuse/community/arepo/src/subfind/subfind_io.c new file mode 100644 index 0000000000..be760976b1 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_io.c @@ -0,0 +1,156 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_io.c + * \date 05/2018 + * \brief Main output routine for subfind. + * \details contains functions: + * void subfind_save_final(int num) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../fof/fof.h" + +#ifdef SUBFIND +#include "subfind.h" + +/*! \brief Saves subfind group catalogue to disk. + * + * Note that this routine calls the FoF I/O routines. + * + * \param[in] num Index of this snapshot output. 
+ * + * \return void + */ +void subfind_save_final(int num) +{ + int i, filenr, gr, ngrps, masterTask, lastTask, totsubs; + char buf[1000]; + double t0, t1; + + /* prepare list of ids with assigned group numbers */ +#ifdef FOF_STOREIDS + fof_subfind_prepare_ID_list(); +#endif /* #ifdef FOF_STOREIDS */ + + t0 = second(); + + /* fill in the FirstSub-values */ + for(i = 0, totsubs = 0; i < Ngroups; i++) + { + if(i > 0) + Group[i].FirstSub = Group[i - 1].FirstSub + Group[i - 1].Nsubs; + else + Group[i].FirstSub = 0; + totsubs += Group[i].Nsubs; + } + + MPI_Allgather(&totsubs, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD); + for(i = 1, Send_offset[0] = 0; i < NTask; i++) + Send_offset[i] = Send_offset[i - 1] + Send_count[i - 1]; + + for(i = 0; i < Ngroups; i++) + { + if(Group[i].Nsubs > 0) + Group[i].FirstSub += Send_offset[ThisTask]; + else + Group[i].FirstSub = -1; + } + + CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE); + + if(NTask < All.NumFilesPerSnapshot) + { + warn( + "Number of processors must be larger or equal than All.NumFilesPerSnapshot! 
Reducing All.NumFilesPerSnapshot " + "accordingly.\n"); + All.NumFilesPerSnapshot = NTask; + } + + if(All.SnapFormat < 1 || All.SnapFormat > 3) + mpi_printf("Unsupported File-Format All.SnapFormat=%d \n", All.SnapFormat); + +#ifndef HAVE_HDF5 + if(All.SnapFormat == 3) + { + mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n"); + } +#endif /* #ifndef HAVE_HDF5 */ + + /* assign processors to output files */ + distribute_file(All.NumFilesPerSnapshot, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask); + + if(All.NumFilesPerSnapshot > 1) + { + if(ThisTask == 0) + { + sprintf(buf, "%s/groups_%03d", All.OutputDir, num); + mkdir(buf, 02755); + } + MPI_Barrier(MPI_COMM_WORLD); + } + + if(All.NumFilesPerSnapshot > 1) + sprintf(buf, "%s/groups_%03d/%s_%03d.%d", All.OutputDir, num, "fof_subhalo_tab", num, filenr); + else + sprintf(buf, "%s%s_%03d", All.OutputDir, "fof_subhalo_tab", num); + + ngrps = All.NumFilesPerSnapshot / All.NumFilesWrittenInParallel; + if((All.NumFilesPerSnapshot % All.NumFilesWrittenInParallel)) + ngrps++; + + for(gr = 0; gr < ngrps; gr++) + { + if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */ + fof_subfind_write_file(buf, masterTask, lastTask); + + MPI_Barrier(MPI_COMM_WORLD); + } + + myfree(CommBuffer); + +#ifdef FOF_STOREIDS + myfree(ID_list); +#endif /* #ifdef FOF_STOREIDS */ + + t1 = second(); + + mpi_printf("SUBFIND: Subgroup catalogues saved. took = %g sec\n", timediff(t0, t1)); +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_loctree.c b/src/amuse/community/arepo/src/subfind/subfind_loctree.c new file mode 100644 index 0000000000..9b3f26255c --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_loctree.c @@ -0,0 +1,930 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_loctree.c + * \date 05/2018 + * \brief Algorithms for local tree in subfind. + * \details contains functions: + * void subfind_loctree_findExtent(int npart, struct unbind_data *mp) + * void subfind_loctree_copyExtent(void) + * int subfind_loctree_treebuild(int npart, struct unbind_data **udp) + * void subfind_loctree_update_node_recursive(int no, int sib, int father) + * double subfind_loctree_treeevaluate_potential(int target) + * int subfind_locngb_compare_key(const void *a, const void *b) + * double subfind_locngb_treefind(MyDouble xyz[3], int desngb, double hguess) + * int subfind_locngb_treefind_variable(MyDouble searchcenter[3], double hguess) + * size_t subfind_loctree_treeallocate(int maxnodes, int maxpart) + * void subfind_loctree_treefree(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../gravity/forcetree.h" +#include "subfind.h" + +#ifdef SUBFIND 
+static double RootLen, RootFac, RootBigFac, RootInverseLen, RootCenter[3], RootCorner[3]; /* geometry of the root cube: side
+                                                                                            * length, two conversion factors
+                                                                                            * derived from it, its reciprocal,
+                                                                                            * geometric center and lower corner;
+                                                                                            * set by subfind_loctree_findExtent()
+                                                                                            * or subfind_loctree_copyExtent() */
+static int LocMaxPart; /* particle capacity of the tree; indices >= LocMaxPart denote internal nodes */
+static int MaxNodes, last; /* MaxNodes: current node capacity; last: entry visited most recently by
+                            * subfind_loctree_update_node_recursive(), used to chain the walk order */
+static int *LocNextNode; /* successor in the tree walk for each particle index */
+
+static unsigned long long *LocTree_IntPos_list; /* integer coordinates, three per particle index; allocated and freed inside
+                                                 * subfind_loctree_treebuild() */
+
+/*! \brief Node structure for subfind tree.
+ *
+ *  The members suns and d share storage (union): suns is used while the
+ *  tree is being built, d once the node has been updated.
+ */
+static struct LocNODE
+{
+  union
+  {
+    int suns[8]; /*!< temporary pointers to daughter nodes */
+    struct
+    {
+      MyDouble s[3]; /*!< center of mass of node */
+      MyDouble mass; /*!< mass of node */
+      unsigned char maxsofttype; /*!< index into All.ForceSoftening of the largest softening found below this node */
+#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING)
+      unsigned char maxhydrosofttype;
+      unsigned char minhydrosofttype;
+#endif /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */
+      int sibling; /*!< this gives the next node in the walk in case the current node can be used */
+      int nextnode; /*!< this gives the next node in case the current node needs to be opened */
+    } d;
+  } u;
+
+  MyDouble center[3]; /*!< geometrical center of node */
+  MyFloat len; /*!< sidelength of treenode */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+  MyDouble mass_per_type[NSOFTTYPES];
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+} * LocNodes_base, /*!< points to the actual memory allocated for the nodes */
+    *LocNodes; /*!< this is a pointer used to access the nodes which is shifted such that LocNodes[LocMaxPart]
+                  gives the first allocated node */
+
+/*! \brief Calculates min/max coordinate of particles in unbind data.
+ *
+ * \param[in] npart Number of local particles (in unbind_data).
+ * \param[in] mp Pointer to unbind data.
+ *
+ * \return void
+ */
+void subfind_loctree_findExtent(int npart, struct unbind_data *mp)
+{
+  int i, j, k;
+  double len, xmin[3], xmax[3];
+
+  /* determine extension: start from an inverted bounding box so that the
+   * first particle sets both limits */
+  for(i = 0; i < 3; i++)
+    {
+      xmin[i] = MAX_REAL_NUMBER;
+      xmax[i] = -MAX_REAL_NUMBER;
+    }
+
+  for(k = 0; k < npart; k++)
+    {
+      if(mp)
+        i = mp[k].index;
+      else
+        terminate("what?"); /* mp is mandatory here */
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        {
+          for(j = 0; j < 3; j++)
+            {
+              if(xmin[j] > PS[i].Center[j])
+                xmin[j] = PS[i].Center[j];
+
+              if(xmax[j] < PS[i].Center[j])
+                xmax[j] = PS[i].Center[j];
+            }
+        }
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        {
+          for(j = 0; j < 3; j++)
+            {
+              if(xmin[j] > P[i].Pos[j])
+                xmin[j] = P[i].Pos[j];
+
+              if(xmax[j] < P[i].Pos[j])
+                xmax[j] = P[i].Pos[j];
+            }
+        }
+    }
+  /* side length of the root cube: largest extent over the three axes, enlarged by 0.1 percent */
+  len = 0;
+  for(j = 0; j < 3; j++)
+    if(xmax[j] - xmin[j] > len)
+      len = xmax[j] - xmin[j];
+
+  len *= 1.001;
+  /* NOTE(review): for npart == 0, or if all positions coincide, len is still 0
+   * here and the reciprocals below divide by zero - confirm callers exclude this */
+  RootLen = len;
+  RootInverseLen = 1.0 / RootLen;
+  RootFac = 1.0 / len * (((peanokey)1) << (BITS_PER_DIMENSION));
+  RootBigFac = (RootLen / (((long long)1) << 52));
+  /* center the cube on the bounding box; RootCorner is its lower corner */
+  for(j = 0; j < 3; j++)
+    {
+      RootCenter[j] = 0.5 * (xmin[j] + xmax[j]);
+      RootCorner[j] = 0.5 * (xmin[j] + xmax[j]) - 0.5 * len;
+    }
+}
+
+/*! \brief Copy extent information from SubDomain to Root.
+ *
+ *  Copies SubDomainCenter, SubDomainCorner, SubDomainLen,
+ *  SubDomainInverseLen, SubDomainFac and SubDomainBigFac into the
+ *  corresponding file-local Root* variables, i.e. it replaces a call to
+ *  subfind_loctree_findExtent().
+ *
+ *  This is called from the collective subfind code.
+ *
+ *  \return void
+ */
+void subfind_loctree_copyExtent(void)
+{
+  int j;
+  for(j = 0; j < 3; j++)
+    {
+      RootCenter[j] = SubDomainCenter[j];
+      RootCorner[j] = SubDomainCorner[j];
+    }
+  RootLen = SubDomainLen;
+  RootInverseLen = SubDomainInverseLen;
+  RootFac = SubDomainFac;
+  RootBigFac = SubDomainBigFac;
+}
+
+/*! \brief Construct the subfind tree.
+ *
+ * \param[in] npart Number of particles involved.
+ * \param[in] udp Unbind data.
+ *
+ * \return Number of nodes.
+ */
+int subfind_loctree_treebuild(int npart, struct unbind_data **udp)
+{
+  int i, j, k, subnode = 0, parent = -1, numnodes;
+  int nfree, th, nn;
+  struct LocNODE *nfreep;
+  struct unbind_data *mp;
+
+  /* select first node: internal nodes are numbered from LocMaxPart upwards,
+   * smaller indices denote particles */
+  nfree = LocMaxPart;
+  nfreep = &LocNodes[nfree];
+
+  mp = *udp;
+
+  /* create an empty root node */
+  nfreep->len = (MyFloat)RootLen;
+  for(i = 0; i < 3; i++)
+    nfreep->center[i] = (MyFloat)RootCenter[i];
+
+  for(i = 0; i < 8; i++)
+    nfreep->u.suns[i] = -1;
+
+  numnodes = 1;
+  nfreep++;
+  nfree++;
+
+  /* insert all particles; the integer coordinates of every inserted particle
+   * are cached in LocTree_IntPos_list (three entries per particle index) so
+   * that an occupied leaf can be re-sorted when it has to be split */
+
+  LocTree_IntPos_list =
+      (unsigned long long *)mymalloc_movable(&LocTree_IntPos_list, "LocTree_IntPos_list", 3 * NumPart * sizeof(unsigned long long));
+
+  for(k = 0; k < npart; k++)
+    {
+      if(mp)
+        i = mp[k].index;
+      else
+        terminate("what?");
+
+      MyDouble *posp;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        posp = &PS[i].Center[0];
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        posp = &P[i].Pos[0];
+      /* position relative to the root cube, shifted by 1.0 before the integer conversion; presumably
+       * force_double_to_int() extracts the 52 mantissa bits - confirm against its definition */
+      unsigned long long xxb = force_double_to_int(((posp[0] - RootCorner[0]) * RootInverseLen) + 1.0);
+      unsigned long long yyb = force_double_to_int(((posp[1] - RootCorner[1]) * RootInverseLen) + 1.0);
+      unsigned long long zzb = force_double_to_int(((posp[2] - RootCorner[2]) * RootInverseLen) + 1.0);
+      unsigned long long mask = ((unsigned long long)1) << (52 - 1);
+      unsigned char shiftx = (52 - 1);
+      unsigned char shifty = (52 - 2);
+      unsigned char shiftz = (52 - 3);
+      signed long long centermask = (0xFFF0000000000000llu);
+      unsigned char levels = 0;
+
+      unsigned long long *intposp = &LocTree_IntPos_list[3 * i];
+
+      *intposp++ = xxb;
+      *intposp++ = yyb;
+      *intposp++ = zzb;
+
+      th = LocMaxPart;
+
+      while(1)
+        {
+          if(th >= LocMaxPart) /* we are dealing with an internal node */
+            {
+              subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                         ((unsigned char)((zzb & mask) >> (shiftz--))));
+
+              centermask >>= 1;
+              mask >>= 1;
+              levels++;
+
+              if(levels > MAX_TREE_LEVEL)
+                {
+                  /* seems like we're dealing with particles at identical (or extremely close)
+                   * locations. Shift subnode index to allow tree construction. Note: Multipole moments
+                   * of tree are still correct, but one should choose MAX_TREE_LEVEL large enough to have
+                   * DomainLen/2^MAX_TREE_LEVEL < gravitational softening length
+                   */
+                  for(j = 0; j < 8; j++)
+                    {
+                      if(LocNodes[th].u.suns[subnode] < 0)
+                        break;
+
+                      subnode++;
+                      if(subnode >= 8)
+                        subnode = 7;
+                    }
+                }
+
+              nn = LocNodes[th].u.suns[subnode];
+
+              if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */
+                {
+                  parent = th; /* note: subnode can still be used in the next step of the walk */
+                  th = nn;
+                }
+              else
+                {
+                  /* here we have found an empty slot where we can
+                   * attach the new particle as a leaf
+                   */
+                  LocNodes[th].u.suns[subnode] = i;
+                  break; /* done for this particle */
+                }
+            }
+          else
+            {
+              /* we try to insert into a leaf with a single particle
+               * need to generate a new internal node at this point
+               */
+              LocNodes[parent].u.suns[subnode] = nfree;
+
+              /* the other is: */
+              double len = ((double)(mask << 1)) * RootBigFac;
+              double cx = ((double)((xxb & centermask) | mask)) * RootBigFac + RootCorner[0];
+              double cy = ((double)((yyb & centermask) | mask)) * RootBigFac + RootCorner[1];
+              double cz = ((double)((zzb & centermask) | mask)) * RootBigFac + RootCorner[2];
+
+              nfreep->len = len;
+              nfreep->center[0] = cx;
+              nfreep->center[1] = cy;
+              nfreep->center[2] = cz;
+
+              nfreep->u.suns[0] = -1;
+              nfreep->u.suns[1] = -1;
+              nfreep->u.suns[2] = -1;
+              nfreep->u.suns[3] = -1;
+              nfreep->u.suns[4] = -1;
+              nfreep->u.suns[5] = -1;
+              nfreep->u.suns[6] = -1;
+              nfreep->u.suns[7] = -1;
+              /* re-sort the particle th that occupied the leaf into the new node, using its cached integer position */
+              unsigned long long *intppos = &LocTree_IntPos_list[3 * th];
+
+              subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) |
+                         ((unsigned char)((intppos[2] & mask) >> shiftz)));
+
+              nfreep->u.suns[subnode] = th;
+
+              th = nfree; /* resume trying to insert the new particle at
+                             the newly created internal node */
+
+              numnodes++;
+              nfree++;
+              nfreep++;
+
+              if(numnodes >= MaxNodes)
+                {
+                  MaxNodes *= 1.2;
+                  /* grow the node storage by 20 percent and refresh every pointer that is re-derived below */
+                  LocNodes_base = (struct LocNODE *)myrealloc_movable(LocNodes_base, (MaxNodes + 1) * sizeof(struct LocNODE));
+                  LocNodes = LocNodes_base - LocMaxPart;
+                  nfreep = &LocNodes[nfree];
+                  mp = *udp;
+                  /* NOTE(review): MaxNodes was just enlarged, so this check looks reachable only if numnodes
+                   * already exceeded MaxNodes beforehand - confirm */
+                  if(numnodes > MaxNodes)
+                    {
+                      char buf[1000];
+
+                      sprintf(buf, "maximum number %d of tree-nodes reached., for particle %d %g %g %g", MaxNodes, i, P[i].Pos[0],
+                              P[i].Pos[1], P[i].Pos[2]);
+                      terminate(buf);
+                    }
+                }
+            }
+        }
+    }
+
+  myfree(LocTree_IntPos_list);
+
+  /* now compute the multipole moments recursively */
+  last = -1;
+  subfind_loctree_update_node_recursive(LocMaxPart, -1, -1);
+  /* terminate the walk order: the entry visited last gets -1 as its successor */
+  if(last >= LocMaxPart)
+    LocNodes[last].u.d.nextnode = -1;
+  else
+    LocNextNode[last] = -1;
+
+  return numnodes;
+}
+
+/*! \brief Compute multipole moments.
+ *
+ *  This routine computes the multipole moments (total mass, center of mass
+ *  and softening information) for a given internal node and all its subnodes
+ *  using a recursive computation. While descending it also threads the walk
+ *  order through the tree: the entry visited previously (file-static `last`)
+ *  gets the current one as its successor, and every internal node stores sib
+ *  as its sibling.
+ *
+ *  \param[in] no Node that we are in; values below LocMaxPart are particles.
+ *  \param[in] sib Sibling of the node, i.e. where the walk continues if the
+ *             node is not opened (the root is called with -1).
+ *  \param[in] father Parent node; passed along but not read in this routine.
+ *
+ *  \return void
+ */
+void subfind_loctree_update_node_recursive(int no, int sib, int father)
+{
+  int j, jj, p, pp = 0, nextsib, suns[8];
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  double mass;
+  double s[3];
+
+  if(no >= LocMaxPart)
+    {
+      for(j = 0; j < 8; j++)
+        suns[j] = LocNodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
+                                             overwrite one element (union!) */
+      if(last >= 0)
+        {
+          if(last >= LocMaxPart)
+            LocNodes[last].u.d.nextnode = no;
+          else
+            LocNextNode[last] = no;
+        }
+
+      last = no;
+
+      mass = 0;
+      s[0] = 0;
+      s[1] = 0;
+      s[2] = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* start value of the maximum search; presumably an extra
+                                                    * All.ForceSoftening slot holding the smallest value - confirm */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      for(j = 0; j < 8; j++)
+        {
+          if((p = suns[j]) >= 0)
+            {
+              /* check if we have a sibling on the same level */
+              for(jj = j + 1; jj < 8; jj++)
+                if((pp = suns[jj]) >= 0)
+                  break;
+
+              if(jj < 8) /* yes, we do */
+                nextsib = pp;
+              else
+                nextsib = sib;
+
+              subfind_loctree_update_node_recursive(p, nextsib, no);
+
+              if(p >= LocMaxPart) /* an internal node */
+                {
+                  mass += LocNodes[p].u.d.mass; /* we assume a fixed particle mass */
+                  s[0] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[0];
+                  s[1] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[1];
+                  s[2] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[LocNodes[p].u.d.maxsofttype])
+                    maxsofttype = LocNodes[p].u.d.maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                  int k;
+                  for(k = 0; k < NSOFTTYPES; k++)
+                    mass_per_type[k] += LocNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(maxhydrosofttype < LocNodes[p].u.d.maxhydrosofttype)
+                    maxhydrosofttype = LocNodes[p].u.d.maxhydrosofttype;
+                  if(minhydrosofttype > LocNodes[p].u.d.minhydrosofttype)
+                    minhydrosofttype = LocNodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else /* a particle */
+                {
+                  mass += P[p].Mass;
+#ifdef CELL_CENTER_GRAVITY
+                  if(P[p].Type == 0)
+                    {
+                      s[0] += P[p].Mass * PS[p].Center[0];
+                      s[1] += P[p].Mass * PS[p].Center[1];
+                      s[2] += P[p].Mass * PS[p].Center[2];
+                    }
+                  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+                    {
+                      s[0] += P[p].Mass * P[p].Pos[0];
+                      s[1] += P[p].Mass * P[p].Pos[1];
+                      s[2] += P[p].Mass * P[p].Pos[2];
+                    }
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[P[p].SofteningType])
+                    maxsofttype = P[p].SofteningType;
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  mass_per_type[P[p].Type == 0 ? 0 : P[p].SofteningType] += P[p].Mass;
+
+                  if(P[p].Type == 0)
+                    {
+                      if(maxhydrosofttype < P[p].SofteningType)
+                        maxhydrosofttype = P[p].SofteningType;
+                      if(minhydrosofttype > P[p].SofteningType)
+                        minhydrosofttype = P[p].SofteningType;
+                    }
+#else  /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  mass_per_type[P[p].SofteningType] += P[p].Mass;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+            }
+        }
+      /* turn the mass-weighted sum into the center of mass; a massless node falls back to its geometric center */
+      if(mass > 0)
+        {
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else
+        {
+          s[0] = LocNodes[no].center[0];
+          s[1] = LocNodes[no].center[1];
+          s[2] = LocNodes[no].center[2];
+        }
+      /* from here on the union member d is written, which overwrites the daughter pointers in suns */
+      LocNodes[no].u.d.s[0] = (MyFloat)s[0];
+      LocNodes[no].u.d.s[1] = (MyFloat)s[1];
+      LocNodes[no].u.d.s[2] = (MyFloat)s[2];
+      LocNodes[no].u.d.mass = (MyFloat)mass;
+      LocNodes[no].u.d.maxsofttype = maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+      int k;
+      for(k = 0; k < NSOFTTYPES; k++)
+        LocNodes[no].mass_per_type[k] = mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      LocNodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      LocNodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      LocNodes[no].u.d.sibling = sib;
+    }
+  else /* single particle or pseudo particle */
+    {
+      if(last >= 0)
+        {
+          if(last >= LocMaxPart)
+            LocNodes[last].u.d.nextnode = no;
+          else
+            LocNextNode[last] = no;
+        }
+
+      last = no;
+    }
+}
+
+/*! \brief Evaluates the potential by walking the subfind local tree.
+ *
+ * \param[in] target Index of the particle.
+ *
+ * \return Gravitational potiential.
+ */
+double subfind_loctree_treeevaluate_potential(int target)
+{
+  struct LocNODE *nop = 0;
+  int no;
+  double r2, dx, dy, dz, mass, r, u, h_i, h_j, hmax, h_inv, wp;
+  double pot, pos_x, pos_y, pos_z, xtmp, ytmp, ztmp;
+
+#ifdef CELL_CENTER_GRAVITY
+  if(P[target].Type == 0)
+    {
+      pos_x = PS[target].Center[0];
+      pos_y = PS[target].Center[1];
+      pos_z = PS[target].Center[2];
+    }
+  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+    {
+      pos_x = P[target].Pos[0];
+      pos_y = P[target].Pos[1];
+      pos_z = P[target].Pos[2];
+    }
+
+  h_i = All.ForceSoftening[P[target].SofteningType];
+
+  pot = 0;
+  /* start the walk at the root node, which has index LocMaxPart */
+  no = LocMaxPart;
+
+  while(no >= 0)
+    {
+#ifdef MULTIPLE_NODE_SOFTENING
+      int indi_flag1 = -1, indi_flag2 = 0;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+      if(no < LocMaxPart) /* single particle */
+        {
+#ifdef CELL_CENTER_GRAVITY
+          if(P[no].Type == 0)
+            {
+              dx = GRAVITY_NEAREST_X(PS[no].Center[0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(PS[no].Center[1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(PS[no].Center[2] - pos_z);
+            }
+          else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+            {
+              dx = GRAVITY_NEAREST_X(P[no].Pos[0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(P[no].Pos[1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(P[no].Pos[2] - pos_z);
+            }
+
+          r2 = dx * dx + dy * dy + dz * dz;
+
+          mass = P[no].Mass;
+
+          h_j = All.ForceSoftening[P[no].SofteningType];
+          /* the larger of the two softening lengths is used for the pair */
+          if(h_j > h_i)
+            hmax = h_j;
+          else
+            hmax = h_i;
+
+          no = LocNextNode[no]; /* continue with the successor of this particle */
+        }
+      else
+        {
+          nop = &LocNodes[no];
+          mass = nop->u.d.mass;
+
+          dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x);
+          dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y);
+          dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z);
+
+          r2 = dx * dx + dy * dy + dz * dz;
+
+          /* check Barnes-Hut opening criterion */
+          if(nop->len * nop->len > r2 * All.ErrTolThetaSubfind * All.ErrTolThetaSubfind)
+            {
+              /* open cell; a node with zero mass is not opened and falls through to be used as it is */
+              if(mass)
+                {
+                  no = nop->u.d.nextnode;
+                  continue;
+                }
+            }
+
+          h_j = All.ForceSoftening[nop->u.d.maxsofttype];
+
+          if(h_j > h_i)
+            {
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype)
+                if(LocNodes[no].mass_per_type[0] > 0)
+                  if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype])
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+              indi_flag1 = 0;
+              indi_flag2 = NSOFTTYPES;
+#else  /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+              if(r2 < h_j * h_j)
+                {
+                  /* open cell */
+                  no = nop->u.d.nextnode;
+                  continue;
+                }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */
+              hmax = h_j;
+            }
+          else
+            hmax = h_i;
+
+          no = nop->u.d.sibling; /* node can be used */
+        }
+      /* mass, r2 and hmax now describe the interaction partner; with MULTIPLE_NODE_SOFTENING an accepted
+       * node whose softening exceeds h_i is instead split into one contribution per softening type below */
+      r = sqrt(r2);
+#ifdef MULTIPLE_NODE_SOFTENING
+      int type;
+      for(type = indi_flag1; type < indi_flag2; type++)
+        {
+          if(type >= 0)
+            {
+              mass = nop->mass_per_type[type];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(type == 0)
+                h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype];
+              else
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                h_j = All.ForceSoftening[type];
+
+              if(h_j > h_i)
+                hmax = h_j;
+              else
+                hmax = h_i;
+            }
+
+          if(mass)
+            {
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+              if(r >= hmax)
+                pot -= mass / r;
+              else
+                {
+                  h_inv = 1.0 / hmax;
+                  /* softened regime: piecewise polynomial in u = r / hmax */
+                  u = r * h_inv;
+
+                  if(u < 0.5)
+                    wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6));
+                  else
+                    wp = -3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u)));
+
+                  pot += mass * h_inv * wp;
+#ifdef MULTIPLE_NODE_SOFTENING
+                }
+            }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+
+  return pot;
+}
+
+/*! \brief Comparison function for r2type objects.
+ *
+ *  Compares element r2, giving ascending order when used with qsort().
+ *
+ *  \param[in] a First object to compare (pointer to r2type).
+ *  \param[in] b Second object to compare (pointer to r2type).
+ *
+ *  \return (-1,0,1), -1 if a->r2 < b->r2, +1 if a->r2 > b->r2, 0 otherwise.
+ */
+int subfind_locngb_compare_key(const void *a, const void *b)
+{
+  if(((r2type *)a)->r2 < (((r2type *)b)->r2))
+    return -1;
+
+  if(((r2type *)a)->r2 > (((r2type *)b)->r2))
+    return +1;
+
+  return 0;
+}
+
+/*!
\brief Iterates on smoothing length of neighbor search to get a desired + * number of neighbors. + * + * \param[in] xyz Search center of neighbor search. + * \param[in] desngb Desired number of neighbors. + * \param[in] hguess Initial guess of smoothing length. + * + * \return Distance of the outermost particle to seearch center. + */ +double subfind_locngb_treefind(MyDouble xyz[3], int desngb, double hguess) +{ + int numngb; + double h2max; + + if(hguess == 0) + terminate("hguess needed"); + + while(1) + { + numngb = subfind_locngb_treefind_variable(xyz, hguess); + + if(numngb < desngb) + { + hguess *= 1.26; + continue; + } + + if(numngb >= desngb) + { + qsort(R2list, numngb, sizeof(r2type), subfind_locngb_compare_key); + h2max = R2list[desngb - 1].r2; + break; + } + + hguess *= 1.26; + } + + return sqrt(h2max); +} + +/*! \brief (Local) tree-search in subfind tree. + * + * Adds these cells to R2list. + * + * \param[in] searchcenter Center around which particles are searched. + * \param[in] hguess Distance up to which particles are searched. + * + * \return Number of neighbors found. 
+ */
+int subfind_locngb_treefind_variable(MyDouble searchcenter[3], double hguess)
+{
+  int numngb, no, p;
+  double dx, dy, dz, r2, h2;
+  struct LocNODE *thisnode;
+  double xtmp, ytmp, ztmp;
+
+  h2 = hguess * hguess;
+  /* walk the tree from the root (index LocMaxPart); hits are appended to R2list */
+  numngb = 0;
+  no = LocMaxPart;
+
+  while(no >= 0)
+    {
+      if(no < LocMaxPart) /* single particle */
+        {
+          p = no;
+          no = LocNextNode[no];
+#ifdef CELL_CENTER_GRAVITY
+          if(P[p].Type == 0)
+            {
+              dx = GRAVITY_NEAREST_X(PS[p].Center[0] - searchcenter[0]);
+              dy = GRAVITY_NEAREST_Y(PS[p].Center[1] - searchcenter[1]);
+              dz = GRAVITY_NEAREST_Z(PS[p].Center[2] - searchcenter[2]);
+            }
+          else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+            {
+              dx = GRAVITY_NEAREST_X(P[p].Pos[0] - searchcenter[0]);
+              dy = GRAVITY_NEAREST_Y(P[p].Pos[1] - searchcenter[1]);
+              dz = GRAVITY_NEAREST_Z(P[p].Pos[2] - searchcenter[2]);
+            }
+          /* cheap per-axis rejection before the squared distance is computed */
+          if(dx < -hguess)
+            continue;
+          if(dx > hguess)
+            continue;
+
+          if(dy < -hguess)
+            continue;
+          if(dy > hguess)
+            continue;
+
+          if(dz < -hguess)
+            continue;
+          if(dz > hguess)
+            continue;
+
+          r2 = dx * dx + dy * dy + dz * dz;
+
+          if(r2 <= h2)
+            {
+              R2list[numngb].r2 = r2;
+              R2list[numngb].index = p;
+              numngb++;
+            }
+        }
+      else
+        {
+          thisnode = &LocNodes[no];
+
+          no = LocNodes[no].u.d.sibling; /* in case the node can be discarded */
+          /* discard the node if its cube (center plus/minus len/2) lies outside the search cube of half-width hguess */
+          if((GRAVITY_NEAREST_X(thisnode->center[0] - searchcenter[0]) + 0.5 * thisnode->len) < -hguess)
+            continue;
+          if((GRAVITY_NEAREST_X(thisnode->center[0] - searchcenter[0]) - 0.5 * thisnode->len) > hguess)
+            continue;
+          if((GRAVITY_NEAREST_Y(thisnode->center[1] - searchcenter[1]) + 0.5 * thisnode->len) < -hguess)
+            continue;
+          if((GRAVITY_NEAREST_Y(thisnode->center[1] - searchcenter[1]) - 0.5 * thisnode->len) > hguess)
+            continue;
+          if((GRAVITY_NEAREST_Z(thisnode->center[2] - searchcenter[2]) + 0.5 * thisnode->len) < -hguess)
+            continue;
+          if((GRAVITY_NEAREST_Z(thisnode->center[2] - searchcenter[2]) - 0.5 * thisnode->len) > hguess)
+            continue;
+
+          no = thisnode->u.d.nextnode; /* ok, we need to open the node */
+        }
+    }
+
+  return numngb;
+}
+
+/*! \brief Allocates memory used for storage of the tree
+ *         and auxiliary arrays for tree-walk and link-lists.
+ *
+ *  Allocates LocNextNode and R2list with maxpart entries each and the node
+ *  array with maxnodes + 1 entries. LocNodes is set to the node array
+ *  shifted by maxpart, so that LocNodes[maxpart] is the first node.
+ *  Terminates if the tree is already allocated (LocNextNode non-NULL).
+ *
+ *  \param[in] maxnodes Maximum number of nodes.
+ *  \param[in] maxpart Maximum number of particles.
+ *
+ *  \return Size of allocated memory in bytes.
+ */
+size_t subfind_loctree_treeallocate(int maxnodes, int maxpart)
+{
+  size_t bytes, allbytes = 0;
+
+  if(LocNextNode)
+    terminate("loctree already allocated");
+
+  MaxNodes = maxnodes;
+  LocMaxPart = maxpart;
+
+  LocNextNode = (int *)mymalloc("LocNextNode", bytes = maxpart * sizeof(int));
+  allbytes += bytes;
+
+  R2list = (r2type *)mymalloc("R2list", bytes = maxpart * sizeof(r2type));
+  allbytes += bytes;
+
+  LocNodes_base = (struct LocNODE *)mymalloc_movable(&LocNodes_base, "LocNodes_base", bytes = (MaxNodes + 1) * sizeof(struct LocNODE));
+  LocNodes = LocNodes_base - LocMaxPart;
+  allbytes += bytes;
+
+  return allbytes;
+}
+
+/*! \brief Frees the memory allocated for subfind_loctree.
+ *
+ *  The three buffers are released in the reverse order of their allocation
+ *  in subfind_loctree_treeallocate() (presumably required by the allocator
+ *  behind mymalloc/myfree - confirm) and the pointers are reset to NULL so
+ *  that the tree can be allocated again.
+ *
+ *  \return void
+ */
+void subfind_loctree_treefree(void)
+{
+  myfree(LocNodes_base);
+  myfree(R2list);
+  myfree(LocNextNode);
+
+  LocNextNode = NULL;
+  R2list = NULL;
+  LocNodes_base = NULL;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c b/src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c
new file mode 100644
index 0000000000..23e8bf95f3
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c
@@ -0,0 +1,475 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/subfind/subfind_nearesttwo.c
+ * \date 05/2018
+ * \brief Neighbor finding of particles in group.
+ * \details contains functions:
+ * static void particle2in(data_in * in, int i, int firstnode)
+ * static void out2particle(data_out * out, int i, int mode)
+ * static void kernel_local(void)
+ * static void kernel_imported(void)
+ * void subfind_find_nearesttwo(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "subfind.h"
+
+static int subfind_nearesttwo_evaluate(int target, int mode, int threadid);
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ *
+ *  Filled by particle2in() and read by subfind_nearesttwo_evaluate().
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /*!< search position: P[].Pos, or PS[].Center for Type 0 with CELL_CENTER_GRAVITY */
+  MyIDType ID; /*!< P[].ID of the particle, used to exclude the particle itself */
+  MyFloat Hsml; /*!< PS[].Hsml, used as search radius */
+  MyFloat Density; /*!< PS[].Density; only denser neighbours are accepted */
+  MyFloat Dist[2]; /*!< squared distances of the neighbours stored so far */
+  int Count; /*!< number of valid entries in Dist and Index */
+  long long Index[2]; /*!< neighbours stored so far, encoded as (task << 32) + local index */
+
+  int Firstnode; /*!< set from the firstnode argument of particle2in() */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ * structure defined above. Needed by generic_comm_helpers2.
+ *
+ * \param[out] in Data structure to fill.
+ * \param[in] i Index of particle in P and SphP arrays.
+ * \param[in] firstnode First note of communication.
+ *
+ * \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int k;
+
+#ifdef CELL_CENTER_GRAVITY
+  if(P[i].Type == 0)
+    {
+      in->Pos[0] = PS[i].Center[0];
+      in->Pos[1] = PS[i].Center[1];
+      in->Pos[2] = PS[i].Center[2];
+    }
+  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+    {
+      in->Pos[0] = P[i].Pos[0];
+      in->Pos[1] = P[i].Pos[1];
+      in->Pos[2] = P[i].Pos[2];
+    }
+  /* search radius, identity and density of the particle, plus the neighbours already stored for it */
+  in->Hsml = PS[i].Hsml;
+  in->ID = P[i].ID;
+  in->Density = PS[i].Density;
+  in->Count = NgbLoc[i].count;
+  for(k = 0; k < NgbLoc[i].count; k++)
+    {
+      in->Dist[k] = R2Loc[i].dist[k];
+      in->Index[k] = NgbLoc[i].index[k];
+    }
+  in->Firstnode = firstnode;
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ *
+ *  Written by subfind_nearesttwo_evaluate() and consumed by out2particle().
+ */
+typedef struct
+{
+  MyFloat Dist[2]; /*!< squared distances of the neighbours found */
+  long long Index[2]; /*!< neighbours found, encoded as (task << 32) + local index */
+  int Count; /*!< number of valid entries in Dist and Index */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Routine to store or combine result data. Needed by
+ * generic_comm_helpers2.
+ *
+ * \param[in] out Data to be moved to appropriate variables in global
+ * particle and cell data arrays (P, SphP,...)
+ * \param[in] i Index of particle in P and SphP arrays
+ * \param[in] mode Mode of function: local particles or information that was
+ * communicated from other tasks and has to be added locally?
+ *
+ * \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode == MODE_LOCAL_PARTICLES) /* initial store */
+    {
+      int k;
+
+      NgbLoc[i].count = out->Count;
+
+      for(k = 0; k < out->Count; k++)
+        {
+          R2Loc[i].dist[k] = out->Dist[k];
+          NgbLoc[i].index[k] = out->Index[k];
+        }
+    }
+  else /* combine */
+    {
+      int k, l;
+
+      for(k = 0; k < out->Count; k++)
+        {
+          if(NgbLoc[i].count >= 1) /* skip candidates that are already stored in slot 0 ... */
+            if(NgbLoc[i].index[0] == out->Index[k])
+              continue;
+
+          if(NgbLoc[i].count == 2) /* ... or in slot 1 */
+            if(NgbLoc[i].index[1] == out->Index[k])
+              continue;
+
+          if(NgbLoc[i].count < 2)
+            {
+              l = NgbLoc[i].count; /* a free slot is left: append */
+              NgbLoc[i].count++;
+            }
+          else
+            {
+              if(R2Loc[i].dist[0] > R2Loc[i].dist[1]) /* both slots taken: l selects the more distant entry */
+                l = 0;
+              else
+                l = 1;
+              /* replace it only if the candidate is strictly closer */
+              if(out->Dist[k] >= R2Loc[i].dist[l])
+                continue;
+            }
+
+          R2Loc[i].dist[l] = out->Dist[k];
+          NgbLoc[i].index[l] = out->Index[k];
+
+          if(NgbLoc[i].count == 2)
+            if(NgbLoc[i].index[0] == NgbLoc[i].index[1])
+              terminate("this is not supposed to happen");
+        }
+    }
+}
+
+#define USE_SUBCOMM_COMMUNICATOR
+#include "../utils/generic_comm_helpers2.h"
+
+static double *Dist2list; /* scratch list of squared distances filled by subfind_nearesttwo_evaluate() */
+static int *Ngblist; /* scratch list of candidate particle indices, parallel to Dist2list */
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Resets the Exportflag entries of this thread to -1 and then processes
+ *  indices taken from NextParticle until NumPartGroup is reached or
+ *  Thread[].ExportSpace drops below MinSpace.
+ *
+ *  Calls the *_evaluate function in MODE_LOCAL_PARTICLES.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int i;
+  {
+    int j, threadid = get_thread_num();
+
+    for(j = 0; j < SubNTask; j++)
+      Thread[threadid].Exportflag[j] = -1;
+
+    while(1)
+      {
+        if(Thread[threadid].ExportSpace < MinSpace)
+          break;
+
+        i = NextParticle++;
+
+        if(i >= NumPartGroup)
+          break;
+
+        subfind_nearesttwo_evaluate(i, MODE_LOCAL_PARTICLES, threadid);
+      }
+  }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES.
+ *
+ * \return void
+ */
+static void kernel_imported(void)
+{
+  /* now do the particles that were sent to us */
+  int i, cnt = 0;
+  {
+    int threadid = get_thread_num();
+
+    while(1)
+      {
+        i = cnt++;
+        /* Nimport entries are processed, in order */
+        if(i >= Nimport)
+          break;
+
+        subfind_nearesttwo_evaluate(i, MODE_IMPORTED_PARTICLES, threadid);
+      }
+  }
+}
+
+/*! \brief Neighbour finding for each particle in group.
+ *
+ *  Allocates the scratch lists Ngblist and Dist2list (NumPartGroup entries
+ *  each), resets the neighbour count of all NumPartGroup particles and runs
+ *  generic_comm_pattern() with kernel_local and kernel_imported. The scratch
+ *  lists are freed again before returning.
+ *
+ *  \return void
+ */
+void subfind_find_nearesttwo(void)
+{
+  if(SubThisTask == 0)
+    printf("SUBFIND-COLLECTIVE, root-task=%d: Start finding nearest two.\n", ThisTask);
+
+  /* allocate buffers to arrange communication */
+
+  Ngblist = (int *)mymalloc("Ngblist", NumPartGroup * sizeof(int));
+  Dist2list = (double *)mymalloc("Dist2list", NumPartGroup * sizeof(double));
+
+  generic_set_MaxNexport();
+  /* start with an empty neighbour list for every particle of the group */
+  for(int i = 0; i < NumPartGroup; i++)
+    NgbLoc[i].count = 0;
+
+  generic_comm_pattern(NumPartGroup, kernel_local, kernel_imported);
+  /* release the scratch lists in reverse order of allocation */
+  myfree(Dist2list);
+  myfree(Ngblist);
+
+  if(SubThisTask == 0)
+    printf("SUBFIND-COLLECTIVE, root-task=%d: Done with nearest two.\n", ThisTask);
+}
+
+/*! \brief Neighbor finding routine on local particles.
+ *
+ * \param[in] target Index of particle/cell.
+ * \param[in] mode Flag if it operates on local or imported data.
+ * \param[in] threadid ID of thread.
+ *
+ * \return 0
+ */
+static int subfind_nearesttwo_evaluate(int target, int mode, int threadid)
+{
+  int j, k, n, no, count;
+  MyIDType ID;
+  long long index[2];
+  double dist[2];
+  int numngb, numnodes, *firstnode;
+  double hsml;
+  double density;
+  MyDouble *pos;
+  struct NODE *current;
+  double dx, dy, dz, disthsml, r2;
+  MyDouble xtmp, ytmp, ztmp;
+
+  data_in local, *in;
+  data_out out;
+  /* local particles are packed on the fly, imported ones are read from DataGet */
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      in = &local;
+
+      numnodes = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      in = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  ID = in->ID;
+  density = in->Density;
+  pos = in->Pos;
+  hsml = in->Hsml;
+  count = in->Count;
+  for(k = 0; k < count; k++)
+    {
+      dist[k] = in->Dist[k];
+      index[k] = in->Index[k];
+    }
+  /* consistency check on the incoming neighbour pair */
+  if(count == 2)
+    if(index[0] == index[1])
+      {
+        terminate("task=%d/%d target=%d mode=%d index_0=%lld index_1=%lld\n", SubThisTask, ThisTask, target, mode, index[0],
+                  index[1]);
+      }
+  /* count is reset: the neighbours copied from in above are used for the check only, the result of this
+   * call is built from scratch */
+  numngb = 0;
+  count = 0;
+
+  hsml *= 1.00001; /* prevents that the most distant neighbour on the edge of the search region may not be found.
+                    * (needed for consistency with serial algorithm)
+                    */
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = SubTree_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = SubNodes[no].u.d.nextnode; /* open it */
+        }
+      while(no >= 0)
+        {
+          if(no < SubTree_MaxPart) /* single particle */
+            {
+              int p = no;
+              no = SubNextnode[no];
+              /* only an upper bound is tested per axis; presumably the FOF_NEAREST_LONG_* macros return a
+               * non-negative separation - confirm against their definition */
+              disthsml = hsml;
+              dx = FOF_NEAREST_LONG_X(SubTree_Pos_list[3 * p + 0] - pos[0]);
+              if(dx > disthsml)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(SubTree_Pos_list[3 * p + 1] - pos[1]);
+              if(dy > disthsml)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(SubTree_Pos_list[3 * p + 2] - pos[2]);
+              if(dz > disthsml)
+                continue;
+              if((r2 = (dx * dx + dy * dy + dz * dz)) > disthsml * disthsml)
+                continue;
+              /* record the candidate; filtering by ID and density happens after the walk */
+              Dist2list[numngb] = r2;
+              Ngblist[numngb++] = p;
+            }
+          else if(no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+            {
+              if(mode == 1) /* NOTE(review): the literal 1 is presumably MODE_IMPORTED_PARTICLES, which the rest of
+                             * this function spells out - confirm and use the macro */
+                {
+                  if(no < SubTree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the
+                                                           branch */
+                    {
+                      break;
+                    }
+                }
+
+              current = &SubNodes[no];
+
+              no = current->u.d.sibling; /* in case the node can be discarded */
+
+              disthsml = hsml + 0.5 * current->len;
+
+              dx = FOF_NEAREST_LONG_X(current->center[0] - pos[0]);
+              if(dx > disthsml)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]);
+              if(dy > disthsml)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]);
+              if(dz > disthsml)
+                continue;
+              /* now test against the minimal sphere enclosing everything */
+              disthsml += FACT1 * current->len;
+              if(dx * dx + dy * dy + dz * dz > disthsml * disthsml)
+                continue;
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+            }
+          else if(no >= SubTree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              terminate("do not expect imported points here");
+            }
+          else /* pseudo particle */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES");
+
+              if(target >= 0) /* note: if no target is given, export will not occur */
+                subfind_treefind_collective_export_node_threads(no, target, threadid);
+
+              no = SubNextnode[no - SubTree_MaxNodes];
+            }
+        }
+    }
+  /* keep at most the two closest candidates that are denser than the particle itself */
+  for(n = 0; n < numngb; n++)
+    {
+      j = Ngblist[n];
+      r2 = Dist2list[n];
+
+      if(P[j].ID != ID) /* exclude the self-particle */
+        {
+          if(PS[j].Density > density) /* we only look at neighbours that are denser */
+            {
+              if(count < 2)
+                {
+                  dist[count] = r2;
+                  index[count] = (((long long)SubThisTask) << 32) + j; /* task in the upper 32 bits, local index below */
+                  count++;
+                }
+              else
+                {
+                  if(dist[0] > dist[1]) /* k selects the more distant of the two stored entries */
+                    k = 0;
+                  else
+                    k = 1;
+
+                  if(r2 < dist[k])
+                    {
+                      dist[k] = r2;
+                      index[k] = (((long long)SubThisTask) << 32) + j;
+                    }
+                }
+            }
+        }
+    }
+
+  out.Count = count;
+  for(k = 0; k < count; k++)
+    {
+      out.Dist[k] = dist[k];
+      out.Index[k] = index[k];
+    }
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_properties.c b/src/amuse/community/arepo/src/subfind/subfind_properties.c
new file mode 100644
index 0000000000..5d2756cbdf
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_properties.c
@@ -0,0 +1,1195 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/subfind/subfind_properties.c + * \date 05/2018 + * \brief Calculation of the subgroup properties. + * \details contains functions: + * void subfind_determine_sub_halo_properties(struct + * unbind_data *d, int num, struct subgroup_properties + * *subgroup, int grnr, int subnr, int parallel_flag, int + * nsubgroups_cat) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "../fof/fof.h" +#include "subfind.h" + +/*! \brief Calculates subhalo properties. + * + * + * \param[in] d Unbind data. + * \param[in] num Length of d. + * \param[out] subgroup Data for subgroup properties. + * \param[in] grnr Index in GroupCat. + * \param[in] subnr Index of Subhalo in this group. + * \param[in] parallel_flag If set, the code calculates the properties for a + * subhalo distributed onto several processors. 
+ * \param[in] nsubgroups_cat (unused) + * + * \return void + */ +void subfind_determine_sub_halo_properties(struct unbind_data *d, int num, struct subgroup_properties *subgroup, int grnr, int subnr, + int parallel_flag, int nsubgroups_cat) +{ + int i, j, p, len_type[NTYPES], len_type_loc[NTYPES], totlen; + double s[3], v[3], pos[3], vel[3], spin[3], cm[3], veldisp, max, vel_to_phys, H_of_a, minpot; +#ifdef MHD + double bfld_halo, bfld_disk, bfld_vol_halo, bfld_vol_disk; +#endif /* #ifdef MHD */ +#ifdef SUBFIND_EXTENDED_PROPERTIES + double Ekin = 0, Epot = 0, Ethr = 0, Jdm[3], Jgas[3], Jstars[3], CMFrac, CMFracType[NTYPES]; + double Jdm_inHalfRad[3], Jgas_inHalfRad[3], Jstars_inHalfRad[3], CMFrac_inHalfRad, CMFracType_inHalfRad[NTYPES]; + double Jdm_inRad[3], Jgas_inRad[3], Jstars_inRad[3], CMFrac_inRad, CMFracType_inRad[NTYPES]; + double jpart[3], Jtot[3], Jtot_inRad[3], Jtot_inHalfRad[3]; + double sinrad[3], sinhalfrad[3], vinrad[3], vinhalfrad[3]; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + double lx, ly, lz, dv[3], dx[3], disp, rr_tmp, disp_tmp, halfmassrad = 0, halfmassradtype[NTYPES]; + double boxsize, ddxx, vmax, vmaxrad, maxrad; + double mass, massinrad, massinhalfrad, massinmaxrad; + double mass_tab[NTYPES], massinrad_tab[NTYPES], massinhalfrad_tab[NTYPES], massinmaxrad_tab[NTYPES]; + double xtmp; + + sort_r2list *rr_list = 0; + int minindex; + MyIDType mostboundid; + +#ifdef USE_SFR + double sfr = 0, sfrinrad = 0, sfrinhalfrad = 0, sfrinmaxrad = 0, gasMassSfr = 0; +#endif /* #ifdef USE_SFR */ + + boxsize = All.BoxSize; + + vel_to_phys = 1.0 / All.cf_atime; + + if(All.ComovingIntegrationOn) + H_of_a = hubble_function(All.Time); + else + H_of_a = 0; + + mass = massinrad = massinhalfrad = massinmaxrad = 0; + for(j = 0; j < NTYPES; j++) + { + len_type[j] = 0; + mass_tab[j] = halfmassradtype[j] = massinrad_tab[j] = massinhalfrad_tab[j] = massinmaxrad_tab[j] = 0; + } + + for(i = 0, minindex = -1, minpot = 1.0e30; i < num; i++) + { + p = d[i].index; + 
if(PS[p].Potential < minpot || minindex == -1) + { + minpot = PS[p].Potential; + minindex = p; + } + + len_type[P[p].Type]++; + +#ifdef USE_SFR + if(P[p].Type == 0) + sfr += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */ +#endif /* #ifdef USE_SFR */ + } + + for(j = 0; j < NTYPES; j++) + len_type_loc[j] = len_type[j]; + + if(parallel_flag) + { + int len_typetot[NTYPES]; + MPI_Allreduce(len_type, len_typetot, NTYPES, MPI_INT, MPI_SUM, SubComm); + for(j = 0; j < NTYPES; j++) + len_type[j] = len_typetot[j]; + + double *minpotlist = mymalloc("minpotlist", SubNTask * sizeof(double)); + MPI_Allgather(&minpot, 1, MPI_DOUBLE, minpotlist, 1, MPI_DOUBLE, SubComm); + int mincpu; + + for(i = 0, mincpu = -1, minpot = 1.0e30; i < SubNTask; i++) + if(minpotlist[i] < minpot) + { + mincpu = i; + minpot = minpotlist[mincpu]; + } + + myfree(minpotlist); + + if(mincpu < 0) + terminate("mincpu < 0"); + + if(SubThisTask == mincpu) + for(j = 0; j < 3; j++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[minindex].Type == 0) + pos[j] = SphP[PS[minindex].OldIndex].Center[j]; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos[j] = P[minindex].Pos[j]; + } + + MPI_Bcast(pos, 3, MPI_DOUBLE, mincpu, SubComm); + +#ifdef USE_SFR + double sfrtot; + MPI_Allreduce(&sfr, &sfrtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + sfr = sfrtot; +#endif /* #ifdef USE_SFR */ + } + else + { + if(minindex == -1) + terminate("minindex == -1"); + + for(j = 0; j < 3; j++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[minindex].Type == 0) + pos[j] = SphP[PS[minindex].OldIndex].Center[j]; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos[j] = P[minindex].Pos[j]; + } + } + + /* pos[] now holds the position of minimum potential */ + /* we'll take it that as the center */ + + /* determine the particle ID with the smallest binding energy */ + for(i = 0, minindex = -1, minpot = 1.0e30; i < num; i++) + { + p = d[i].index; + if(PS[p].BindingEnergy < minpot || minindex == -1) + { + minpot = PS[p].BindingEnergy; 
+ minindex = p; + } + } + + if(parallel_flag) + { + double *minpotlist = mymalloc("minpotlist", SubNTask * sizeof(double)); + MPI_Allgather(&minpot, 1, MPI_DOUBLE, minpotlist, 1, MPI_DOUBLE, SubComm); + int mincpu; + + for(i = 0, mincpu = -1, minpot = 1.0e30; i < SubNTask; i++) + if(minpotlist[i] < minpot) + { + mincpu = i; + minpot = minpotlist[mincpu]; + } + + myfree(minpotlist); + + if(mincpu < 0) + terminate("mincpu < 0"); + + if(SubThisTask == mincpu) + { + mostboundid = P[minindex].ID; + } + + MPI_Bcast(&mostboundid, sizeof(mostboundid), MPI_BYTE, mincpu, SubComm); + } + else + { + if(minindex == -1) + terminate("minindex == -1"); + + mostboundid = P[minindex].ID; + } + + /* let's get bulk velocity and the center-of-mass */ + /* here we still take all particles */ + + for(j = 0; j < 3; j++) + s[j] = v[j] = 0; + + for(i = 0; i < num; i++) + { + p = d[i].index; + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + s[j] += P[p].Mass * ddxx; + v[j] += P[p].Mass * P[p].Vel[j]; + } + mass += P[p].Mass; + + int ptype = P[p].Type; + mass_tab[ptype] += P[p].Mass; + } + + if(parallel_flag) + { + double stot[3], vtot[3], masstot, mass_tabtot[NTYPES]; + + MPI_Allreduce(s, stot, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(v, vtot, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(mass_tab, mass_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + + mass = masstot; + for(j = 0; j < 3; j++) + { + s[j] = stot[j]; + v[j] = vtot[j]; + } + + for(j = 0; j < NTYPES; j++) + mass_tab[j] = mass_tabtot[j]; + } + + for(j = 0; j < 3; j++) + { + s[j] /= mass; /* center of mass */ + v[j] /= mass; + vel[j] = vel_to_phys * v[j]; + } + + for(j = 0; j < 3; j++) + { + s[j] += pos[j]; + + while(s[j] < 0) + s[j] += boxsize; + while(s[j] >= boxsize) + s[j] -= boxsize; + cm[j] = s[j]; // this is in comoving coordinates + } + + disp = lx = ly = lz = 0; +#ifdef SUBFIND_EXTENDED_PROPERTIES + Jtot[0] = 
Jtot[1] = Jtot[2] = 0; + Jdm[0] = Jdm[1] = Jdm[2] = 0; + Jgas[0] = Jgas[1] = Jgas[2] = 0; + Jstars[0] = Jstars[1] = Jstars[2] = 0; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + rr_list = mymalloc("rr_list", sizeof(sort_r2list) * (num + 1)); + + for(i = 0; i < num; i++) + { + p = d[i].index; + + for(j = 0, rr_tmp = 0, disp_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - s[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]); + dv[j] += H_of_a * dx[j]; + + disp_tmp += P[p].Mass * dv[j] * dv[j]; + /* for rotation curve computation, take minimum of potential as center */ + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + ddxx = All.cf_atime * ddxx; + rr_tmp += ddxx * ddxx; + } + + lx += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + ly += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + lz += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + +#ifdef SUBFIND_EXTENDED_PROPERTIES + for(j = 0; j < 3; j++) // hubble drifts in velocity now with respect to pot min which we consider as the centre of rotation + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]); + dv[j] += H_of_a * dx[j]; + } + + int ptype = P[p].Type; + + Ekin += (P[p].Mass / 2) * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]); + Epot += (P[p].Mass / 2) * PS[p].Potential; + if(P[p].Type == 0) + Ethr += P[p].Mass * SphP[PS[p].OldIndex].Utherm; + + Jtot[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jtot[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jtot[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if(ptype == 1) // dm illustris + { + Jdm[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jdm[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jdm[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 0) // gas (incl. winds!) 
+ { + Jgas[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jgas[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jgas[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 4) // stars (previously: StarP[P[p].AuxDataID].BirthTime) + { + Jstars[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jstars[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jstars[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + rr_tmp = sqrt(rr_tmp); + + rr_list[i].mass = P[p].Mass; + rr_list[i].r = rr_tmp; + disp += disp_tmp; + } + + if(parallel_flag) + { + double spintot[3], disptot; + spin[0] = lx; + spin[1] = ly; + spin[2] = lz; + MPI_Allreduce(spin, spintot, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&disp, &disptot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + disp = disptot; + lx = spintot[0]; + ly = spintot[1]; + lz = spintot[2]; +#ifdef SUBFIND_EXTENDED_PROPERTIES + MPI_Allreduce(MPI_IN_PLACE, &Ekin, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &Epot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &Ethr, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jtot, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jdm, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jgas, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jstars, 3, MPI_DOUBLE, MPI_SUM, SubComm); +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + } + + spin[0] = lx / mass; + spin[1] = ly / mass; + spin[2] = lz / mass; + + veldisp = sqrt(disp / (3 * mass)); /* convert to 1d velocity dispersion */ + +#ifdef SUBFIND_EXTENDED_PROPERTIES + // counter rotating mass fractions + CMFrac = 0; + for(i = 0; i < NTYPES; i++) + CMFracType[i] = 0; + + for(i = 0; i < num; i++) + { + /* identify particle type */ + p = d[i].index; + + /* calculate particle radius */ + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); // 
counter-rotating mass calc with respect to pot min + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]); + dv[j] += H_of_a * dx[j]; + } + + int ptype = P[p].Type; + + jpart[0] = P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + jpart[1] = P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + jpart[2] = P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if((Jtot[0] * jpart[0] + Jtot[1] * jpart[1] + Jtot[2] * jpart[2]) < 0.) + CMFrac += P[p].Mass / mass; + + if(ptype == 1) // dm illustris + if((Jdm[0] * jpart[0] + Jdm[1] * jpart[1] + Jdm[2] * jpart[2]) < 0.) + CMFracType[1] += P[p].Mass / mass_tab[1]; + if(ptype == 0) // gas (incl. winds!) + if((Jgas[0] * jpart[0] + Jgas[1] * jpart[1] + Jgas[2] * jpart[2]) < 0.) + CMFracType[0] += P[p].Mass / mass_tab[0]; + if(ptype == 4) // stars + if((Jstars[0] * jpart[0] + Jstars[1] * jpart[1] + Jstars[2] * jpart[2]) < 0.) + CMFracType[4] += P[p].Mass / mass_tab[4]; + } + + if(parallel_flag) + { + MPI_Allreduce(MPI_IN_PLACE, &CMFrac, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, CMFracType, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + } + +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + if(parallel_flag) + parallel_sort_comm(rr_list, num, sizeof(sort_r2list), subfind_compare_dist_rotcurve, SubComm); + else + mysort(rr_list, num, sizeof(sort_r2list), subfind_compare_dist_rotcurve); + + /* calculate cumulative mass */ + for(i = 1; i < num; i++) + rr_list[i].mass += rr_list[i - 1].mass; + + if(parallel_flag) + { + double mass_part = 0; + if(num) + mass_part = rr_list[num - 1].mass; + double *masslist = mymalloc("masslist", SubNTask * sizeof(double)); + MPI_Allgather(&mass_part, 1, MPI_DOUBLE, masslist, 1, MPI_DOUBLE, SubComm); + + double massbefore = 0; + for(i = 0; i < SubThisTask; i++) + massbefore += masslist[i]; + + for(i = 0; i < num; i++) + rr_list[i].mass += massbefore; + + myfree(masslist); + + /* now calculate rotation curve maximum and half mass radius */ + + double halfmassrad_loc = 0; + 
sort_r2list *rr_lowlist = mymalloc("rr_lowlist", SubNTask * sizeof(sort_r2list)); + sort_r2list low_element; + if(num > 0) + low_element = rr_list[0]; + else + { + low_element.mass = 0; + low_element.r = 0; + } + MPI_Allgather(&low_element, sizeof(sort_r2list), MPI_BYTE, rr_lowlist, sizeof(sort_r2list), MPI_BYTE, SubComm); + + rr_list[num].mass = 0; + rr_list[num].r = 0; + + for(j = SubThisTask + 1; j < SubNTask; j++) + if(rr_lowlist[j].mass > 0) + { + rr_list[num] = rr_lowlist[j]; + break; + } + + myfree(rr_lowlist); + + int *numlist = mymalloc("numlist", SubNTask * sizeof(int)); + MPI_Allgather(&num, 1, MPI_INT, numlist, 1, MPI_INT, SubComm); + + int nbefore = 0; + for(i = 0; i < SubThisTask; i++) + nbefore += numlist[i]; + + for(i = num - 1, max = 0, maxrad = 0; i >= 0; i--) + { + if((i + nbefore) > 5 && rr_list[i].mass > max * rr_list[i].r) + { + max = rr_list[i].mass / rr_list[i].r; + maxrad = rr_list[i].r; + } + + if(rr_list[i].mass < 0.5 * mass && rr_list[i + 1].mass >= 0.5 * mass) + halfmassrad_loc = 0.5 * (rr_list[i].r + rr_list[i + 1].r); + } + + myfree(numlist); + + MPI_Allreduce(&halfmassrad_loc, &halfmassrad, 1, MPI_DOUBLE, MPI_MAX, SubComm); + double *maxlist = mymalloc("maxlist", SubNTask * sizeof(double)); + double *maxradlist = mymalloc("maxradlist", SubNTask * sizeof(double)); + MPI_Allgather(&max, 1, MPI_DOUBLE, maxlist, 1, MPI_DOUBLE, SubComm); + MPI_Allgather(&maxrad, 1, MPI_DOUBLE, maxradlist, 1, MPI_DOUBLE, SubComm); + for(i = 0, max = maxrad = 0; i < SubNTask; i++) + { + if(maxlist[i] > max) + { + max = maxlist[i]; + maxrad = maxradlist[i]; + } + } + myfree(maxradlist); + myfree(maxlist); + } + else + { + for(i = num - 1, max = 0, maxrad = 0; i >= 0; i--) + { + if(i > 5 && rr_list[i].mass > max * rr_list[i].r) + { + max = rr_list[i].mass / rr_list[i].r; + maxrad = rr_list[i].r; + } + + if(i < num - 1) + if(rr_list[i].mass < 0.5 * mass && rr_list[i + 1].mass >= 0.5 * mass) + halfmassrad = 0.5 * (rr_list[i].r + rr_list[i + 1].r); + } + } + + 
halfmassrad /= All.cf_atime; + vmax = sqrt(All.G * max); + vmaxrad = maxrad / All.cf_atime; + + myfree(rr_list); + + /* half mass radii for different types */ + /* need to recalculate len_type_loc first, because of special particle treatment in GFM */ + for(j = 0; j < NTYPES; j++) + len_type_loc[j] = 0; + + for(i = 0; i < num; i++) + { + p = d[i].index; + int ptype = P[p].Type; + + len_type_loc[ptype]++; + } + + int itmp, type; + for(type = 0; type < NTYPES; type++) + { + rr_list = mymalloc("rr_list", sizeof(sort_r2list) * (len_type_loc[type] + 1)); + itmp = 0; + for(i = 0; i < num; i++) + { + p = d[i].index; + + int ptype = P[p].Type; + + if(ptype == type) + { + for(j = 0, rr_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + rr_tmp += ddxx * ddxx; + } + + rr_tmp = sqrt(rr_tmp); + + rr_list[itmp].mass = P[p].Mass; + rr_list[itmp].r = rr_tmp; + itmp++; + } + } + + if(itmp != len_type_loc[type]) + terminate("should not occur: %d %d", itmp, len_type_loc[type]); + + if(parallel_flag) + parallel_sort_comm(rr_list, len_type_loc[type], sizeof(sort_r2list), subfind_compare_dist_rotcurve, SubComm); + else + mysort(rr_list, len_type_loc[type], sizeof(sort_r2list), subfind_compare_dist_rotcurve); + + /* calculate cumulative mass */ + for(i = 1; i < len_type_loc[type]; i++) + rr_list[i].mass = rr_list[i - 1].mass + rr_list[i].mass; + + if(parallel_flag) + { + double mass_part = 0; + if(len_type_loc[type]) + mass_part = rr_list[len_type_loc[type] - 1].mass; + double *masslist = mymalloc("masslist", SubNTask * sizeof(double)); + MPI_Allgather(&mass_part, 1, MPI_DOUBLE, masslist, 1, MPI_DOUBLE, SubComm); + + double massbefore = 0; + for(i = 0; i < SubThisTask; i++) + massbefore += masslist[i]; + + for(i = 0; i < len_type_loc[type]; i++) + rr_list[i].mass += massbefore; + + myfree(masslist); + } + + /* now calculate half mass radii */ + if(parallel_flag) + { + double halfmassrad_loc = 0; + sort_r2list *rr_lowlist = mymalloc("rr_lowlist", SubNTask * 
sizeof(sort_r2list)); + sort_r2list low_element; + if(len_type_loc[type] > 0) + low_element = rr_list[0]; + else + { + low_element.mass = 0; + low_element.r = 0; + } + + MPI_Allgather(&low_element, sizeof(sort_r2list), MPI_BYTE, rr_lowlist, sizeof(sort_r2list), MPI_BYTE, SubComm); + + rr_list[len_type_loc[type]].mass = 0; + rr_list[len_type_loc[type]].r = 0; + for(j = SubThisTask + 1; j < SubNTask; j++) + if(rr_lowlist[j].mass > 0) + { + rr_list[len_type_loc[type]] = rr_lowlist[j]; + break; + } + + myfree(rr_lowlist); + + for(i = len_type_loc[type] - 1; i >= 0; i--) + { + if(rr_list[i].mass < 0.5 * mass_tab[type] && rr_list[i + 1].mass >= 0.5 * mass_tab[type]) + halfmassrad_loc = 0.5 * (rr_list[i].r + rr_list[i + 1].r); + } + + MPI_Allreduce(&halfmassrad_loc, &halfmassradtype[type], 1, MPI_DOUBLE, MPI_MAX, SubComm); + } + else + { + for(i = len_type_loc[type] - 1; i >= 0; i--) + { + if(i < len_type_loc[type] - 1) + if(rr_list[i].mass < 0.5 * mass_tab[type] && rr_list[i + 1].mass >= 0.5 * mass_tab[type]) + halfmassradtype[type] = 0.5 * (rr_list[i].r + rr_list[i + 1].r); + } + } + + myfree(rr_list); + } + + /* properties of 'central galaxies', defined in several ways as particles within some radius: + either (stellar half mass radius) or SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) or (radius of Vmax) */ +#ifdef SUBFIND_EXTENDED_PROPERTIES + // centre of mass /velocity of particles in half/ stellar mass rad + sinrad[0] = sinrad[1] = sinrad[2] = 0; + sinhalfrad[0] = sinhalfrad[1] = sinhalfrad[2] = 0; + vinrad[0] = vinrad[1] = vinrad[2] = 0; + vinhalfrad[0] = vinhalfrad[1] = vinhalfrad[2] = 0; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + for(i = 0; i < num; i++) + { + /* identify particle type */ + p = d[i].index; + int ptype = P[p].Type; + + /* calculate particle radius */ + for(j = 0, rr_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + rr_tmp += ddxx * ddxx; + } + rr_tmp = sqrt(rr_tmp); + + /* properties inside 
SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) */ + if(rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4]) + { + massinrad += P[p].Mass; + massinrad_tab[ptype] += P[p].Mass; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); // comoving (as it should be.) + sinrad[j] += P[p].Mass * ddxx; + vinrad[j] += P[p].Mass * P[p].Vel[j]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + if(ptype == 0) + { + if(P[p].Type == 0) + { +#ifdef USE_SFR + sfrinrad += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */ +#endif /* #ifdef USE_SFR */ + } + } + } + + /* properties inside (stellar half mass radius) */ + if(rr_tmp < 1.0 * halfmassradtype[4]) + { + massinhalfrad += P[p].Mass; + massinhalfrad_tab[ptype] += P[p].Mass; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); // comoving (as it should be.) + sinhalfrad[j] += P[p].Mass * ddxx; + vinhalfrad[j] += P[p].Mass * P[p].Vel[j]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + if(ptype == 0) + { + if(P[p].Type == 0) + { +#ifdef USE_SFR + sfrinhalfrad += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */ +#endif /* #ifdef USE_SFR */ + } + } + } + + /* properties inside (radius of Vmax) */ + if(rr_tmp < 1.0 * vmaxrad) + { + massinmaxrad += P[p].Mass; + massinmaxrad_tab[ptype] += P[p].Mass; + + if(ptype == 0) + { + if(P[p].Type == 0) + { +#ifdef USE_SFR + sfrinmaxrad += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */ +#endif /* #ifdef USE_SFR */ + } + } + } + } + + /* properties of star forming gas */ +#ifdef USE_SFR + for(i = 0; i < num; i++) + { + p = d[i].index; + + if(P[p].Type == 0) + { + if(SphP[PS[p].OldIndex].Sfr > 0) + { + gasMassSfr += P[p].Mass; + } + } + } +#endif /* #ifdef USE_SFR */ + +#ifdef MHD + bfld_halo = bfld_disk = bfld_vol_halo = bfld_vol_disk = 0; + + for(i = 0; i < num; i++) + 
{ + p = d[i].index; + + if(P[p].Type == 0) + { + double bfld2 = (SphP[PS[p].OldIndex].B[0] * SphP[PS[p].OldIndex].B[0]) + + (SphP[PS[p].OldIndex].B[1] * SphP[PS[p].OldIndex].B[1]) + + (SphP[PS[p].OldIndex].B[2] * SphP[PS[p].OldIndex].B[2]); + double vol = SphP[PS[p].OldIndex].Volume; + + bfld_halo += bfld2 * vol; + bfld_vol_halo += vol; + + /* calculate particle radius */ + for(j = 0, rr_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + rr_tmp += ddxx * ddxx; + } + rr_tmp = sqrt(rr_tmp); + + if(rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4]) + { + bfld_disk += bfld2 * vol; + bfld_vol_disk += vol; + } + } + } +#endif /* #ifdef MHD */ + + if(parallel_flag) + { + double massinradtot, massinrad_tabtot[NTYPES]; + MPI_Allreduce(&massinrad, &massinradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(massinrad_tab, massinrad_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + massinrad = massinradtot; + for(j = 0; j < NTYPES; j++) + massinrad_tab[j] = massinrad_tabtot[j]; + + double massinhalfradtot, massinhalfrad_tabtot[NTYPES]; + MPI_Allreduce(&massinhalfrad, &massinhalfradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(massinhalfrad_tab, massinhalfrad_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + massinhalfrad = massinhalfradtot; + for(j = 0; j < NTYPES; j++) + massinhalfrad_tab[j] = massinhalfrad_tabtot[j]; + + double massinmaxradtot, massinmaxrad_tabtot[NTYPES]; + MPI_Allreduce(&massinmaxrad, &massinmaxradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(massinmaxrad_tab, massinmaxrad_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + massinmaxrad = massinmaxradtot; + for(j = 0; j < NTYPES; j++) + massinmaxrad_tab[j] = massinmaxrad_tabtot[j]; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + MPI_Allreduce(MPI_IN_PLACE, sinrad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, vinrad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, sinhalfrad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + 
MPI_Allreduce(MPI_IN_PLACE, vinhalfrad, 3, MPI_DOUBLE, MPI_SUM, SubComm); +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef MHD + double bfld_halo_tot, bfld_disk_tot, bfld_vol_halo_tot, bfld_vol_disk_tot; + MPI_Allreduce(&bfld_halo, &bfld_halo_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&bfld_vol_halo, &bfld_vol_halo_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&bfld_disk, &bfld_disk_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&bfld_vol_disk, &bfld_vol_disk_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + + bfld_halo = bfld_halo_tot; + bfld_vol_halo = bfld_vol_halo_tot; + bfld_disk = bfld_disk_tot; + bfld_vol_disk = bfld_vol_disk_tot; +#endif /* #ifdef MHD */ + +#ifdef USE_SFR + double sfrinradtot; + MPI_Allreduce(&sfrinrad, &sfrinradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + sfrinrad = sfrinradtot; + + double sfrinhalfradtot; + MPI_Allreduce(&sfrinhalfrad, &sfrinhalfradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + sfrinhalfrad = sfrinhalfradtot; + + double sfrinmaxradtot; + MPI_Allreduce(&sfrinmaxrad, &sfrinmaxradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + sfrinmaxrad = sfrinmaxradtot; + + double gasMassSfrtot; + MPI_Allreduce(&gasMassSfr, &gasMassSfrtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + gasMassSfr = gasMassSfrtot; +#endif /* #ifdef USE_SFR */ + } + + if(parallel_flag) + MPI_Allreduce(&num, &totlen, 1, MPI_INT, MPI_SUM, SubComm); + else + totlen = num; + +#ifdef MHD + if(bfld_vol_halo > 0.) + bfld_halo = sqrt(bfld_halo / bfld_vol_halo); + if(bfld_vol_disk > 0.) + bfld_disk = sqrt(bfld_disk / bfld_vol_disk); +#endif /* #ifdef MHD */ + +#ifdef SUBFIND_EXTENDED_PROPERTIES + // finish centre of mass of spheres + for(j = 0; j < 3; j++) + { + if(massinrad > 0) + { + sinrad[j] /= massinrad; + sinrad[j] += pos[j]; + + while(sinrad[j] < 0) + sinrad[j] += boxsize; + while(sinrad[j] >= boxsize) + sinrad[j] -= boxsize; + + vinrad[j] /= massinrad; // this is comoving (as it should be.) 
+ } + + if(massinhalfrad > 0) + { + sinhalfrad[j] /= massinhalfrad; + sinhalfrad[j] += pos[j]; + + while(sinhalfrad[j] < 0) + sinhalfrad[j] += boxsize; + while(sinhalfrad[j] >= boxsize) + sinhalfrad[j] -= boxsize; + + vinhalfrad[j] /= massinhalfrad; + } + } + + Jtot_inHalfRad[0] = Jtot_inHalfRad[1] = Jtot_inHalfRad[2] = 0; + Jdm_inHalfRad[0] = Jdm_inHalfRad[1] = Jdm_inHalfRad[2] = 0; + Jgas_inHalfRad[0] = Jgas_inHalfRad[1] = Jgas_inHalfRad[2] = 0; + Jstars_inHalfRad[0] = Jstars_inHalfRad[1] = Jstars_inHalfRad[2] = 0; + Jtot_inRad[0] = Jtot_inRad[1] = Jtot_inRad[2] = 0; + Jdm_inRad[0] = Jdm_inRad[1] = Jdm_inRad[2] = 0; + Jgas_inRad[0] = Jgas_inRad[1] = Jgas_inRad[2] = 0; + Jstars_inRad[0] = Jstars_inRad[1] = Jstars_inRad[2] = 0; + + for(i = 0; i < num; i++) + { + /* identify particle type */ + p = d[i].index; + + /* calculate particle radius */ + for(j = 0, rr_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + rr_tmp += ddxx * ddxx; + } + rr_tmp = sqrt(rr_tmp); + + int ptype = P[p].Type; + + /* properties inside SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) */ + if((massinrad > 0) && (rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4])) + { + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - vinrad[j]); + dv[j] += H_of_a * dx[j]; + } + + Jtot_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jtot_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jtot_inRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if(ptype == 1) // dm illustris + { + Jdm_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jdm_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jdm_inRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 0) // gas + { + Jgas_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jgas_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jgas_inRad[2] += P[p].Mass * 
(dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 4) // stars + { + Jstars_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jstars_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jstars_inRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + } + + /* properties inside (stellar half mass radius) */ + if((massinhalfrad > 0) && (rr_tmp < 1.0 * halfmassradtype[4])) + { + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - vinhalfrad[j]); + dv[j] += H_of_a * dx[j]; + } + + Jtot_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jtot_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jtot_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if(ptype == 1) // dm illustris + { + Jdm_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jdm_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jdm_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 0) // gas + { + Jgas_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jgas_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jgas_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 4) // stars + { + Jstars_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jstars_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jstars_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + } + } + + if(parallel_flag) + { + MPI_Allreduce(MPI_IN_PLACE, Jtot_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jdm_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jgas_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jstars_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jtot_inHalfRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jdm_inHalfRad, 3, 
MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jgas_inHalfRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jstars_inHalfRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + } + + // counter rotating mass fractions + CMFrac_inHalfRad = CMFrac_inRad = 0; + for(i = 0; i < NTYPES; i++) + CMFracType_inHalfRad[i] = CMFracType_inRad[i] = 0; + + for(i = 0; i < num; i++) + { + /* identify particle type */ + p = d[i].index; + + /* calculate particle radius */ + for(j = 0, rr_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); // counter-rotating mass calc with respect to pot min + rr_tmp += ddxx * ddxx; + } + rr_tmp = sqrt(rr_tmp); + + int ptype = P[p].Type; + + /* properties inside SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) */ + if((massinrad > 0) && (rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4])) + { + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - vinrad[j]); + dv[j] += H_of_a * dx[j]; + } + + jpart[0] = P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + jpart[1] = P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + jpart[2] = P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if((Jtot_inRad[0] * jpart[0] + Jtot_inRad[1] * jpart[1] + Jtot_inRad[2] * jpart[2]) < 0.) + CMFrac_inRad += P[p].Mass / massinrad; + + if(ptype == 1) // dm illustris + if((Jdm_inRad[0] * jpart[0] + Jdm_inRad[1] * jpart[1] + Jdm_inRad[2] * jpart[2]) < 0.) + CMFracType_inRad[1] += P[p].Mass / massinrad_tab[1]; + if(ptype == 0) // gas (incl. winds!) + if((Jgas_inRad[0] * jpart[0] + Jgas_inRad[1] * jpart[1] + Jgas_inRad[2] * jpart[2]) < 0.) + CMFracType_inRad[0] += P[p].Mass / massinrad_tab[0]; + if(ptype == 4) // stars + if((Jstars_inRad[0] * jpart[0] + Jstars_inRad[1] * jpart[1] + Jstars_inRad[2] * jpart[2]) < 0.) 
+ CMFracType_inRad[4] += P[p].Mass / massinrad_tab[4]; + } + + /* properties inside (stellar half mass radius) */ + if((massinhalfrad > 0) && (rr_tmp < 1.0 * halfmassradtype[4])) + { + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - vinhalfrad[j]); + dv[j] += H_of_a * dx[j]; + } + + jpart[0] = P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + jpart[1] = P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + jpart[2] = P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if((Jtot_inHalfRad[0] * jpart[0] + Jtot_inHalfRad[1] * jpart[1] + Jtot_inHalfRad[2] * jpart[2]) < 0.) + CMFrac_inHalfRad += P[p].Mass / massinhalfrad; + + if(ptype == 1) // dm illustris + if((Jdm_inHalfRad[0] * jpart[0] + Jdm_inHalfRad[1] * jpart[1] + Jdm_inHalfRad[2] * jpart[2]) < 0.) + CMFracType_inHalfRad[1] += P[p].Mass / massinhalfrad_tab[1]; + if(ptype == 0) // gas (incl. winds!) + if((Jgas_inHalfRad[0] * jpart[0] + Jgas_inHalfRad[1] * jpart[1] + Jgas_inHalfRad[2] * jpart[2]) < 0.) + CMFracType_inHalfRad[0] += P[p].Mass / massinhalfrad_tab[0]; + if(ptype == 4) // stars + if((Jstars_inHalfRad[0] * jpart[0] + Jstars_inHalfRad[1] * jpart[1] + Jstars_inHalfRad[2] * jpart[2]) < 0.) 
+ CMFracType_inHalfRad[4] += P[p].Mass / massinhalfrad_tab[4]; + } + } + + if(parallel_flag) + { + MPI_Allreduce(MPI_IN_PLACE, &CMFrac_inRad, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &CMFrac_inHalfRad, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, CMFracType_inRad, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, CMFracType_inHalfRad, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + /* now store the calculated properties in the subgroup structure */ + if(parallel_flag == 0 || SubThisTask == 0) + { + subgroup->Len = totlen; + subgroup->Mass = mass; + subgroup->SubMassInRad = massinrad; + subgroup->SubMassInHalfRad = massinhalfrad; + subgroup->SubMassInMaxRad = massinmaxrad; +#ifdef SUBFIND_EXTENDED_PROPERTIES + subgroup->Ekin = Ekin; + subgroup->Epot = Epot; + subgroup->Ethr = Ethr; + subgroup->CMFrac = CMFrac; + subgroup->CMFrac_inHalfRad = CMFrac_inHalfRad; + subgroup->CMFrac_inRad = CMFrac_inRad; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef MHD + subgroup->Bfld_Halo = bfld_halo; + subgroup->Bfld_Disk = bfld_disk; +#endif /* #ifdef MHD */ + + for(j = 0; j < 6; j++) + { + subgroup->MassType[j] = mass_tab[j]; + subgroup->LenType[j] = len_type[j]; + subgroup->SubHalfMassRadType[j] = halfmassradtype[j]; + subgroup->SubMassInRadType[j] = massinrad_tab[j]; + subgroup->SubMassInHalfRadType[j] = massinhalfrad_tab[j]; + subgroup->SubMassInMaxRadType[j] = massinmaxrad_tab[j]; +#ifdef SUBFIND_EXTENDED_PROPERTIES + subgroup->CMFracType[j] = CMFracType[j]; + subgroup->CMFracType_inHalfRad[j] = CMFracType_inHalfRad[j]; + subgroup->CMFracType_inRad[j] = CMFracType_inRad[j]; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + } + for(j = 0; j < 3; j++) + { + subgroup->Pos[j] = pos[j]; + subgroup->Vel[j] = vel[j]; + subgroup->CM[j] = cm[j]; + subgroup->Spin[j] = spin[j]; +#ifdef SUBFIND_EXTENDED_PROPERTIES + subgroup->J[j] = Jtot[j]; + subgroup->Jdm[j] = Jdm[j]; 
+ subgroup->Jgas[j] = Jgas[j]; + subgroup->Jstars[j] = Jstars[j]; + subgroup->J_inHalfRad[j] = Jtot_inHalfRad[j]; + subgroup->Jdm_inHalfRad[j] = Jdm_inHalfRad[j]; + subgroup->Jgas_inHalfRad[j] = Jgas_inHalfRad[j]; + subgroup->Jstars_inHalfRad[j] = Jstars_inHalfRad[j]; + subgroup->J_inRad[j] = Jtot_inRad[j]; + subgroup->Jdm_inRad[j] = Jdm_inRad[j]; + subgroup->Jgas_inRad[j] = Jgas_inRad[j]; + subgroup->Jstars_inRad[j] = Jstars_inRad[j]; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + } + + subgroup->SubMostBoundID = mostboundid; + subgroup->SubVelDisp = veldisp; + subgroup->SubVmax = vmax; + subgroup->SubVmaxRad = vmaxrad; + subgroup->SubHalfMassRad = halfmassrad; + +#ifdef USE_SFR + subgroup->Sfr = sfr; + subgroup->SfrInRad = sfrinrad; + subgroup->SfrInHalfRad = sfrinhalfrad; + subgroup->SfrInMaxRad = sfrinmaxrad; + subgroup->GasMassSfr = gasMassSfr; +#endif /* #ifdef USE_SFR */ + } +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_reprocess.c b/src/amuse/community/arepo/src/subfind/subfind_reprocess.c new file mode 100644 index 0000000000..c189d86001 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_reprocess.c @@ -0,0 +1,240 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/subfind/subfind_reprocess.c
+ * \date 05/2018
+ * \brief Routines to calculate additional group properties.
+ * \details contains functions:
+ * void subfind_add_grp_props_calc_fof_angular_momentum(int num,
+ * int ngroups_cat)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+/* NOTE(review): the header names of the nine system includes below are missing in this copy of the patch (the text between
+ * angle brackets appears to have been stripped) -- restore them from the upstream file before applying. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../fof/fof.h"
+#include "subfind.h"
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+/*! \brief Angular Momentum calculation for groups.
+ *
+ * Works in four steps:
+ * 1. every local particle gets a target task: members of the first
+ * Ncollective groups are spread over the tasks listed in ProcAssign[],
+ * members of the other groups go to one of the remaining tasks, and
+ * particles outside of any group are used to fill up the tasks evenly;
+ * 2. the particles are exchanged and MPI_COMM_WORLD is split into SubComm;
+ * 3. subfind_fof_calc_am_collective() or subfind_fof_calc_am_serial() is
+ * called for the groups, depending on CommSplitColor;
+ * 4. the particles are sent back to their OriginTask/OriginIndex.
+ *
+ * \param[in] num Index of snapshot (only forwarded to the two routines of
+ * step 3).
+ * \param[in] ngroups_cat Number of groups in group file (only forwarded to
+ * the two routines of step 3).
+ *
+ * \return void
+ */
+void subfind_add_grp_props_calc_fof_angular_momentum(int num, int ngroups_cat)
+{
+  mpi_printf("FOF: Begin Angular Momentum Calculation for FOF Groups.\n");
+
+  /* assign target CPUs for the particles in groups */
+  /* the particles not in groups will be distributed such that a uniform particle load results */
+  double t0 = second();
+  /* count_loc_task[t]: local particles in groups that will be sent to task t (zero-initialised) */
+  int *count_loc_task = mymalloc_clear("count_loc_task", NTask * sizeof(int));
+  /* count_task[t]: the same count summed over all tasks; later reused as the number of free slots on task t */
+  int *count_task = mymalloc("count_task", NTask * sizeof(int));
+  /* count_free[t]: number of particles on task t that are not in any group */
+  int *count_free = mymalloc("count_free", NTask * sizeof(int));
+  int count_loc_free = 0;
+
+  for(int i = 0; i < NumPart; i++)
+    {
+      if(PS[i].GrNr < 0)
+        terminate("PS[i].GrNr=%d", PS[i].GrNr);
+
+      if(PS[i].GrNr < TotNgroups) /* particle is in a group */
+        {
+          if(PS[i].GrNr < Ncollective) /* we are in a collective group */
+            PS[i].TargetTask = ProcAssign[PS[i].GrNr].FirstTask + (i % ProcAssign[PS[i].GrNr].NTask);
+          else
+            PS[i].TargetTask = ((PS[i].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective;
+
+          if(PS[i].TargetTask < 0 || PS[i].TargetTask >= NTask)
+            terminate("PS[i].TargetTask=%d PS[i].GrNr=%d", PS[i].TargetTask, PS[i].GrNr);
+
+          count_loc_task[PS[i].TargetTask]++;
+        }
+      else
+        count_loc_free++;
+
+      PS[i].TargetIndex = 0; /* unimportant here */
+    }
+
+  MPI_Allgather(&count_loc_free, 1, MPI_INT, count_free, 1, MPI_INT, MPI_COMM_WORLD);
+  MPI_Allreduce(count_loc_task, count_task, NTask, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  long long sum = 0;
+  for(int i = 0; i < NTask; i++)
+    sum += count_task[i] + count_free[i];
+
+  /* average number of particles per task, rounded up */
+  int maxload = (sum + NTask - 1) / NTask;
+  for(int i = 0; i < NTask; i++)
+    {
+      count_task[i] = maxload - count_task[i]; /* this is the amount that can fit on this task */
+      if(count_task[i] < 0)
+        count_task[i] = 0;
+    }
+
+  int current_task = 0;
+
+  /* replay how the free particles of all lower-ranked tasks consume the free slots, so that current_task and count_task[]
+   * describe what is still available when this task starts to place its own free particles */
+  for(int i = 0; i < ThisTask; i++)
+    {
+      while(count_free[i] > 0 && current_task < NTask)
+        {
+          if(count_free[i] < count_task[current_task])
+            {
+              count_task[current_task] -= count_free[i];
+              count_free[i] = 0;
+            }
+          else
+            {
+              count_free[i] -= count_task[current_task];
+              count_task[current_task] = 0;
+              current_task++;
+            }
+        }
+    }
+
+  for(int i = 0; i < NumPart; i++)
+    {
+      if(PS[i].GrNr >=
+         TotNgroups) /* particle not in a group. Can in principle stay but we move it such that a good load balance is obtained. */
+        {
+          /* NOTE(review): count_task[current_task] is read before current_task is compared against NTask - 1; the loop above
+           * can leave current_task == NTask -- presumably unreachable while this task still owns free particles, confirm. */
+          while(count_task[current_task] == 0 && current_task < NTask - 1)
+            current_task++;
+
+          PS[i].TargetTask = current_task; /* particle not in any group, move it here so that uniform load is achieved */
+          count_task[current_task]--;
+        }
+    }
+
+  myfree(count_free);
+  myfree(count_task);
+  myfree(count_loc_task);
+
+  double balance = subfind_get_particle_balance();
+  mpi_printf("SUBFIND: particle balance=%g\n", balance);
+
+  /* distribute particles such that groups are completely on the CPU(s) that do the corresponding group(s) */
+  fof_subfind_exchange(MPI_COMM_WORLD);
+  double t1 = second();
+  mpi_printf("SUBFIND: subfind_exchange() took %g sec\n", timediff(t0, t1));
+
+  balance = subfind_get_particle_balance();
+  mpi_printf("SUBFIND: particle balance for AM processing=%g\n", balance);
+
+  /* we can now split the communicator to give each collectively treated group its own processor set */
+  MPI_Comm_split(MPI_COMM_WORLD, CommSplitColor, ThisTask, &SubComm);
+  MPI_Comm_size(SubComm, &SubNTask);
+  MPI_Comm_rank(SubComm, &SubThisTask);
+  SubTagOffset = TagOffset;
+
+  /* here the execution paths for collective groups and serial groups branch. The collective CPUs work in small sets that each
+   * deal with one large group. The serial CPUs each deal with several halos by themselves
+   */
+  if(CommSplitColor < Ncollective) /* we are one of the CPUs that does a collective group */
+    {
+      /* we now apply a collective version of subfind to the group split across the processors belonging to communicator SubComm
+       * The relevant group is the one stored in Group[0] on SubThisTask==0.
+       */
+      subfind_fof_calc_am_collective(num, ngroups_cat);
+    }
+  else
+    {
+      /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other arrays!
+       */
+      submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
+      for(int i = 0; i < NumPart; i++)
+        {
+          PS[i].OldIndex = i; /* remembered so that the reordering can be undone further below */
+          submp[i].index = i;
+          submp[i].GrNr = PS[i].GrNr;
+          submp[i].DM_Density = PS[i].Density;
+        }
+      qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density);
+      subfind_reorder_according_to_submp();
+      myfree(submp);
+
+      /* now we have the particles in each group consecutively */
+      if(SubThisTask == 0)
+        printf("SUBFIND-SERIAL: Start to do AM for %d small groups with serial subfind algorithm on %d processors (root-node=%d)\n",
+               TotNgroups - Ncollective, SubNTask, ThisTask);
+
+      /* we now apply a serial version of subfind to the local groups */
+
+      t0 = second();
+      for(int gr = 0, offset = 0; gr < Ngroups; gr++)
+        {
+          /* every local group must have been assigned to this task by the same formula that set TargetTask above */
+          if(((Group[gr].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective == ThisTask)
+            offset = subfind_fof_calc_am_serial(gr, offset, num, ngroups_cat);
+          else
+            terminate("how come that we have this group number?");
+        }
+
+      MPI_Barrier(SubComm);
+      t1 = second();
+      if(SubThisTask == 0)
+        printf("SUBFIND-SERIAL: processing AM of serial groups took %g sec\n", timediff(t0, t1));
+
+      /* undo local rearrangement that made groups consecutive. After that, the association of SphP[] will be correct again */
+      submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
+      for(int i = 0; i < NumPart; i++)
+        {
+          submp[i].index = i;
+          submp[i].OldIndex = PS[i].OldIndex;
+        }
+      qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex);
+      subfind_reorder_according_to_submp();
+      myfree(submp);
+    }
+
+  /* free the communicator */
+  MPI_Comm_free(&SubComm);
+
+  /* distribute particles back to original CPU */
+  t0 = second();
+  for(int i = 0; i < NumPart; i++)
+    {
+      PS[i].TargetTask = PS[i].OriginTask;
+      PS[i].TargetIndex = PS[i].OriginIndex;
+    }
+
+  fof_subfind_exchange(MPI_COMM_WORLD);
+  t1 = second();
+  if(ThisTask == 0)
+    printf("SUBFIND: subfind_exchange() (for return to original CPU after AM) took %g sec\n", timediff(t0, t1));
+
+  mpi_printf("FOF: Angular Momentum Calculation for FOF Groups finished successfully.\n");
+}
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_serial.c b/src/amuse/community/arepo/src/subfind/subfind_serial.c
new file mode 100644
index 0000000000..acc996ed02
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_serial.c
@@ -0,0 +1,807 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_serial.c + * \date 05/2018 + * \brief Processes the local groups in serial mode. + * \details contains functions: + * int subfind_process_group_serial(int gr, int Offs, int + * nsubgroups_cat) + * int subfind_unbind(struct unbind_data *ud, int len, int + * *len_non_gas) + * int subfind_fof_calc_am_serial(int gr, int Offs, int snapnr, + * int ngroups_cat) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "../fof/fof.h" +#include "subfind.h" + +static int *Head, *Next, *Tail, *Len; + +/*! \brief Subhalo finding on each core individually. + * + * \param[in] gr Group index. + * \param[in] Offs Offset. + * \param[in] nsubgroups_cat (unused) + * + * \return New offset. 
+ */ +int subfind_process_group_serial(int gr, int Offs, int nsubgroups_cat) +{ + int i, j, k, p, count_cand, count, len, len_non_gas, N, nsubs, part_index, subnr, totlen; + static struct unbind_data *ud; + + while(PS[Offs].GrNr != Group[gr].GrNr) + { + Offs++; + if(Offs >= NumPart) + { + char buf[1000]; + sprintf(buf, "don't find a particle for groupnr=%d\n", Group[gr].GrNr); + + for(int i = 0; i < NumPart; i++) + printf("task=%d i=%d PS[i].GrNr=%d\n", ThisTask, i, PS[i].GrNr); + + terminate(buf); + } + } + + N = Group[gr].Len; + GrNr = Group[gr].GrNr; + + subfind_loctree_treeallocate((int)(All.TreeAllocFactor * N) + NTopnodes, NumPart); + + for(int i = 0; i < N; i++) + if(PS[Offs + i].GrNr != Group[gr].GrNr) + terminate("task=%d, gr=%d: don't have the number of particles for GrNr=%d i=%d group-len:N=%d found=%d before=%d\n", ThisTask, + gr, Group[gr].GrNr, i, N, PS[Offs + i].GrNr, PS[Offs - 1].GrNr); + + candidates = (struct cand_dat *)mymalloc_movable(&candidates, "candidates", N * sizeof(struct cand_dat)); + + Head = (int *)mymalloc_movable(&Head, "Head", N * sizeof(int)); + Next = (int *)mymalloc_movable(&Next, "Next", N * sizeof(int)); + Tail = (int *)mymalloc_movable(&Tail, "Tail", N * sizeof(int)); + Len = (int *)mymalloc_movable(&Len, "Len", N * sizeof(int)); + ud = (struct unbind_data *)mymalloc_movable(&ud, "ud", N * sizeof(struct unbind_data)); + + Head -= Offs; + Next -= Offs; + Tail -= Offs; + Len -= Offs; + + for(int i = 0; i < N; i++) + ud[i].index = Offs + i; + + subfind_loctree_findExtent(N, ud); + + subfind_loctree_treebuild(N, &ud); /* build tree for all particles of this group */ + +#ifdef SUBFIND_EXTENDED_PROPERTIES + // compute the binding energy of FOF group + double Epot = 0; + for(int i = 0; i < N; i++) + { + int p = ud[i].index; + double pot = subfind_loctree_treeevaluate_potential(p); + + // note: add self-energy + pot += P[p].Mass / (All.ForceSoftening[P[p].SofteningType] / 2.8); // (P[p].Soft / 2.8); + + // multiply with G, scale by 
scale factor + pot *= All.G / All.cf_atime; + + Epot += (P[p].Mass / 2) * pot; + } + Group[gr].Epot = Epot; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + for(int i = Offs; i < Offs + N; i++) + Head[i] = Next[i] = Tail[i] = -1; + + /* note: particles are already ordered in the order of decreasing density */ + + int ss, ngbs, ndiff, head = 0, head_attach; + int listofdifferent[2], prev; + int ngb_index, rank; + int desngb = All.DesLinkNgb; + + for(i = 0, count_cand = 0; i < N; i++) + { + part_index = Offs + i; + + MyDouble *pos; +#ifdef CELL_CENTER_GRAVITY + if(P[part_index].Type == 0) + pos = PS[part_index].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[part_index].Pos; + + subfind_locngb_treefind(pos, desngb, PS[part_index].Hsml); + + /* note: returned neighbours are already sorted by distance */ + + for(k = 0, ndiff = 0, ngbs = 0; k < desngb && ngbs < 2 && ndiff < 2; k++) + { + ngb_index = R2list[k].index; + + if(ngb_index != part_index) /* to exclude the particle itself */ + { + /* we only look at neighbours that are denser */ + if(PS[ngb_index].Density > PS[part_index].Density) + { + ngbs++; + + if(Head[ngb_index] >= 0) /* neighbor is attached to a group */ + { + if(ndiff == 1) + if(listofdifferent[0] == Head[ngb_index]) + continue; + + /* a new group has been found */ + listofdifferent[ndiff++] = Head[ngb_index]; + } + else + terminate( + "this may not occur: ThisTask=%d gr=%d k=%d i=%d part_index=%d ngb_index = %d head[ngb_index]=%d " + "P[part_index].DM_Density=%g %g GrNrs= %d %d \n", + ThisTask, gr, k, i, part_index, ngb_index, Head[ngb_index], PS[part_index].Density, PS[ngb_index].Density, + PS[part_index].GrNr, PS[ngb_index].GrNr); + } + } + } + + switch(ndiff) /* treat the different possible cases */ + { + case 0: /* this appears to be a lonely maximum -> new group */ + head = part_index; + Head[part_index] = Tail[part_index] = part_index; + Len[part_index] = 1; + Next[part_index] = -1; + break; + + case 1: /* the particle is 
attached to exactly one group */ + head = listofdifferent[0]; + Head[part_index] = head; + Next[Tail[head]] = part_index; + Tail[head] = part_index; + Len[head]++; + Next[part_index] = -1; + break; + + case 2: /* the particle merges two groups together */ + head = listofdifferent[0]; + head_attach = listofdifferent[1]; + if(Len[head_attach] > Len[head] || + (Len[head_attach] == Len[head] && + head_attach < head)) /* other group is longer, swap them. for equal length, take the larger head value */ + { + head = listofdifferent[1]; + head_attach = listofdifferent[0]; + } + + /* only in case the attached group is long enough we bother to register is + as a subhalo candidate */ + + if(Len[head_attach] >= All.DesLinkNgb) + { + candidates[count_cand].len = Len[head_attach]; + candidates[count_cand].head = Head[head_attach]; + count_cand++; + } + + /* now join the two groups */ + Next[Tail[head]] = head_attach; + Tail[head] = Tail[head_attach]; + Len[head] += Len[head_attach]; + + ss = head_attach; + do + { + Head[ss] = head; + } + while((ss = Next[ss]) >= 0); + + /* finally, attach the particle */ + Head[part_index] = head; + Next[Tail[head]] = part_index; + Tail[head] = part_index; + Len[head]++; + Next[part_index] = -1; + break; + + default: + terminate("can't be!"); + break; + } + } + + /* add the full thing as a subhalo candidate */ + for(i = 0, prev = -1; i < N; i++) + { + if(Head[Offs + i] == Offs + i) + if(Next[Tail[Offs + i]] == -1) + { + if(prev < 0) + head = Offs + i; + if(prev >= 0) + Next[prev] = Offs + i; + + prev = Tail[Offs + i]; + } + } + + candidates[count_cand].len = N; + candidates[count_cand].head = head; + count_cand++; + + /* go through them once and assign the rank */ + for(i = 0, p = head, rank = 0; i < N; i++) + { + Len[p] = rank++; + p = Next[p]; + } + + /* for each candidate, we now pull out the rank of its head */ + for(k = 0; k < count_cand; k++) + candidates[k].rank = Len[candidates[k].head]; + + for(i = Offs; i < Offs + N; i++) + Tail[i] = 
-1; + + for(k = 0, nsubs = 0; k < count_cand; k++) + { + for(i = 0, p = candidates[k].head, len = 0; i < candidates[k].len; i++, p = Next[p]) + if(Tail[p] < 0) + ud[len++].index = p; + + if(len >= All.DesLinkNgb) + len = subfind_unbind(ud, len, &len_non_gas); + + if(len >= All.DesLinkNgb) + { + /* ok, we found a substructure */ + + for(i = 0; i < len; i++) + Tail[ud[i].index] = nsubs; /* we use this to flag the substructures */ + + candidates[k].nsub = nsubs; + candidates[k].bound_length = len; + nsubs++; + } + else + { + candidates[k].nsub = -1; + candidates[k].bound_length = 0; + } + } + +#ifdef VERBOSE + printf("\nGroupLen=%d (gr=%d)\n", N, gr); + printf("Number of substructures: %d (before unbinding: %d)\n", nsubs, count_cand); +#endif /* #ifdef VERBOSE */ + + mysort(candidates, count_cand, sizeof(struct cand_dat), subfind_compare_serial_candidates_boundlength); + + /* now we determine the parent subhalo for each candidate */ + for(k = 0; k < count_cand; k++) + { + candidates[k].subnr = k; + candidates[k].parent = 0; + } + + mysort(candidates, count_cand, sizeof(struct cand_dat), subfind_compare_serial_candidates_rank); + + for(k = 0; k < count_cand; k++) + { + for(j = k + 1; j < count_cand; j++) + { + if(candidates[j].rank > candidates[k].rank + candidates[k].len) + break; + + if(candidates[k].rank + candidates[k].len >= candidates[j].rank + candidates[j].len) + { + if(candidates[k].bound_length >= All.DesLinkNgb) + candidates[j].parent = candidates[k].subnr; + } + else + { + char buf[1000]; + sprintf(buf, "k=%d|%d has rank=%d and len=%d. 
j=%d has rank=%d and len=%d bound=%d\n", k, count_cand, + (int)candidates[k].rank, candidates[k].len, (int)candidates[k].bound_length, candidates[j].rank, + (int)candidates[j].len, candidates[j].bound_length); + terminate(buf); + } + } + } + + mysort(candidates, count_cand, sizeof(struct cand_dat), subfind_compare_serial_candidates_subnr); + + /* now determine the properties */ + Group[gr].Nsubs = nsubs; + Group[gr].Pos[0] = Group[gr].CM[0]; + Group[gr].Pos[1] = Group[gr].CM[1]; + Group[gr].Pos[2] = Group[gr].CM[2]; + + for(k = 0, subnr = 0, totlen = 0; k < nsubs; k++) + { + len = candidates[k].bound_length; + +#ifdef VERBOSE + printf("subnr=%d SubLen=%d\n", subnr, len); +#endif /* #ifdef VERBOSE */ + + totlen += len; + + for(i = 0, p = candidates[k].head, count = 0; i < candidates[k].len; i++) + { + if(Tail[p] == candidates[k].nsub) + ud[count++].index = p; + + p = Next[p]; + } + + if(count != len) + terminate("count=%d != len=%d k=%d subnr=%d nsubs=%d", count, len, k, subnr, nsubs); + + if(Nsubgroups > MaxNsubgroups) + terminate("Nsubgroups = %d >= MaxNsubgroups = %d", Nsubgroups, MaxNsubgroups); + + subfind_determine_sub_halo_properties(ud, len, &SubGroup[Nsubgroups], GrNr, subnr, 0, nsubgroups_cat); + + SubGroup[Nsubgroups].SubParent = candidates[k].parent; + SubGroup[Nsubgroups].SubNr = subnr; + SubGroup[Nsubgroups].GrNr = Group[gr].GrNr; + + if(subnr == 0) + { + for(j = 0; j < 3; j++) + Group[gr].Pos[j] = SubGroup[Nsubgroups].Pos[j]; + } + + Nsubgroups++; + + /* Let's now assign the subgroup number */ + + for(i = 0; i < len; i++) + PS[ud[i].index].SubNr = subnr; + + subnr++; + } + +#ifdef VERBOSE + printf("Fuzz=%d\n", N - totlen); +#endif /* #ifdef VERBOSE */ + + myfree(ud); + myfree(Len + Offs); + myfree(Tail + Offs); + myfree(Next + Offs); + myfree(Head + Offs); + + myfree(candidates); + + subfind_loctree_treefree(); + + return Offs; +} + +/*! \brief Unbinding algorithm. + * + * \param[in, out] ud Unbind data. + * \param[in] len length of ud array. 
+ * \param[out] len_non_gas Number of particles which are not gas cells. + * + * \return Length of array minus the unbound particles. + */ +int subfind_unbind(struct unbind_data *ud, int len, int *len_non_gas) +{ + double *bnd_energy, energy_limit, weakly_bound_limit = 0; + int i, j, p, minindex, unbound, phaseflag, iter = 0; + double ddxx, s[3], dx[3], v[3], dv[3], pos[3]; + double vel_to_phys, H_of_a, atime, pot, minpot = 0; + double boxsize, xtmp; + double TotMass; + + boxsize = All.BoxSize; + + if(All.ComovingIntegrationOn) + { + vel_to_phys = 1.0 / All.Time; + H_of_a = hubble_function(All.Time); + atime = All.Time; + } + else + { + vel_to_phys = atime = 1; + H_of_a = 0; + } + + bnd_energy = (double *)mymalloc("bnd_energy", len * sizeof(double)); + + phaseflag = 0; /* this means we will recompute the potential for all particles */ + + do + { + subfind_loctree_treebuild(len, &ud); + + /* let's compute the potential */ + + if(phaseflag == 0) /* redo it for all the particles */ + { + for(i = 0, minindex = -1, minpot = 1.0e30; i < len; i++) + { + p = ud[i].index; + + pot = subfind_loctree_treeevaluate_potential(p); + + PS[p].Potential = All.G / All.cf_atime * pot; + + if(PS[p].Potential < minpot || minindex == -1) + { + minpot = PS[p].Potential; + minindex = p; + } + } + +#ifdef CELL_CENTER_GRAVITY + if(P[minindex].Type == 0) + { + for(j = 0; j < 3; j++) + pos[j] = PS[minindex].Center[j]; /* position of minimum potential */ + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + for(j = 0; j < 3; j++) + pos[j] = P[minindex].Pos[j]; /* position of minimum potential */ + } + } + else + { + /* we only repeat for those close to the unbinding threshold */ + for(i = 0; i < len; i++) + { + p = ud[i].index; + + if(PS[p].BindingEnergy >= weakly_bound_limit) + { + pot = subfind_loctree_treeevaluate_potential(p); + + PS[p].Potential *= All.G / All.cf_atime; + } + } + } + + /* let's get bulk velocity and the center-of-mass */ + + v[0] = v[1] = v[2] = 0; + s[0] = s[1] = s[2] 
= 0; + + for(i = 0, TotMass = 0; i < len; i++) + { + p = ud[i].index; + + for(j = 0; j < 3; j++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[p].Type == 0) + ddxx = GRAVITY_NEAREST_X(PS[p].Center[j] - pos[j]); + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + s[j] += P[p].Mass * ddxx; + v[j] += P[p].Mass * P[p].Vel[j]; + } + TotMass += P[p].Mass; + } + + for(j = 0; j < 3; j++) + { + v[j] /= TotMass; + s[j] /= TotMass; /* center-of-mass */ + + s[j] += pos[j]; + + while(s[j] < 0) + s[j] += boxsize; + while(s[j] >= boxsize) + s[j] -= boxsize; + } + + for(i = 0; i < len; i++) + { + p = ud[i].index; + + for(j = 0; j < 3; j++) + { + dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]); +#ifdef CELL_CENTER_GRAVITY + if(P[p].Type == 0) + dx[j] = atime * GRAVITY_NEAREST_X(PS[p].Center[j] - s[j]); + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + dx[j] = atime * GRAVITY_NEAREST_X(P[p].Pos[j] - s[j]); + + dv[j] += H_of_a * dx[j]; + } + + PS[p].BindingEnergy = PS[p].Potential + 0.5 * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]); + PS[p].BindingEnergy += + All.G / All.cf_atime * P[p].Mass / (All.ForceSoftening[P[p].SofteningType] / 2.8); /* note: add self-energy */ + + if(P[p].Type == 0) + PS[p].BindingEnergy += PS[p].Utherm; + + bnd_energy[i] = PS[p].BindingEnergy; + } + + mysort(bnd_energy, len, sizeof(double), subfind_compare_binding_energy); /* largest comes first! 
*/ + + energy_limit = bnd_energy[(int)(0.25 * len)]; + + for(i = 0, unbound = 0; i < len - 1; i++) + { + if(bnd_energy[i] > 0) + unbound++; + else + unbound--; + + if(unbound <= 0) + break; + } + weakly_bound_limit = bnd_energy[i]; + + /* now omit unbound particles, but at most 1/4 of the original size */ + + for(i = 0, unbound = 0, *len_non_gas = 0; i < len; i++) + { + p = ud[i].index; + if(PS[p].BindingEnergy > 0 && PS[p].BindingEnergy > energy_limit) + { + unbound++; + ud[i] = ud[len - 1]; + i--; + len--; + } + else if(P[p].Type != 0) + (*len_non_gas)++; + } + + if(len < All.DesLinkNgb) + break; + + if(phaseflag == 0) + { + if(unbound > 0) + phaseflag = 1; + } + else + { + if(unbound == 0) + { + phaseflag = 0; /* this will make us repeat everything once more for all particles */ + unbound = 1; + } + } + + if(iter++ > MAXITER) + terminate("iter > MAXITER = %d", MAXITER); + } + while(unbound > 0); + + myfree(bnd_energy); + + return (len); +} + +#ifdef SUBFIND_EXTENDED_PROPERTIES +/*! \brief Serial version of angular momentum calculation. + * + * \param[in] gr Group index. + * \param[in] Offs Offset of group (first index in PS). 
+ * \param[in] snapnr (unused) + * \param[in] ngroups_cat (unused) + */ +int subfind_fof_calc_am_serial(int gr, int Offs, int snapnr, int ngroups_cat) +{ + long long index; + int len, i, k; + double Pos_pbc[3], Vel_tot[3], gr_Jtot[3], gr_Jdm[3], gr_Jgas[3], gr_Jstars[3], jpart[3]; + double gr_CMFrac, gr_CMFracType[NTYPES], gr_Ekin, gr_Ethr; + int gr_len_dm; + double gr_mass, gr_mass_gas, gr_mass_stars; + int ptype; + + while(PS[Offs].GrNr != Group[gr].GrNr) + { + Offs++; + if(Offs >= NumPart) + { + char buf[1000]; + sprintf(buf, "don't find a particle for groupnr=%d\n", Group[gr].GrNr); + + for(i = 0; i < NumPart; i++) + printf("task=%d i=%d PS[i].GrNr=%d\n", ThisTask, i, PS[i].GrNr); + + terminate(buf); + } + } + + len = Group[gr].Len; + + struct unbind_data *ud = (struct unbind_data *)mymalloc("ud", len * sizeof(struct unbind_data)); + + // get all fof particles + for(i = 0; i < len; i++) + ud[i].index = Offs + i; + + // initialize + gr_CMFrac = 0; + gr_Ekin = 0; + gr_Ethr = 0; + + for(k = 0; k < 3; k++) + { + gr_Jtot[k] = 0; + gr_Jdm[k] = 0; + gr_Jgas[k] = 0; + gr_Jstars[k] = 0; + } + for(k = 0; k < NTYPES; k++) + { + gr_CMFracType[k] = 0; + } + + // calc angular momentum for dm, gas, stars + for(k = 0; k < len; k++) + { + index = ud[k].index; + ptype = P[index].Type; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = P[index].Pos[i] - Group[gr].Pos[i]; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = fof_periodic(Pos_pbc[i]); + + for(i = 0; i < 3; i++) + Pos_pbc[i] = Pos_pbc[i] * All.cf_atime; // units: phys kpc/h + + for(i = 0; i < 3; i++) + Vel_tot[i] = P[index].Vel[i] / All.cf_atime - Group[gr].Vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i]; + + gr_Ekin += (P[index].Mass / 2) * (Vel_tot[0] * Vel_tot[0] + Vel_tot[1] * Vel_tot[1] + Vel_tot[2] * Vel_tot[2]); + if(P[index].Type == 0) + gr_Ethr += P[index].Mass * SphP[PS[index].OldIndex].Utherm; + + gr_Jtot[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jtot[1] += P[index].Mass * (Pos_pbc[2] * 
Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jtot[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + if(ptype == 1) // dm illustris + { + gr_Jdm[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jdm[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jdm[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 0) // gas (incl. winds) + { + gr_Jgas[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jgas[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jgas[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 4) // stars + { + gr_Jstars[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jstars[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jstars[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + } + + Group[gr].Ekin = gr_Ekin; + Group[gr].Ethr = gr_Ethr; + for(i = 0; i < 3; i++) + { + Group[gr].J[i] = gr_Jtot[i]; + Group[gr].JDM[i] = gr_Jdm[i]; + Group[gr].JGas[i] = gr_Jgas[i]; + Group[gr].JStars[i] = gr_Jstars[i]; + } + + // calc counter-rotating fractions + gr_len_dm = 0; + gr_mass = gr_mass_gas = gr_mass_stars = 0; + + for(k = 0; k < len; k++) + { + index = ud[k].index; + ptype = P[index].Type; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = P[index].Pos[i] - Group[gr].Pos[i]; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = fof_periodic(Pos_pbc[i]); + + for(i = 0; i < 3; i++) + Pos_pbc[i] = Pos_pbc[i] * All.cf_atime; // units: phys kpc/h + + for(i = 0; i < 3; i++) + Vel_tot[i] = P[index].Vel[i] / All.cf_atime - Group[gr].Vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i]; + + jpart[0] = P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + jpart[1] = P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + jpart[2] = P[index].Mass * 
(Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + gr_mass += P[index].Mass; + if((gr_Jtot[0] * jpart[0] + gr_Jtot[1] * jpart[1] + gr_Jtot[2] * jpart[2]) < 0.) + gr_CMFrac += P[index].Mass; // / Group[gr].Mass; + + if(ptype == 1) // dm illustris + { + gr_len_dm++; + if((gr_Jdm[0] * jpart[0] + gr_Jdm[1] * jpart[1] + gr_Jdm[2] * jpart[2]) < 0.) + gr_CMFracType[1]++; + } + if(ptype == 0) // gas (incl. winds) + { + gr_mass_gas += P[index].Mass; + if((gr_Jgas[0] * jpart[0] + gr_Jgas[1] * jpart[1] + gr_Jgas[2] * jpart[2]) < 0.) + gr_CMFracType[0] += P[index].Mass; // / Group[gr].MassType[0]; + } + if(ptype == 4) // stars + { + gr_mass_stars += P[index].Mass; + if((gr_Jstars[0] * jpart[0] + gr_Jstars[1] * jpart[1] + gr_Jstars[2] * jpart[2]) < 0.) + gr_CMFracType[4] += P[index].Mass; // / Group[gr].MassType[4]; + } + } + + gr_CMFrac /= gr_mass; // Group[gr].Mass; + gr_CMFracType[1] /= gr_len_dm; + gr_CMFracType[0] /= gr_mass_gas; // Group[gr].MassType[0]; + gr_CMFracType[4] /= gr_mass_stars; // Group[gr].MassType[4]; + + Group[gr].CMFrac = gr_CMFrac; + for(i = 0; i < NTYPES; i++) + Group[gr].CMFracType[i] = gr_CMFracType[i]; + + myfree(ud); + return Offs; +} +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_so.c b/src/amuse/community/arepo/src/subfind/subfind_so.c new file mode 100644 index 0000000000..5f3774b6ea --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_so.c @@ -0,0 +1,964 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_so.c + * \date 05/2018 + * \brief Spherical overdensity algorithm for subfind. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * double subfind_overdensity(void) + * static int subfind_overdensity_evaluate(int target, int mode, + * int threadid) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND + +#include "../fof/fof.h" +#include "subfind.h" + +static double *R200, *M200; + +static char *Todo; +static MyFloat *Left, *Right; +static int mainstep; + +static int subfind_overdensity_evaluate(int target, int mode, int threadid); + +#ifdef SUBFIND_EXTENDED_PROPERTIES +/*! \brief Structure for angular momentum properties. 
+ */
+static struct Angular_Momentum
+{
+  double Pmom[3];            /*!< sum of Mass * Vel / All.cf_atime over the enclosed particles (filled in mainstep 0) */
+  double MassType[NTYPES];   /*!< enclosed mass per particle type (mainstep 0) */
+  double Jtot[3];            /*!< angular momentum of all enclosed particles (mainstep 1) */
+  double Jdm[3];             /*!< angular momentum of type-1 particles (mainstep 1) */
+  double Jgas[3];            /*!< angular momentum of type-0 particles (mainstep 1) */
+  double Jstars[3];          /*!< angular momentum of type-4 particles (mainstep 1) */
+  int LenType[NTYPES];       /*!< enclosed particle count per particle type (mainstep 0) */
+  double CMFrac;             /*!< mass fraction whose angular momentum is anti-aligned with Jtot (mainstep 2) */
+  double CMFracType[NTYPES]; /*!< same as CMFrac, per type; only entries 0, 1 and 4 are accumulated */
+  double Ekin;               /*!< kinetic energy sum (mainstep 1) */
+  double Epot;               /*!< potential energy, set from subfind_so_potegy() in subfind_overdensity() */
+  double Ethr;               /*!< thermal energy sum of type-0 particles (mainstep 1) */
+  double N200;               /*!< number of enclosed particles, kept as double (mainstep 0) */
+} * AngMom;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ * to other processors if needed. Type called data_in and static
+ * pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /*!< group position, copied from Group[i].Pos */
+  double R200;     /*!< current search radius, copied from R200[i] */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  double M200;                     /*!< enclosed mass from mainstep 0, copied from M200[i] */
+  int GrNr;                        /*!< group number, copied from Group[i].GrNr */
+  int TaskOfGr;                    /*!< task that owns the group (ThisTask of the sender) */
+  int LocGrIndex;                  /*!< index of the group on the owning task */
+  struct Angular_Momentum AngMomIn; /*!< results of the previous passes, copied from AngMom[i] */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  int Firstnode; /*!< value of the firstnode argument handed to particle2in() */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Routine that fills the relevant group data into the input
+ * structure defined above. Needed by generic_comm_helpers2.
+ *
+ * \param[out] in Data structure to fill.
+ * \param[in] i Index of the group in the local group arrays (Group, R200,
+ *            M200, AngMom).
+ * \param[in] firstnode First node of communication.
+ *
+ * \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  in->Pos[0] = Group[i].Pos[0];
+  in->Pos[1] = Group[i].Pos[1];
+  in->Pos[2] = Group[i].Pos[2];
+  in->R200 = R200[i];
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  in->GrNr = Group[i].GrNr;
+  in->TaskOfGr = ThisTask; /* the group is local to the task that fills this record */
+  in->LocGrIndex = i;
+  in->M200 = M200[i];
+  in->AngMomIn = AngMom[i]; /* whole-struct copy: later passes need the sums of earlier ones */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  in->Firstnode = firstnode;
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ * processors. Type called data_out and static pointers DataResult and
+ * DataOut needed by generic_comm_helpers2.
+ */ +typedef struct +{ + double Mass; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + struct Angular_Momentum AngMomOut; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and group data arrays (AngMom,...) + * \param[in] i Index of particle in group arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? + * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + if(mainstep == 0) + M200[i] = out->Mass; +#ifdef SUBFIND_EXTENDED_PROPERTIES + if(mainstep == 0) + { + for(int k = 0; k < 3; k++) + AngMom[i].Pmom[k] = out->AngMomOut.Pmom[k]; + for(int k = 0; k < NTYPES; k++) + { + AngMom[i].MassType[k] = out->AngMomOut.MassType[k]; + AngMom[i].LenType[k] = out->AngMomOut.LenType[k]; + } + AngMom[i].N200 = out->AngMomOut.N200; + } + else if(mainstep == 1) + { + for(int k = 0; k < 3; k++) + { + AngMom[i].Jtot[k] = out->AngMomOut.Jtot[k]; + AngMom[i].Jdm[k] = out->AngMomOut.Jdm[k]; + AngMom[i].Jgas[k] = out->AngMomOut.Jgas[k]; + AngMom[i].Jstars[k] = out->AngMomOut.Jstars[k]; + } + AngMom[i].Ekin = out->AngMomOut.Ekin; + AngMom[i].Ethr = out->AngMomOut.Ethr; + } + else if(mainstep == 2) + { + AngMom[i].CMFrac = out->AngMomOut.CMFrac; + for(int k = 0; k < NTYPES; k++) + AngMom[i].CMFracType[k] = out->AngMomOut.CMFracType[k]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + } + else /* combine */ + { + if(mainstep == 0) + M200[i] += out->Mass; +#ifdef SUBFIND_EXTENDED_PROPERTIES + if(mainstep == 0) + { + for(int k = 0; k < 3; k++) + AngMom[i].Pmom[k] += out->AngMomOut.Pmom[k]; + for(int k = 0; k < NTYPES; k++) + { + AngMom[i].MassType[k] += out->AngMomOut.MassType[k]; 
+ AngMom[i].LenType[k] += out->AngMomOut.LenType[k]; + } + AngMom[i].N200 += out->AngMomOut.N200; + } + else if(mainstep == 1) + { + for(int k = 0; k < 3; k++) + { + AngMom[i].Jtot[k] += out->AngMomOut.Jtot[k]; + AngMom[i].Jdm[k] += out->AngMomOut.Jdm[k]; + AngMom[i].Jgas[k] += out->AngMomOut.Jgas[k]; + AngMom[i].Jstars[k] += out->AngMomOut.Jstars[k]; + } + AngMom[i].Ekin += out->AngMomOut.Ekin; + AngMom[i].Ethr += out->AngMomOut.Ethr; + } + else if(mainstep == 2) + { + AngMom[i].CMFrac += out->AngMomOut.CMFrac; + for(int k = 0; k < NTYPES; k++) + AngMom[i].CMFracType[k] += out->AngMomOut.CMFracType[k]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int i; + + { + int threadid = get_thread_num(); + + for(int j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= Ngroups) + break; + + if(Todo[i]) + { + R200[i] = 0.5 * (Left[i] + Right[i]); + subfind_overdensity_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + subfind_overdensity_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Main routine executing the spherical overdensity algorithm. + * + * \return Time needed for calculation. 
+ */
+double subfind_overdensity(void)
+{
+  /* Overview of what the code below does:
+   *  - for each of the four density contrasts in Deltas[] and every group with
+   *    Nsubs > 0, a radius is bracketed by Left[i]/Right[i] and bisected
+   *    (kernel_local() evaluates at the bracket midpoint) until
+   *    Right - Left <= 1e-4 * Left;
+   *  - with SUBFIND_EXTENDED_PROPERTIES two further passes (mainstep 1 and 2)
+   *    are run once each, followed by the potential energy calculation;
+   *  - results that fail the sanity checks are zeroed, then everything is copied
+   *    into the Group[] fields that belong to the current contrast.
+   */
+  long long ntot;
+  int i, npleft, rep, iter;
+  double t0, t1, overdensity, Deltas[4], rhoback, z, omegaz, x, DeltaMean200, DeltaCrit200, DeltaCrit500, DeltaTopHat;
+  double tstart = second();
+
+  /* scratch arrays, one entry per local group; released in reverse order at the end */
+  Left = (MyFloat *)mymalloc("Left", sizeof(MyFloat) * Ngroups);
+  Right = (MyFloat *)mymalloc("Right", sizeof(MyFloat) * Ngroups);
+  R200 = (double *)mymalloc("R200", sizeof(double) * Ngroups);
+  M200 = (double *)mymalloc("M200", sizeof(double) * Ngroups);
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  AngMom = (struct Angular_Momentum *)mymalloc("AngMom", sizeof(struct Angular_Momentum) * Ngroups);
+  Paux = (struct paux_data *)mymalloc("Paux", sizeof(struct paux_data) * NumPart);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  Todo = mymalloc("Todo", sizeof(char) * Ngroups);
+
+  if(All.ComovingIntegrationOn)
+    z = 1 / All.Time - 1;
+  else
+    z = 0;
+
+  rhoback = 3 * All.Omega0 * All.Hubble * All.Hubble / (8 * M_PI * All.G);
+
+  omegaz =
+      All.Omega0 * pow(1 + z, 3) / (All.Omega0 * pow(1 + z, 3) + (1 - All.Omega0 - All.OmegaLambda) * pow(1 + z, 2) + All.OmegaLambda);
+
+  DeltaMean200 = 200.0;
+  DeltaCrit200 = 200.0 / omegaz;
+  DeltaCrit500 = 500.0 / omegaz;
+
+  x = omegaz - 1;
+  DeltaTopHat = 18 * M_PI * M_PI + 82 * x - 39 * x * x;
+  DeltaTopHat /= omegaz;
+
+  Deltas[0] = DeltaMean200; /* standard fixed overdensity with respect to background */
+  Deltas[1] = DeltaTopHat;  /* tophat overdensity with respect to background */
+  Deltas[2] = DeltaCrit200; /* overdensity of 200 relative to critical, expressed relative to background density */
+  Deltas[3] = DeltaCrit500; /* overdensity of 500 relative to critical, expressed relative to background density */
+
+  generic_set_MaxNexport();
+
+  for(rep = 0; rep < 4; rep++) /* repeat for all four overdensity values */
+    {
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      int mainstepmax = 3;
+#else  /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+      int mainstepmax = 1;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES #else */
+      for(mainstep = 0; mainstep < mainstepmax; mainstep++)
+        {
+          /* flag the groups to work on; the bracket is only reset in the first pass so that
+           * the later passes reuse the converged bracket */
+          for(i = 0; i < Ngroups; i++)
+            {
+              if(Group[i].Nsubs > 0)
+                {
+                  if(mainstep == 0)
+                    {
+                      double rguess = pow(All.G * Group[i].Mass / (100 * All.Hubble * All.Hubble), 1.0 / 3);
+
+                      Right[i] = 3 * rguess;
+                      Left[i] = 0;
+                    }
+                  Todo[i] = 1;
+                }
+              else
+                {
+                  Todo[i] = 0;
+                }
+            }
+
+          iter = 0;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+          /* mainstep 1 refills the Paux sample from scratch */
+          if(mainstep == 1)
+            NumPaux = 0;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+          /* we will repeat the whole thing for those groups where we didn't converge to a SO radius yet */
+          do
+            {
+              t0 = second();
+
+              generic_comm_pattern(Ngroups, kernel_local, kernel_imported);
+
+              if(mainstep == 0)
+                {
+                  /* do final operations on results */
+                  for(i = 0, npleft = 0; i < Ngroups; i++)
+                    {
+                      if(Todo[i])
+                        {
+                          overdensity = M200[i] / (4.0 * M_PI / 3.0 * R200[i] * R200[i] * R200[i]) / rhoback;
+
+                          if((Right[i] - Left[i]) > 1.0e-4 * Left[i])
+                            {
+                              /* need to redo this group */
+                              npleft++;
+
+                              /* bisection step: too dense means the radius is too small */
+                              if(overdensity > Deltas[rep])
+                                Left[i] = R200[i];
+                              else
+                                Right[i] = R200[i];
+
+                              /* diagnostics for the last iterations before giving up */
+                              if(iter >= MAXITER - 10)
+                                {
+                                  printf("gr=%d task=%d R200=%g Left=%g Right=%g Menclosed=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i,
+                                         ThisTask, R200[i], Left[i], Right[i], M200[i], Right[i] - Left[i], Group[i].Pos[0],
+                                         Group[i].Pos[1], Group[i].Pos[2]);
+                                  myflush(stdout);
+                                }
+                            }
+                          else
+                            Todo[i] = 0;
+                        }
+                    }
+                }
+              else
+                /* passes 1 and 2 need a single evaluation: clear all flags, leave npleft at 0 */
+                for(i = 0, npleft = 0; i < Ngroups; i++)
+                  Todo[i] = 0;
+
+              /* global count of groups that still need another iteration */
+              sumup_large_ints(1, &npleft, &ntot);
+
+              t1 = second();
+
+              if(ntot > 0)
+                {
+                  iter++;
+
+                  /* NOTE(review): iter was incremented just above, so this test looks always true */
+                  if(iter > 0)
+                    mpi_printf("SUBFIND: SO iteration %2d: need to repeat for %12lld halo centers. (took %g sec)\n", iter, ntot,
+                               timediff(t0, t1));
+
+                  if(iter > MAXITER)
+                    terminate("failed to converge in SO iteration");
+                }
+            }
+          while(ntot > 0);
+        } /* end of mainstep loop */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      double *egypot = mymalloc("egypot", Ngroups * sizeof(double));
+
+      subfind_so_potegy(egypot);
+
+      for(i = 0; i < Ngroups; i++)
+        {
+          double rate;
+
+          /* work out sampling rate */
+          /* NOTE(review): AngMom[i] is only written for groups with Nsubs > 0; for the others N200 is read
+           * here before the entry is zeroed further down - confirm that mymalloc memory is safe to read */
+          if(AngMom[i].N200 < SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER)
+            rate = 1.0;
+          else
+            rate = (SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER / AngMom[i].N200);
+
+          /* same rate expression as used when Paux was filled; the energy is divided by rate squared */
+          AngMom[i].Epot = egypot[i] / (rate * rate);
+        }
+
+      myfree(egypot);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+      for(i = 0; i < Ngroups; i++)
+        {
+          if(Group[i].Nsubs > 0)
+            {
+              overdensity = M200[i] / (4.0 * M_PI / 3.0 * R200[i] * R200[i] * R200[i]) / rhoback;
+
+              /* discard results that overshoot the target contrast by more than 10 percent ... */
+              if((overdensity - Deltas[rep]) > 0.1 * Deltas[rep])
+                {
+                  R200[i] = M200[i] = 0;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                  memset(&AngMom[i], 0, sizeof(struct Angular_Momentum));
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                }
+              /* ... or that enclose less than five times the mean particle mass of the group */
+              else if(M200[i] < 5 * Group[i].Mass / Group[i].Len)
+                {
+                  R200[i] = M200[i] = 0;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                  memset(&AngMom[i], 0, sizeof(struct Angular_Momentum));
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                }
+            }
+          else
+            {
+              /* groups without subhalos were never evaluated */
+              R200[i] = M200[i] = 0;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+              memset(&AngMom[i], 0, sizeof(struct Angular_Momentum));
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+            }
+
+          /* copy into the catalogue fields of the current contrast (same order as Deltas[]) */
+          switch(rep)
+            {
+              case 0:
+                Group[i].M_Mean200 = M200[i];
+                Group[i].R_Mean200 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_Mean200 = AngMom[i].Ekin;
+                Group[i].Ethr_Mean200 = AngMom[i].Ethr;
+                Group[i].Epot_Mean200 = AngMom[i].Epot;
+                Group[i].CMFrac_Mean200 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_Mean200[k] = AngMom[i].MassType[k];
+                    Group[i].LenType_Mean200[k] = AngMom[i].LenType[k];
+                    Group[i].CMFracType_Mean200[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_Mean200[k] = AngMom[i].Jtot[k];
+                    Group[i].JDM_Mean200[k] = AngMom[i].Jdm[k];
+                    Group[i].JGas_Mean200[k] = AngMom[i].Jgas[k];
+                    Group[i].JStars_Mean200[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+              case 1:
+                Group[i].M_TopHat200 = M200[i];
+                Group[i].R_TopHat200 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_TopHat200 = AngMom[i].Ekin;
+                Group[i].Ethr_TopHat200 = AngMom[i].Ethr;
+                Group[i].Epot_TopHat200 = AngMom[i].Epot;
+                Group[i].CMFrac_TopHat200 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_TopHat200[k] = AngMom[i].MassType[k];
+                    Group[i].LenType_TopHat200[k] = AngMom[i].LenType[k];
+                    Group[i].CMFracType_TopHat200[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_TopHat200[k] = AngMom[i].Jtot[k];
+                    Group[i].JDM_TopHat200[k] = AngMom[i].Jdm[k];
+                    Group[i].JGas_TopHat200[k] = AngMom[i].Jgas[k];
+                    Group[i].JStars_TopHat200[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+              case 2:
+                Group[i].M_Crit200 = M200[i];
+                Group[i].R_Crit200 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_Crit200 = AngMom[i].Ekin;
+                Group[i].Ethr_Crit200 = AngMom[i].Ethr;
+                Group[i].Epot_Crit200 = AngMom[i].Epot;
+                Group[i].CMFrac_Crit200 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_Crit200[k] = AngMom[i].MassType[k];
+                    Group[i].LenType_Crit200[k] = AngMom[i].LenType[k];
+                    Group[i].CMFracType_Crit200[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_Crit200[k] = AngMom[i].Jtot[k];
+                    Group[i].JDM_Crit200[k] = AngMom[i].Jdm[k];
+                    Group[i].JGas_Crit200[k] = AngMom[i].Jgas[k];
+                    Group[i].JStars_Crit200[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+              case 3:
+                Group[i].M_Crit500 = M200[i];
+                Group[i].R_Crit500 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_Crit500 = AngMom[i].Ekin;
+                Group[i].Ethr_Crit500 = AngMom[i].Ethr;
+                Group[i].Epot_Crit500 = AngMom[i].Epot;
+                Group[i].CMFrac_Crit500 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_Crit500[k] = AngMom[i].MassType[k];
+                    Group[i].LenType_Crit500[k] = AngMom[i].LenType[k];
+                    Group[i].CMFracType_Crit500[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_Crit500[k] = AngMom[i].Jtot[k];
+                    Group[i].JDM_Crit500[k] = AngMom[i].Jdm[k];
+                    Group[i].JGas_Crit500[k] = AngMom[i].Jgas[k];
+                    Group[i].JStars_Crit500[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+            }
+        }
+    }
+
+  /* release in reverse order of allocation */
+  myfree(Todo);
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  myfree(Paux);
+  myfree(AngMom);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+  myfree(M200);
+  myfree(R200);
+  myfree(Right);
+  myfree(Left);
+
+  double tend = second();
+  return timediff(tstart, tend);
+}
+
+/*! \brief Evaluate function of subfind_overdensity.
+ *
+ * \param[in] target Index of group.
+ * \param[in] mode Flag if it operates on local or imported data.
+ * \param[in] threadid ID of thread.
+ * + * \return 0 + */ +static int subfind_overdensity_evaluate(int target, int mode, int threadid) +{ + int k, p, no, numnodes, *firstnode; + double hsml, mass; + MyDouble *pos; + struct NODE *current; + MyDouble dx, dy, dz, dist, r2; +#define FACT2 0.86602540 + MyDouble xtmp, ytmp, ztmp; + + data_in local, *in; + data_out out; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + in = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + in = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = in->Pos; + hsml = in->R200; + mass = 0; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + double Pmom[3], Mtot = 0, Jtot[3], Jdm[3], Jgas[3], Jstars[3], CMFrac = 0, N200 = 0; + double ekin = 0, etherm = 0; + double MassType[NTYPES], CMFracType[NTYPES]; + int LenType[NTYPES]; + + for(int i = 0; i < 3; i++) + { + Pmom[i] = 0; + Jtot[i] = 0; + Jdm[i] = 0; + Jgas[i] = 0; + Jstars[i] = 0; + } + for(int i = 0; i < NTYPES; i++) + { + MassType[i] = 0; + LenType[i] = 0; + CMFracType[i] = 0; + } + + if(mainstep == 1) + { + Mtot = in->M200; + N200 = in->AngMomIn.N200; + for(int i = 0; i < 3; i++) + Pmom[i] = in->AngMomIn.Pmom[i]; + } + else if(mainstep == 2) + { + Mtot = in->M200; + for(int i = 0; i < 3; i++) + { + Pmom[i] = in->AngMomIn.Pmom[i]; + Jtot[i] = in->AngMomIn.Jtot[i]; + Jdm[i] = in->AngMomIn.Jdm[i]; + Jgas[i] = in->AngMomIn.Jgas[i]; + Jstars[i] = in->AngMomIn.Jstars[i]; + } + for(int i = 0; i < NTYPES; i++) + MassType[i] = in->AngMomIn.MassType[i]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + for(k = 0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = Tree_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + if(no < Tree_MaxPart) /* single particle */ + { + p = no; + no = Nextnode[no]; + + dist = hsml; + dx = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - pos[0]); + if(dx > dist) + continue; + dy = 
FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - pos[2]); + if(dz > dist) + continue; + if(dx * dx + dy * dy + dz * dz > dist * dist) + continue; + + if(mainstep == 0) + mass += P[p].Mass; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + if(mainstep == 0) + { + for(int i = 0; i < 3; i++) + Pmom[i] += P[p].Mass * P[p].Vel[i] / All.cf_atime; // units: 10^10 M_sol/h km/s + + for(int i = 0; i < NTYPES; i++) + if(P[p].Type == i) + { + MassType[i] += P[p].Mass; + + LenType[i]++; + } + + N200 += 1.0; + } + else if(mainstep == 1) + { + double rate; + /* work out sampling rate */ + if(N200 < SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER) + rate = 1.0; + else + rate = (SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER / N200); + + if(get_random_number_aux() < rate) + { + if(NumPaux >= NumPart) + terminate("NumPaux >= NumPart"); + + Paux[NumPaux].Pos[0] = NEAREST_X(P[p].Pos[0] - pos[0]); + Paux[NumPaux].Pos[1] = NEAREST_Y(P[p].Pos[1] - pos[1]); + Paux[NumPaux].Pos[2] = NEAREST_Z(P[p].Pos[2] - pos[2]); + Paux[NumPaux].Mass = P[p].Mass; + Paux[NumPaux].TaskOfGr = in->TaskOfGr; + Paux[NumPaux].LocGrIndex = in->LocGrIndex; + Paux[NumPaux].Type = P[p].Type; + Paux[NumPaux].SofteningType = P[p].SofteningType; + NumPaux++; + } + + int ptype = P[p].Type; + + double Pos_pbc[3], Vel_centre[3], Vel_tot[3]; + Pos_pbc[0] = NEAREST_X(P[p].Pos[0] - pos[0]) * All.cf_atime; + Pos_pbc[1] = NEAREST_Y(P[p].Pos[1] - pos[1]) * All.cf_atime; + Pos_pbc[2] = NEAREST_Z(P[p].Pos[2] - pos[2]) * All.cf_atime; + + for(int i = 0; i < 3; i++) + Vel_centre[i] = (Pmom[i] / Mtot); // units: km/s + + for(int i = 0; i < 3; i++) + Vel_tot[i] = P[p].Vel[i] / All.cf_atime - Vel_centre[i] + All.cf_Hrate * Pos_pbc[i]; + + ekin += 0.5 * P[p].Mass * (Vel_tot[0] * Vel_tot[0] + Vel_tot[1] * Vel_tot[1] + Vel_tot[2] * Vel_tot[2]); + + Jtot[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + Jtot[1] += P[p].Mass * (Pos_pbc[2] * 
Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + Jtot[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + if(ptype == 1) // dm illustris + { + Jdm[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + Jdm[1] += P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + Jdm[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 0) // gas + { + etherm += P[p].Mass * PS[p].Utherm; + + Jgas[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + Jgas[1] += P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + Jgas[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 4) // stars + { + Jstars[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + Jstars[1] += P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + Jstars[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + } + else if(mainstep == 2) + { + int ptype = P[p].Type; + + double Pos_pbc[3], Vel_centre[3], Vel_tot[3], jpart[3], Jtot[3]; + Pos_pbc[0] = NEAREST_X(P[p].Pos[0] - pos[0]) * All.cf_atime; + Pos_pbc[1] = NEAREST_Y(P[p].Pos[1] - pos[1]) * All.cf_atime; + Pos_pbc[2] = NEAREST_Z(P[p].Pos[2] - pos[2]) * All.cf_atime; + + for(int i = 0; i < 3; i++) + Vel_centre[i] = (Pmom[i] / Mtot); + + for(int i = 0; i < 3; i++) + Vel_tot[i] = P[p].Vel[i] / All.cf_atime - Vel_centre[i] + All.cf_Hrate * Pos_pbc[i]; + + jpart[0] = P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + jpart[1] = P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + jpart[2] = P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + if((Jtot[0] * jpart[0] + Jtot[1] * jpart[1] + Jtot[2] * jpart[2]) < 0.) + CMFrac += P[p].Mass / Mtot; + + if(ptype == 1) // dm + if((Jdm[0] * jpart[0] + Jdm[1] * jpart[1] + Jdm[2] * jpart[2]) < 0.) 
+ CMFracType[1] += P[p].Mass / MassType[1]; + + if(ptype == 0) // gas + if((Jgas[0] * jpart[0] + Jgas[1] * jpart[1] + Jgas[2] * jpart[2]) < 0.) + CMFracType[0] += P[p].Mass / MassType[0]; + + if(ptype == 4) // stars + if((Jstars[0] * jpart[0] + Jstars[1] * jpart[1] + Jstars[2] * jpart[2]) < 0.) + CMFracType[4] += P[p].Mass / MassType[4]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + } + else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + if(mode == MODE_IMPORTED_PARTICLES) + { + if(no < + Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + break; + } + + current = &Nodes[no]; + + no = current->u.d.sibling; /* in case the node can be discarded */ + + dist = hsml + 0.5 * current->len; + dx = FOF_NEAREST_LONG_X(current->center[0] - pos[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]); + if(dz > dist) + continue; + /* now test against the minimal sphere enclosing everything */ + dist += FACT1 * current->len; + if((r2 = (dx * dx + dy * dy + dz * dz)) > dist * dist) + continue; + +#ifndef SUBFIND_EXTENDED_PROPERTIES + if(no >= Tree_FirstNonTopLevelNode) /* only do this for fully local nodes */ + { + /* test whether the node is contained within the sphere, which gives short-cut if we only need the mass */ + dist = hsml - FACT2 * current->len; + if(dist > 0) + if(r2 < dist * dist) + { + mass += current->u.d.mass; + continue; + } + } +#endif /* #ifndef SUBFIND_EXTENDED_PROPERTIES */ + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */ + { + int n = no - Tree_ImportedNodeOffset; + no = Nextnode[no - Tree_MaxNodes]; + + dist = hsml; + dx = FOF_NEAREST_LONG_X(Tree_Points[n].Pos[0] - pos[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(Tree_Points[n].Pos[1] - pos[1]); 
+ if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(Tree_Points[n].Pos[2] - pos[2]); + if(dz > dist) + continue; + if(dx * dx + dy * dy + dz * dz > dist * dist) + continue; + + mass += Tree_Points[n].Mass; + } + else /* pseudo particle */ + { + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == MODE_IMPORTED_PARTICLES"); + + if(mode == MODE_LOCAL_PARTICLES) + tree_treefind_export_node_threads(no, target, threadid); + + no = Nextnode[no - Tree_MaxNodes]; + } + } + } + + out.Mass = mass; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + if(mainstep == 0) + { + for(int k = 0; k < 3; k++) + out.AngMomOut.Pmom[k] = Pmom[k]; + for(int k = 0; k < NTYPES; k++) + { + out.AngMomOut.MassType[k] = MassType[k]; + out.AngMomOut.LenType[k] = LenType[k]; + } + + out.AngMomOut.N200 = N200; + } + else if(mainstep == 1) + { + for(int k = 0; k < 3; k++) + { + out.AngMomOut.Jtot[k] = Jtot[k]; + out.AngMomOut.Jdm[k] = Jdm[k]; + out.AngMomOut.Jgas[k] = Jgas[k]; + out.AngMomOut.Jstars[k] = Jstars[k]; + } + + out.AngMomOut.Ekin = ekin; + out.AngMomOut.Ethr = etherm; + } + else if(mainstep == 2) + { + out.AngMomOut.CMFrac = CMFrac; + for(int k = 0; k < NTYPES; k++) + out.AngMomOut.CMFracType[k] = CMFracType[k]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_so_potegy.c b/src/amuse/community/arepo/src/subfind/subfind_so_potegy.c new file mode 100644 index 0000000000..823cb62eb2 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_so_potegy.c @@ -0,0 +1,853 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/subfind/subfind_so_potegy.c
+ * \date 05/2018
+ * \brief Calculates the potential energy.
+ * \details contains functions:
+ * static void subfind_so_potegy_loctree_findExtent(int npart,
+ * int start)
+ * static int subfind_so_potegy_loctree_treebuild(int npart,
+ * int start)
+ * static void subfind_so_potegy_loctree_update_node_recursive(
+ * int no, int sib, int father)
+ * double subfind_so_potegy_loctree_treeevaluate_potential(int
+ * target)
+ * static size_t subfind_so_potegy_loctree_treeallocate(int
+ * maxnodes, int maxpart)
+ * static void subfind_so_potegy_loctree_treefree(void)
+ * static int subfind_compare_Paux_LocGrIndex(const void *a,
+ * const void *b)
+ * double subfind_so_potegy(double *egypot)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES)
+
+#include "../fof/fof.h"
+#include "subfind.h"
+
+/* geometry of the root node of the local tree; all six are set by
+ * subfind_so_potegy_loctree_findExtent() */
+static double RootLen, RootFac, RootBigFac, RootInverseLen, RootCenter[3], RootCorner[3];
+static int LocMaxPart; /* index of the first tree node: indices below it denote particles */
+static int MaxNodes, last;
+static int *LocNextNode;
+static unsigned long long *LocTree_IntPos_list; /* three integer coordinates per particle, filled during the tree build */
+static struct paux_data *LocPaux;               /* particle sample the local tree is built from */
+
+static void subfind_so_potegy_loctree_update_node_recursive(int no, int sib, int father);
+
+/*! \brief Node structure for local tree.
+ *
+ * The union holds the eight daughter slots while the tree is being built
+ * (u.suns) and the multipole/walk data afterwards (u.d).
+ */
+static struct LocNODE
+{
+  union
+  {
+    int suns[8]; /*!< temporary pointers to daughter nodes */
+    struct
+    {
+      MyDouble s[3]; /*!< center of mass of node */
+      MyDouble mass; /*!< mass of node */
+      unsigned char maxsofttype;
+#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING)
+      unsigned char maxhydrosofttype;
+      unsigned char minhydrosofttype;
+#endif /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */
+      int sibling;  /*!< this gives the next node in the walk in case the current node can be used */
+      int nextnode; /*!< this gives the next node in case the current node needs to be opened */
+    } d;
+  } u;
+
+  MyDouble center[3]; /*!< geometrical center of node */
+  MyFloat len;        /*!< sidelength of treenode */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+  MyDouble mass_per_type[NSOFTTYPES];
+#endif
+} * LocNodes_base, /*!< points to the actual memory allocated for the nodes */
+    *LocNodes;     /*!< this is a pointer used to access the nodes which is shifted such that Nodes[LocMaxPart]
+                      gives the first allocated node */
+
+/*! \brief Finds spatial extent of local particles.
+ *
+ * Sets global 'Root*' variables that determine root node properties.
+ *
+ * \param[in] npart Number of particles.
+ * \param[in] start Start index.
+ *
+ * \return void
+ */
+static void subfind_so_potegy_loctree_findExtent(int npart, int start)
+{
+  double lower[3] = {MAX_REAL_NUMBER, MAX_REAL_NUMBER, MAX_REAL_NUMBER};
+  double upper[3] = {-MAX_REAL_NUMBER, -MAX_REAL_NUMBER, -MAX_REAL_NUMBER};
+
+  /* axis-aligned bounding box of LocPaux[start] ... LocPaux[start + npart - 1] */
+  for(int n = start; n < start + npart; n++)
+    for(int axis = 0; axis < 3; axis++)
+      {
+        const double coord = LocPaux[n].Pos[axis];
+
+        if(coord < lower[axis])
+          lower[axis] = coord;
+
+        if(coord > upper[axis])
+          upper[axis] = coord;
+      }
+
+  /* the root cube takes the longest box edge, enlarged by 0.1 percent */
+  double extent = 0;
+  for(int axis = 0; axis < 3; axis++)
+    {
+      const double span = upper[axis] - lower[axis];
+
+      if(span > extent)
+        extent = span;
+    }
+
+  extent *= 1.001;
+
+  RootLen = extent;
+  RootInverseLen = 1.0 / RootLen;
+  RootFac = 1.0 / extent * (((peanokey)1) << (BITS_PER_DIMENSION));
+  RootBigFac = (RootLen / (((long long)1) << 52));
+
+  /* centre the cube on the bounding box */
+  for(int axis = 0; axis < 3; axis++)
+    {
+      const double mid = 0.5 * (lower[axis] + upper[axis]);
+
+      RootCenter[axis] = mid;
+      RootCorner[axis] = mid - 0.5 * extent;
+    }
+}
+
+/*! \brief Builds local tree.
+ *
+ * \param[in] npart Number of particles.
+ * \param[in] start Start index.
+ *
+ * \return Number of nodes in tree.
+ */ +static int subfind_so_potegy_loctree_treebuild(int npart, int start) +{ + int subnode = 0, parent = -1, numnodes; + int nfree, th, nn; + struct LocNODE *nfreep; + + /* select first node */ + nfree = LocMaxPart; + nfreep = &LocNodes[nfree]; + + /* create an empty root node */ + nfreep->len = (MyFloat)RootLen; + for(int i = 0; i < 3; i++) + nfreep->center[i] = (MyFloat)RootCenter[i]; + + for(int i = 0; i < 8; i++) + nfreep->u.suns[i] = -1; + + numnodes = 1; + nfreep++; + nfree++; + + /* insert all particles */ + + LocTree_IntPos_list = + (unsigned long long *)mymalloc_movable(&LocTree_IntPos_list, "LocTree_IntPos_list", 3 * LocMaxPart * sizeof(unsigned long long)); + + for(int k = 0; k < npart; k++) + { + int i = start + k; + + MyDouble *posp; + + posp = &LocPaux[i].Pos[0]; + + unsigned long long xxb = force_double_to_int(((posp[0] - RootCorner[0]) * RootInverseLen) + 1.0); + unsigned long long yyb = force_double_to_int(((posp[1] - RootCorner[1]) * RootInverseLen) + 1.0); + unsigned long long zzb = force_double_to_int(((posp[2] - RootCorner[2]) * RootInverseLen) + 1.0); + unsigned long long mask = ((unsigned long long)1) << (52 - 1); + unsigned char shiftx = (52 - 1); + unsigned char shifty = (52 - 2); + unsigned char shiftz = (52 - 3); + signed long long centermask = (0xFFF0000000000000llu); + unsigned char levels = 0; + + unsigned long long *intposp = &LocTree_IntPos_list[3 * i]; + + *intposp++ = xxb; + *intposp++ = yyb; + *intposp++ = zzb; + + th = LocMaxPart; + + while(1) + { + if(th >= LocMaxPart) /* we are dealing with an internal node */ + { + subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + centermask >>= 1; + mask >>= 1; + levels++; + + if(levels > MAX_TREE_LEVEL) + { + /* seems like we're dealing with particles at identical (or extremely close) + * locations. Shift subnode index to allow tree construction. 
Note: Multipole moments + * of tree are still correct, but one should MAX_TREE_LEVEL large enough to have + * DomainLen/2^MAX_TREE_LEVEL < gravitational softening length + */ + for(int j = 0; j < 8; j++) + { + if(LocNodes[th].u.suns[subnode] < 0) + break; + + subnode++; + if(subnode >= 8) + subnode = 7; + } + } + + nn = LocNodes[th].u.suns[subnode]; + + if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */ + { + parent = th; /* note: subnode can still be used in the next step of the walk */ + th = nn; + } + else + { + /* here we have found an empty slot where we can + * attach the new particle as a leaf + */ + LocNodes[th].u.suns[subnode] = i; + break; /* done for this particle */ + } + } + else + { + /* we try to insert into a leaf with a single particle + * need to generate a new internal node at this point + */ + LocNodes[parent].u.suns[subnode] = nfree; + + /* the other is: */ + double len = ((double)(mask << 1)) * RootBigFac; + double cx = ((double)((xxb & centermask) | mask)) * RootBigFac + RootCorner[0]; + double cy = ((double)((yyb & centermask) | mask)) * RootBigFac + RootCorner[1]; + double cz = ((double)((zzb & centermask) | mask)) * RootBigFac + RootCorner[2]; + + nfreep->len = len; + nfreep->center[0] = cx; + nfreep->center[1] = cy; + nfreep->center[2] = cz; + + nfreep->u.suns[0] = -1; + nfreep->u.suns[1] = -1; + nfreep->u.suns[2] = -1; + nfreep->u.suns[3] = -1; + nfreep->u.suns[4] = -1; + nfreep->u.suns[5] = -1; + nfreep->u.suns[6] = -1; + nfreep->u.suns[7] = -1; + + unsigned long long *intppos = &LocTree_IntPos_list[3 * th]; + + subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) | + ((unsigned char)((intppos[2] & mask) >> shiftz))); + + nfreep->u.suns[subnode] = th; + + th = nfree; /* resume trying to insert the new particle at + the newly created internal node */ + + numnodes++; + nfree++; + nfreep++; + + if(numnodes >= MaxNodes) + { + MaxNodes *= 1.2; + + 
LocNodes_base = (struct LocNODE *)myrealloc_movable(LocNodes_base, (MaxNodes + 1) * sizeof(struct LocNODE)); + LocNodes = LocNodes_base - LocMaxPart; + nfreep = &LocNodes[nfree]; + + if(numnodes > MaxNodes) + { + char buf[1000]; + + sprintf(buf, "maximum number %d of tree-nodes reached., for particle %d %g %g %g", MaxNodes, i, + LocPaux[i].Pos[0], LocPaux[i].Pos[1], LocPaux[i].Pos[2]); + terminate(buf); + } + } + } + } + } + + myfree(LocTree_IntPos_list); + + /* now compute the multipole moments recursively */ + last = -1; + subfind_so_potegy_loctree_update_node_recursive(LocMaxPart, -1, -1); + + if(last >= LocMaxPart) + LocNodes[last].u.d.nextnode = -1; + else + LocNextNode[last] = -1; + + return numnodes; +} + +/*! \brief Walk the tree and update node data recursively. + * + * This routine computes the multipole moments for a given internal node and + * all its subnodes using a recursive computation. Note that this switches + * the information stored in LocNodes[no].u from suns to d! + * + * + * \param[in] no Node index. + * \param[in] sib Sibling index. + * \param[in] father Parent index. + * + * \return void + */ +static void subfind_so_potegy_loctree_update_node_recursive(int no, int sib, int father) +{ + int j, jj, p, pp = 0, nextsib, suns[8]; + unsigned char maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + double mass_per_type[NSOFTTYPES]; +#ifdef ADAPTIVE_HYDRO_SOFTENING + unsigned char maxhydrosofttype; + unsigned char minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + double mass; + double s[3]; + + if(no >= LocMaxPart) + { + for(j = 0; j < 8; j++) + suns[j] = LocNodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will + overwrite one element (union!) 
*/ + if(last >= 0) + { + if(last >= LocMaxPart) + LocNodes[last].u.d.nextnode = no; + else + LocNextNode[last] = no; + } + + last = no; + + mass = 0; + s[0] = 0; + s[1] = 0; + s[2] = 0; + maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; + +#ifdef MULTIPLE_NODE_SOFTENING + for(j = 0; j < NSOFTTYPES; j++) + mass_per_type[j] = 0; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + maxhydrosofttype = NSOFTTYPES; + minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + for(j = 0; j < 8; j++) + { + if((p = suns[j]) >= 0) + { + /* check if we have a sibling on the same level */ + for(jj = j + 1; jj < 8; jj++) + if((pp = suns[jj]) >= 0) + break; + + if(jj < 8) /* yes, we do */ + nextsib = pp; + else + nextsib = sib; + + subfind_so_potegy_loctree_update_node_recursive(p, nextsib, no); + + if(p >= LocMaxPart) /* an internal node */ + { + mass += LocNodes[p].u.d.mass; /* we assume a fixed particle mass */ + s[0] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[0]; + s[1] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[1]; + s[2] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[2]; + + if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[LocNodes[p].u.d.maxsofttype]) + maxsofttype = LocNodes[p].u.d.maxsofttype; + +#ifdef MULTIPLE_NODE_SOFTENING + int k; + for(k = 0; k < NSOFTTYPES; k++) + mass_per_type[k] += LocNodes[p].mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(maxhydrosofttype < LocNodes[p].u.d.maxhydrosofttype) + maxhydrosofttype = LocNodes[p].u.d.maxhydrosofttype; + if(minhydrosofttype > LocNodes[p].u.d.minhydrosofttype) + minhydrosofttype = LocNodes[p].u.d.minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + else /* a particle */ + { + mass += LocPaux[p].Mass; + + s[0] += LocPaux[p].Mass * LocPaux[p].Pos[0]; + s[1] += LocPaux[p].Mass * LocPaux[p].Pos[1]; + s[2] += LocPaux[p].Mass * LocPaux[p].Pos[2]; + + 
if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[LocPaux[p].SofteningType]) + maxsofttype = LocPaux[p].SofteningType; +#ifdef MULTIPLE_NODE_SOFTENING +#ifdef ADAPTIVE_HYDRO_SOFTENING + mass_per_type[LocPaux[p].Type == 0 ? 0 : LocPaux[p].SofteningType] += LocPaux[p].Mass; + + if(LocPaux[p].Type == 0) + { + if(maxhydrosofttype < LocPaux[p].SofteningType) + maxhydrosofttype = LocPaux[p].SofteningType; + if(minhydrosofttype > LocPaux[p].SofteningType) + minhydrosofttype = LocPaux[p].SofteningType; + } +#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + mass_per_type[LocPaux[p].SofteningType] += LocPaux[p].Mass; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + } + } + + if(mass > 0) + { + s[0] /= mass; + s[1] /= mass; + s[2] /= mass; + } + else + { + s[0] = LocNodes[no].center[0]; + s[1] = LocNodes[no].center[1]; + s[2] = LocNodes[no].center[2]; + } + + LocNodes[no].u.d.s[0] = (MyFloat)s[0]; + LocNodes[no].u.d.s[1] = (MyFloat)s[1]; + LocNodes[no].u.d.s[2] = (MyFloat)s[2]; + LocNodes[no].u.d.mass = (MyFloat)mass; + LocNodes[no].u.d.maxsofttype = maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + int k; + for(k = 0; k < NSOFTTYPES; k++) + LocNodes[no].mass_per_type[k] = mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + LocNodes[no].u.d.maxhydrosofttype = maxhydrosofttype; + LocNodes[no].u.d.minhydrosofttype = minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + LocNodes[no].u.d.sibling = sib; + } + else /* single particle or pseudo particle */ + { + if(last >= 0) + { + if(last >= LocMaxPart) + LocNodes[last].u.d.nextnode = no; + else + LocNextNode[last] = no; + } + + last = no; + } +} + +/*! \brief Calculates the gravitational potential energy of single particle. + * + * \pararm[in] target Target particle index (in LocPaux). + * + * \return Gravitational potential. 
+ */ +double subfind_so_potegy_loctree_treeevaluate_potential(int target) +{ + struct LocNODE *nop = 0; + int no; + double r2, dx, dy, dz, mass, r, u, h_i, h_j, hmax, h_inv, wp; + double pot, pos_x, pos_y, pos_z, xtmp, ytmp, ztmp; + + pos_x = LocPaux[target].Pos[0]; + pos_y = LocPaux[target].Pos[1]; + pos_z = LocPaux[target].Pos[2]; + + h_i = All.ForceSoftening[LocPaux[target].SofteningType]; + + pot = 0; + + no = LocMaxPart; + + while(no >= 0) + { +#ifdef MULTIPLE_NODE_SOFTENING + int indi_flag1 = -1, indi_flag2 = 0; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + if(no < LocMaxPart) /* single particle */ + { + dx = GRAVITY_NEAREST_X(LocPaux[no].Pos[0] - pos_x); + dy = GRAVITY_NEAREST_Y(LocPaux[no].Pos[1] - pos_y); + dz = GRAVITY_NEAREST_Z(LocPaux[no].Pos[2] - pos_z); + + r2 = dx * dx + dy * dy + dz * dz; + + mass = LocPaux[no].Mass; + + h_j = All.ForceSoftening[LocPaux[no].SofteningType]; + + if(h_j > h_i) + hmax = h_j; + else + hmax = h_i; + + no = LocNextNode[no]; + } + else + { + nop = &LocNodes[no]; + mass = nop->u.d.mass; + + dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x); + dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y); + dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z); + + r2 = dx * dx + dy * dy + dz * dz; + + /* check Barnes-Hut opening criterion */ + if(nop->len * nop->len > r2 * All.ErrTolThetaSubfind * All.ErrTolThetaSubfind) + { + /* open cell */ + if(mass) + { + no = nop->u.d.nextnode; + continue; + } + } + + h_j = All.ForceSoftening[nop->u.d.maxsofttype]; + + if(h_j > h_i) + { +#ifdef MULTIPLE_NODE_SOFTENING +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype) + if(LocNodes[no].mass_per_type[0] > 0) + if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype]) + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + indi_flag1 = 0; + indi_flag2 = NSOFTTYPES; +#else /* #ifdef MULTIPLE_NODE_SOFTENING */ + + if(r2 < h_j 
* h_j) + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } +#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */ + hmax = h_j; + } + else + hmax = h_i; + + no = nop->u.d.sibling; /* node can be used */ + } + + r = sqrt(r2); +#ifdef MULTIPLE_NODE_SOFTENING + int type; + for(type = indi_flag1; type < indi_flag2; type++) + { + if(type >= 0) + { + mass = nop->mass_per_type[type]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(type == 0) + h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype]; + else +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + h_j = All.ForceSoftening[type]; + + if(h_j > h_i) + hmax = h_j; + else + hmax = h_i; + } + + if(mass) + { +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + if(r >= hmax) + pot -= mass / r; + else + { + h_inv = 1.0 / hmax; + + u = r * h_inv; + + if(u < 0.5) + wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6)); + else + wp = -3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u))); + + pot += mass * h_inv * wp; +#ifdef MULTIPLE_NODE_SOFTENING + } + } +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + } + + return pot; +} + +/*! \brief Allocates memory used for storage of the tree and auxiliary arrays + * for tree-walk and link-lists. + * + * \param[in] maxnodes Maximum number of nodes. + * \param[in] maxpart Maximum number of particles. + * + * \return Number of allocated bytes. 
+ */ +static size_t subfind_so_potegy_loctree_treeallocate(int maxnodes, int maxpart) +{ + size_t bytes, allbytes = 0; + + if(LocNextNode) + terminate("loctree already allocated"); /* a non-NULL LocNextNode marks a live allocation; treefree resets it to NULL */ + + MaxNodes = maxnodes; + LocMaxPart = maxpart; + + LocNextNode = (int *)mymalloc("LocNextNode", bytes = maxpart * sizeof(int)); + allbytes += bytes; + + R2list = (r2type *)mymalloc("R2list", bytes = maxpart * sizeof(r2type)); + allbytes += bytes; + + LocNodes_base = (struct LocNODE *)mymalloc_movable(&LocNodes_base, "LocNodes_base", bytes = (MaxNodes + 1) * sizeof(struct LocNODE)); + LocNodes = LocNodes_base - LocMaxPart; /* shifted so that LocNodes[LocMaxPart] is the first allocated node */ + allbytes += bytes; + + return allbytes; +} + +/*! \brief Frees the memory obtained in subfind_so_potegy_loctree_treeallocate(), in reverse order of allocation, and resets the pointers to NULL. + * + * \return void + */ +static void subfind_so_potegy_loctree_treefree(void) +{ + myfree(LocNodes_base); + myfree(R2list); + myfree(LocNextNode); + + LocNextNode = NULL; + R2list = NULL; + LocNodes_base = NULL; /* note: the shifted LocNodes pointer is not reset here */ +} + +/*! \brief Comparison function for paux_data objects. + * + * Compares field LocGrIndex. + * + * \param[in] a First object to be compared. + * \param[in] b Second object to be compared. + * + * \return (-1,0,1); -1 if a < b. + */ +static int subfind_compare_Paux_LocGrIndex(const void *a, const void *b) +{ + if(((struct paux_data *)a)->LocGrIndex < ((struct paux_data *)b)->LocGrIndex) + return -1; + + if(((struct paux_data *)a)->LocGrIndex > ((struct paux_data *)b)->LocGrIndex) + return +1; + + return 0; +} + +/*! \brief Calculates potential energy of spherical overdensity groups. + * + * \param[out] egypot Array with potential energies in each group. + * + * \return Time this routine took. 
+ */ +double subfind_so_potegy(double *egypot) +{ + double t0 = second(); + mpi_printf("SUBFIND: Starting SO potential energy computation\n"); + + size_t *count_send = (size_t *)mymalloc_movable(&count_send, "count_send", NTask * sizeof(size_t)); + size_t *offset_send = (size_t *)mymalloc_movable(&offset_send, "offset_send", NTask * sizeof(size_t)); + size_t *count_recv = (size_t *)mymalloc_movable(&count_recv, "count_recv", NTask * sizeof(size_t)); + size_t *offset_recv = (size_t *)mymalloc_movable(&offset_recv, "offset_recv", NTask * sizeof(size_t)); + + for(int i = 0; i < NTask; i++) + count_send[i] = 0; + + for(int i = 0; i < NumPaux; i++) + count_send[Paux[i].TaskOfGr]++; /* count how many auxiliary particles go to each destination task */ + + MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); /* exchange the per-task counts, transferred as raw bytes of size_t */ + + offset_send[0] = offset_recv[0] = 0; + + for(int i = 1; i < NTask; i++) + { + offset_send[i] = offset_send[i - 1] + count_send[i - 1]; + offset_recv[i] = offset_recv[i - 1] + count_recv[i - 1]; + } + + struct paux_data *PauxTmp = (struct paux_data *)mymalloc_movable(&PauxTmp, "PauxTmp", NumPaux * sizeof(struct paux_data)); + + for(int i = 0; i < NTask; i++) + count_send[i] = 0; /* reset; reused below as the fill position within each task's send segment */ + + for(int i = 0; i < NumPaux; i++) + { + int task = Paux[i].TaskOfGr; + int loc = offset_send[task] + count_send[task]++; + PauxTmp[loc] = Paux[i]; + } + + int NumPauxRecv = 0; /* NOTE(review): int accumulator fed from size_t counts - confirm the received total always fits into an int */ + + for(int i = 0; i < NTask; i++) + NumPauxRecv += count_recv[i]; + + LocPaux = (struct paux_data *)mymalloc_movable(&LocPaux, "LocPaux", NumPauxRecv * sizeof(struct paux_data)); + + myMPI_Alltoallv(PauxTmp, count_send, offset_send, LocPaux, count_recv, offset_recv, sizeof(struct paux_data), 1, MPI_COMM_WORLD); + + myfree_movable(PauxTmp); + + qsort(LocPaux, NumPauxRecv, sizeof(struct paux_data), subfind_compare_Paux_LocGrIndex); /* sort by LocGrIndex so that the particles of each group are contiguous in LocPaux */ + + int *group_len = (int *)mymalloc("group_len", Ngroups * sizeof(int)); + int *group_off = (int *)mymalloc("group_off", Ngroups * sizeof(int)); + + for(int i = 0; i < Ngroups; i++) + group_len[i] = 0; + 
+ for(int i = 0; i < NumPauxRecv; i++) + { + int j = LocPaux[i].LocGrIndex; + if(j < 0 || j >= Ngroups) + terminate("j=%d Ngroups=%d", j, Ngroups); + + group_len[j]++; + } + + group_off[0] = 0; /* NOTE(review): written even when Ngroups == 0 - confirm mymalloc returns a usable block for a zero-byte request */ + + for(int i = 1; i < Ngroups; i++) + group_off[i] = group_off[i - 1] + group_len[i - 1]; + + int MaxAllocPart = NumPart; /* NOTE(review): the tree routines index LocNextNode and LocTree_IntPos_list with absolute LocPaux indices (group_off[i] + k) and treat indices >= LocMaxPart as nodes, so this size looks like it must be >= NumPauxRecv, not just >= the largest group - confirm */ + // extend in case a single group holds more particles than NumPart + for(int i = 0; i < Ngroups; i++) + if(group_len[i] > MaxAllocPart) + MaxAllocPart = group_len[i]; + + subfind_so_potegy_loctree_treeallocate((int)(All.TreeAllocFactor * MaxAllocPart) + NTopnodes, MaxAllocPart); + + /* now do the actual potential calculation: one tree per group, built over that group's slice of LocPaux */ + for(int i = 0; i < Ngroups; i++) + { + subfind_so_potegy_loctree_findExtent(group_len[i], group_off[i]); + subfind_so_potegy_loctree_treebuild(group_len[i], group_off[i]); + + egypot[i] = 0; + + for(int j = 0; j < group_len[i]; j++) + { + int target = group_off[i] + j; + + double pot = subfind_so_potegy_loctree_treeevaluate_potential(target); + + /* remove self-potential: the tree walk does not skip the target itself, which at r = 0 contributes -2.8 * Mass / softening */ + pot += LocPaux[target].Mass / (All.ForceSoftening[LocPaux[target].SofteningType] / 2.8); + + pot *= All.G / All.cf_atime; + + egypot[i] += 0.5 * pot * LocPaux[target].Mass; /* factor 0.5: summing over all targets counts every pair twice */ + } + } + + subfind_so_potegy_loctree_treefree(); + + myfree(group_off); + myfree(group_len); + + myfree(LocPaux); + + myfree(offset_recv); + myfree(count_recv); + myfree(offset_send); + myfree(count_send); + + double t1 = second(); + mpi_printf("SUBFIND: SO potential energy computation took %g sec\n", timediff(t0, t1)); + + return timediff(t0, t1); +} + +#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c b/src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c new file mode 100644 index 0000000000..5787cb3441 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c @@ -0,0 +1,442 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_sort_kernels.c + * \date 05/2018 + * \brief Comparison functions that serve as sorting kernels for various + * different structs used in subfind. + * \details contains functions: + * int subfind_compare_procassign_GrNr(const void *a, + * const void *b) + * int subfind_compare_submp_GrNr_DM_Density(const void *a, + * const void *b) + * int subfind_compare_submp_OldIndex(const void *a, + * const void *b) + * int subfind_compare_ID_list(const void *a, const void *b) + * int subfind_compare_SubGroup_GrNr_SubNr(const void *a, const + * void *b) + * int subfind_compare_dist_rotcurve(const void *a, const void + * *b) + * int subfind_compare_rlist_mhd(const void *a, const void *b) + * int subfind_compare_binding_energy(const void *a, const void + * *b) + * int subfind_compare_serial_candidates_boundlength(const void + * *a, const void *b) + * int subfind_compare_serial_candidates_rank(const void *a, + * const void *b) + * int subfind_compare_serial_candidates_subnr(const void *a, + * const void *b) + * int subfind_compare_coll_candidates_subnr(const void *a, + * const void *b) + * int subfind_compare_coll_candidates_nsubs(const void *a, + * 
const void *b) + * int subfind_compare_coll_candidates_boundlength(const void + * *a, const void *b) + * int subfind_compare_coll_candidates_rank(const void *a, + * const void *b) + * int subfind_fof_compare_ID(const void *a, const void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../fof/fof.h" +#include "subfind.h" + +#ifdef SUBFIND + +/*! \brief Comparison function for proc_assign_data objects. + * + * Sorting kernel comparing element GrNr. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_procassign_GrNr(const void *a, const void *b) +{ + if(((struct proc_assign_data *)a)->GrNr < ((struct proc_assign_data *)b)->GrNr) + return -1; + + if(((struct proc_assign_data *)a)->GrNr > ((struct proc_assign_data *)b)->GrNr) + return +1; + + return 0; +} + +/*! \brief Comparison function for submp_data objects. + * + * Sorting kernel comparing element (most important first): + * GrNr, DM_Density. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b, except for DM density, where -1 if a > b + */ +int subfind_compare_submp_GrNr_DM_Density(const void *a, const void *b) +{ + if(((struct submp_data *)a)->GrNr < ((struct submp_data *)b)->GrNr) + return -1; + + if(((struct submp_data *)a)->GrNr > ((struct submp_data *)b)->GrNr) + return +1; + + if(((struct submp_data *)a)->DM_Density > ((struct submp_data *)b)->DM_Density) + return -1; + + if(((struct submp_data *)a)->DM_Density < ((struct submp_data *)b)->DM_Density) + return +1; + + return 0; +} + +/*! 
\brief Comparison function for submp_data objects. + * + * Sorting kernel comparing element OldIndex. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_submp_OldIndex(const void *a, const void *b) +{ + if(((struct submp_data *)a)->OldIndex < ((struct submp_data *)b)->OldIndex) + return -1; + + if(((struct submp_data *)a)->OldIndex > ((struct submp_data *)b)->OldIndex) + return +1; + + return 0; +} + +/*! \brief Comparison function for id_list objects. + * + * Sorting kernel comparing elements (most important first): + * GrNr, SubNr, Type, BindingEgy. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_ID_list(const void *a, const void *b) +{ + if(((struct id_list *)a)->GrNr < ((struct id_list *)b)->GrNr) + return -1; + + if(((struct id_list *)a)->GrNr > ((struct id_list *)b)->GrNr) + return +1; + + if(((struct id_list *)a)->SubNr < ((struct id_list *)b)->SubNr) + return -1; + + if(((struct id_list *)a)->SubNr > ((struct id_list *)b)->SubNr) + return +1; + + if(((struct id_list *)a)->Type < ((struct id_list *)b)->Type) + return -1; + + if(((struct id_list *)a)->Type > ((struct id_list *)b)->Type) + return +1; + + if(((struct id_list *)a)->BindingEgy < ((struct id_list *)b)->BindingEgy) + return -1; + + if(((struct id_list *)a)->BindingEgy > ((struct id_list *)b)->BindingEgy) + return +1; + + return 0; +} + +/*! \brief Comparison function for subgroup_properties objects. + * + * Sorting kernel comparing elements (most important first): + * GrNr and SubNr. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int subfind_compare_SubGroup_GrNr_SubNr(const void *a, const void *b) +{ + if(((struct subgroup_properties *)a)->GrNr < ((struct subgroup_properties *)b)->GrNr) + return -1; + + if(((struct subgroup_properties *)a)->GrNr > ((struct subgroup_properties *)b)->GrNr) + return +1; + + if(((struct subgroup_properties *)a)->SubNr < ((struct subgroup_properties *)b)->SubNr) + return -1; + + if(((struct subgroup_properties *)a)->SubNr > ((struct subgroup_properties *)b)->SubNr) + return +1; + + return 0; +} + +/*! \brief Comparison function for sort_r2list objects. + * + * Sorting kernel comparing element r. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_dist_rotcurve(const void *a, const void *b) +{ + if(((sort_r2list *)a)->r < ((sort_r2list *)b)->r) + return -1; + + if(((sort_r2list *)a)->r > ((sort_r2list *)b)->r) + return +1; + + return 0; +} + +/*! \brief Comparison function for variables of type double. + * + * Sorting kernel; larger values sort first (descending order). + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a > b. + */ +int subfind_compare_binding_energy(const void *a, const void *b) +{ + if(*((double *)a) > *((double *)b)) + return -1; + + if(*((double *)a) < *((double *)b)) + return +1; + + return 0; +} + +/*! \brief Comparison function for cand_dat objects. + * + * Sorting kernel comparing elements (most important first): + * bound_length and rank. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b, except bound length, where -1 if a > b. 
+ */ +int subfind_compare_serial_candidates_boundlength(const void *a, const void *b) +{ + if(((struct cand_dat *)a)->bound_length > ((struct cand_dat *)b)->bound_length) + return -1; + + if(((struct cand_dat *)a)->bound_length < ((struct cand_dat *)b)->bound_length) + return +1; + + if(((struct cand_dat *)a)->rank < ((struct cand_dat *)b)->rank) + return -1; + + if(((struct cand_dat *)a)->rank > ((struct cand_dat *)b)->rank) + return +1; + + return 0; +} + +/*! \brief Comparison function for cand_dat objects. + * + * Sorting kernel comparing elements (most important first): + * rank and len. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b, except for len where -1 if a > b. + */ +int subfind_compare_serial_candidates_rank(const void *a, const void *b) +{ + if(((struct cand_dat *)a)->rank < ((struct cand_dat *)b)->rank) + return -1; + + if(((struct cand_dat *)a)->rank > ((struct cand_dat *)b)->rank) + return +1; + + if(((struct cand_dat *)a)->len > ((struct cand_dat *)b)->len) + return -1; + + if(((struct cand_dat *)a)->len < ((struct cand_dat *)b)->len) + return +1; + + return 0; +} + +/*! \brief Comparison function for cand_dat objects. + * + * Sorting kernel comparing element subnr. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_serial_candidates_subnr(const void *a, const void *b) +{ + if(((struct cand_dat *)a)->subnr < ((struct cand_dat *)b)->subnr) + return -1; + + if(((struct cand_dat *)a)->subnr > ((struct cand_dat *)b)->subnr) + return +1; + + return 0; +} + +/*! \brief Comparison function for coll_cand_dat objects. + * + * Sorting kernel comparing element subnr. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int subfind_compare_coll_candidates_subnr(const void *a, const void *b) +{ + if(((struct coll_cand_dat *)a)->subnr < ((struct coll_cand_dat *)b)->subnr) + return -1; + + if(((struct coll_cand_dat *)a)->subnr > ((struct coll_cand_dat *)b)->subnr) + return +1; + + return 0; +} + +/*! \brief Comparison function for coll_cand_dat objects. + * + * Sorting kernel comparing element nsub. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_coll_candidates_nsubs(const void *a, const void *b) +{ + if(((struct coll_cand_dat *)a)->nsub < ((struct coll_cand_dat *)b)->nsub) + return -1; + + if(((struct coll_cand_dat *)a)->nsub > ((struct coll_cand_dat *)b)->nsub) + return +1; + + return 0; +} + +/*! \brief Comparison function for coll_cand_dat objects. + * + * Sorting kernel comparing elements (most important first): + * bound_length, rank. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b, except for bound length where -1 if a > b. + */ +int subfind_compare_coll_candidates_boundlength(const void *a, const void *b) +{ + if(((struct coll_cand_dat *)a)->bound_length > ((struct coll_cand_dat *)b)->bound_length) + return -1; + + if(((struct coll_cand_dat *)a)->bound_length < ((struct coll_cand_dat *)b)->bound_length) + return +1; + + if(((struct coll_cand_dat *)a)->rank < ((struct coll_cand_dat *)b)->rank) + return -1; + + if(((struct coll_cand_dat *)a)->rank > ((struct coll_cand_dat *)b)->rank) + return +1; + + return 0; +} + +/*! \brief Comparison function for coll_cand_dat objects. + * + * Sorting kernel comparing elements (most important first): + * rank and len. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. 
+ * + * \return (-1,0,1), -1 if a < b, except for len, where -1 if a > b + */ +int subfind_compare_coll_candidates_rank(const void *a, const void *b) +{ + if(((struct coll_cand_dat *)a)->rank < ((struct coll_cand_dat *)b)->rank) + return -1; + + if(((struct coll_cand_dat *)a)->rank > ((struct coll_cand_dat *)b)->rank) + return +1; + + if(((struct coll_cand_dat *)a)->len > ((struct coll_cand_dat *)b)->len) + return -1; + + if(((struct coll_cand_dat *)a)->len < ((struct coll_cand_dat *)b)->len) + return +1; + + return 0; +} + +/*! \brief Comparison function for variables of MyIDType. + * + * Sorting kernel. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_fof_compare_ID(const void *a, const void *b) +{ + if(*((MyIDType *)a) < *((MyIDType *)b)) + return -1; + + if(*((MyIDType *)a) > *((MyIDType *)b)) + return +1; + + return 0; +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_vars.c b/src/amuse/community/arepo/src/subfind/subfind_vars.c new file mode 100644 index 0000000000..37d25a8cd5 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_vars.c @@ -0,0 +1,102 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_vars.c + * \date 05/2018 + * \brief Variables for the subfind algorithm. + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../main/allvars.h" + +#ifdef SUBFIND + +#include "../domain/domain.h" +#include "../fof/fof.h" +#include "subfind.h" + +double SubDomainCorner[3], SubDomainCenter[3], SubDomainLen, SubDomainFac; +double SubDomainInverseLen, SubDomainBigFac; + +MyDouble GrCM[3]; + +int GrNr; +int NumPartGroup; + +MPI_Comm SubComm; +int CommSplitColor; +int SubNTask, SubThisTask; +int SubTagOffset; + +struct topnode_data *SubTopNodes; +struct local_topnode_data *Sub_LocTopNodes; + +double SubTreeAllocFactor; + +#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) +int *NodeGrNr; +#endif + +int *SubDomainTask; +int *SubDomainNodeIndex; +int *SubNextnode; +int SubNTopleaves; +int SubNTopnodes; + +int SubTree_MaxPart; +int SubTree_NumNodes; +int SubTree_MaxNodes; +int SubTree_FirstNonTopLevelNode; +int SubTree_NumPartImported; +int SubTree_NumPartExported; +int SubTree_ImportedNodeOffset; +int SubTree_NextFreeNode; +struct NODE *SubNodes; +struct ExtNODE *SubExtNodes; +int *SubTree_ResultIndexList; +int *SubTree_Task_list; +unsigned long long *SubTree_IntPos_list; +MyDouble *SubTree_Pos_list; + +int Ncollective; +int NprocsCollective; +int MaxNsubgroups = 0; +int MaxNgbs; +int MaxSerialGroupLen; + +r2type *R2list; + +int NumPaux; + +struct paux_data *Paux; +struct proc_assign_data *ProcAssign; +struct subgroup_properties *SubGroup; +struct nearest_r2_data *R2Loc; +struct nearest_ngb_data *NgbLoc; +struct submp_data *submp; +struct cand_dat *candidates; +struct coll_cand_dat *coll_candidates; + +#endif /* #ifdef SUBFIND */ diff --git 
a/src/amuse/community/arepo/src/time_integration/darkenergy.c b/src/amuse/community/arepo/src/time_integration/darkenergy.c new file mode 100644 index 0000000000..c04f181e9f --- /dev/null +++ b/src/amuse/community/arepo/src/time_integration/darkenergy.c @@ -0,0 +1,74 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/time_integration/darkenergy.c + * \date 05/2018 + * \brief Contains the hubble function for a LCDM cosmology. + * \details Using Dark Energy instead of a cosmological constant can be + * achieved by replacing Lambda by Lambda * a^(-3*(1+w)) in the + * Hubble function. w = -1 gives back a standard cosmological + * constant! Also w = -1/3 gives Lambda / a^2 which then cancels + * within the Hubble function and is then equal to the dynamics + * of a universe with Lambda = 0 ! + * + * For a time varying w one has to replace Lambda * a^(-3*(1+w)) + * by Lambda * exp(Integral(a,1,3*(1+w)/a)) + * + * Dark Energy does not alter the powerspectrum of initial + * conditions.
To get the same cluster for various values or + * functions of w, once has do assign a new redshift to the + * initial conditions to match the linear growth factors, so + * g(z=0)/g(z_ini) == g_w(z=0)/g_w(z_ini^new). Also the initial + * velocities field has to be scaled by + *(Hubble_w(z_ini^new)*Omega_w(z_ini^new)^0.6)/(Hubble(z_ini)*Omega(z_ini)^0.6) + * where _w means the according functions including the terms for + * Dark Energy. + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Hubble function. + * + * Returns the Hubble function at a given scalefactor for a LCDM cosmology. + * + * \param[in] a Scalefactor. + * + * \return Hubble parameter in internal units. + */ +double INLINE_FUNC hubble_function(double a) +{ + double hubble_a; + + hubble_a = All.Omega0 / (a * a * a) + (1 - All.Omega0 - All.OmegaLambda) / (a * a) + All.OmegaLambda; + hubble_a = All.Hubble * sqrt(hubble_a); + + return (hubble_a); +} diff --git a/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c new file mode 100644 index 0000000000..88b7f89a34 --- /dev/null +++ b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c @@ -0,0 +1,484 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/time_integration/do_gravity_hydro.c + * \date 05/2018 + * \brief Contains the two half step kick operators. + * \details This file contains the functions applying the gravitational + * acceleration to the particles (both gas and gravity only). + * The functions + * find_gravity_timesteps_and_do_gravity_step_first_half and + * do_gravity_step_second_half are directly called in the main + * time-evolution loop in run.c. + * contains functions: + * static inline void kick_particle(int i, double dt_gravkick, + * MySingle * Grav) + * void find_gravity_timesteps_and_do_gravity_step_first_half( + * void) + * void do_gravity_step_second_half(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +/*! \brief Applies gravity kick to particles. + * + * Apply change of velocity due to gravitational acceleration. + * For hydrodynamic cells, both velocity and momentum are updated. + * + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] dt_gravkick Timestep of gravity kick operation. + * \param[in] Grav Gravitational acceleration of particle. 
+ * + * \return void + */ +static inline void kick_particle(int i, double dt_gravkick, MySingle* Grav) +{ + int j; + double dvel[3]; + if(P[i].Type == 0) + { + SphP[i].Energy -= 0.5 * P[i].Mass * (P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]); + for(j = 0; j < 3; j++) /* do the kick for gas cells */ + { + dvel[j] = Grav[j] * dt_gravkick; + P[i].Vel[j] += dvel[j]; + SphP[i].Momentum[j] += P[i].Mass * dvel[j]; + } + SphP[i].Energy += 0.5 * P[i].Mass * (P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]); + } + else + { + for(j = 0; j < 3; j++) /* do the kick, only collisionless particles */ + P[i].Vel[j] += Grav[j] * dt_gravkick; + } +} + +/*! \brief Performs the first half step kick operator. + * + * This function applies a half step kick similar to + * do_gravity_step_second_half(). If we are on a PM step the kick due to + * the particle mesh's long range gravity is applied first. Afterwards the + * short range kick due to the tree force is added. + * In both cases the momentum and energy for gas cells is updated. 
/*! \brief Performs the first half step kick operator.
 *
 *  This function applies a half step kick similar to
 *  do_gravity_step_second_half(). If we are on a PM step the kick due to
 *  the particle mesh's long range gravity is applied first. Afterwards the
 *  short range kick due to the tree force is added.
 *  In both cases the momentum and energy for gas cells is updated (this is
 *  done inside kick_particle()).
 *
 *  Besides kicking, this routine also assigns the gravity timebin of the
 *  active particles (P[i].TimeBinGrav), either by the hierarchical
 *  push-down scheme (HIERARCHICAL_GRAVITY) or from get_timestep_gravity().
 *
 *  The whole body is compiled out unless one of SELFGRAVITY,
 *  EXTERNALGRAVITY or EXACT_GRAVITY_FOR_PARTICLE_TYPE is set and MESHRELAX
 *  is not.
 *
 *  \return void
 */
void find_gravity_timesteps_and_do_gravity_step_first_half(void)
{
#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)

  TIMER_START(CPU_DRIFTS);

  int idx, i;
  integertime ti_step, tstart, tend;
  double dt_gravkick;

#ifdef PMGRID
  if(All.PM_Ti_endstep == All.Ti_Current) /* need to do long-range kick */
    {
      ti_step = get_timestep_pm();

      /* advance the PM step interval: the old end becomes the new beginning */
      All.PM_Ti_begstep = All.PM_Ti_endstep;
      All.PM_Ti_endstep = All.PM_Ti_begstep + ti_step;

      /* kick over the first half of the new PM step */
      tstart = All.PM_Ti_begstep;
      tend   = tstart + ti_step / 2;

      if(All.ComovingIntegrationOn)
        dt_gravkick = get_gravkick_factor(tstart, tend);
      else
        dt_gravkick = (tend - tstart) * All.Timebase_interval;

      /* the PM kick is applied to all local particles, not only the active ones */
      for(i = 0; i < NumPart; i++)
        kick_particle(i, dt_gravkick, P[i].GravPM);
    }
#endif /* #ifdef PMGRID */

#ifdef HIERARCHICAL_GRAVITY
  /* First, move all active particles to the highest allowed timestep for this synchronization time.
   * They will then cascade down to smaller timesteps as needed.
   */

  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
    {
      int i = TimeBinsGravity.ActiveParticleList[idx]; /* shadows the function-level i */
      if(i < 0)
        continue;
      int bin    = All.HighestSynchronizedTimeBin;
      int binold = P[i].TimeBinGrav;

      timebin_move_particle(&TimeBinsGravity, i, binold, bin);
      P[i].TimeBinGrav = bin;
    }

  long long Previous_GlobalNActiveGravity = TimeBinsGravity.GlobalNActiveParticles;

  /* NOTE(review): dt_gravsum is accumulated below but never read in this function */
  double dt_gravsum = 0;

  int bin_highest_occupied = 0;
  int timebin;
  /* go over all timebins, from the highest synchronized one downwards */

  for(timebin = All.HighestSynchronizedTimeBin; timebin >= 0; timebin--)
    {
      /* rebuild the active list so that it holds exactly the particles of this timebin */
      TimeBinsGravity.NActiveParticles = 0;
      timebin_add_particles_of_timebin_to_list_of_active_particles(&TimeBinsGravity, timebin);
      sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);

      if(TimeBinsGravity.GlobalNActiveParticles == 0) /* we are done at this point */
        break;

      /* calculate gravity for all active particles; skipped when the global
       * active count is unchanged from the previous level */
      if(TimeBinsGravity.GlobalNActiveParticles != Previous_GlobalNActiveGravity)
        {
          TIMER_STOP(CPU_DRIFTS);

          compute_grav_accelerations(timebin, FLAG_PARTIAL_TREE);

          TIMER_START(CPU_DRIFTS);
        }

      /* count the local particles whose timestep is too large for their current bin */
      /* NOTE(review): unlike the other loops over ActiveParticleList, negative
       * entries are not skipped here - confirm the freshly built list cannot
       * contain them. */
      int nfine = 0;
      for(int i = 0; i < TimeBinsGravity.NActiveParticles; i++)
        {
          int target = TimeBinsGravity.ActiveParticleList[i];
          int binold = P[target].TimeBinGrav;

          if(test_if_grav_timestep_is_too_large(target, binold))
            nfine++;
        }

      long long nfine_tot;
      sumup_large_ints(1, &nfine, &nfine_tot);

      /* if more than 33% of the active particles need a finer step, push all of them down */
      int push_down_flag = 0;
      if(nfine_tot > 0.33 * TimeBinsGravity.GlobalNActiveParticles)
        push_down_flag = 1;

      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
        {
          int i = TimeBinsGravity.ActiveParticleList[idx];
          if(i < 0)
            continue;
          int binold = P[i].TimeBinGrav;

          if(push_down_flag || test_if_grav_timestep_is_too_large(i, binold))
            {
              int bin = binold - 1;
              if(bin == 0) /* bin 0 is not allowed as a target of the push-down */
                {
                  print_particle_info(i);
                  terminate("timestep too small");
                }

              timebin_move_particle(&TimeBinsGravity, i, binold, bin);
              P[i].TimeBinGrav = bin;
            }
          else if(binold > bin_highest_occupied)
            bin_highest_occupied = binold; /* particle stays in this bin */
        }

      /* start-up case: the highest occupied timebin is still 0 */
      if(All.HighestOccupiedTimeBin == 0)
        {
          MPI_Allreduce(&bin_highest_occupied, &All.HighestOccupiedTimeBin, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);

          if(All.HighestOccupiedTimeBin > 0)
            {
              mpi_printf("KICKS: Special Start-up Fix: All.HighestOccupiedGravTimeBin=%d\n", All.HighestOccupiedTimeBin);

              /* point every still-unset level at the highest occupied timebin */
              for(i = 0; i < GRAVCOSTLEVELS; i++)
                {
                  if(All.LevelToTimeBin[i] == 0)
                    All.LevelToTimeBin[i] = All.HighestOccupiedTimeBin;
                }
            }
        }

      if(TimeBinsGravity.GlobalNActiveParticles)
        {
          ti_step = timebin ? (((integertime)1) << timebin) : 0;
          tstart  = All.Ti_begstep[timebin]; /* beginning of step */
          tend    = tstart + ti_step / 2;    /* midpoint of step */

          if(All.ComovingIntegrationOn)
            dt_gravkick = get_gravkick_factor(tstart, tend);
          else
            dt_gravkick = (tend - tstart) * All.Timebase_interval;

          /* subtract the half-step kick factor of the next higher timebin */
          if(timebin < All.HighestSynchronizedTimeBin)
            {
              ti_step = (timebin + 1) ? (((integertime)1) << (timebin + 1)) : 0;

              tstart = All.Ti_begstep[timebin + 1]; /* beginning of step */
              tend   = tstart + ti_step / 2;        /* midpoint of step */

              if(All.ComovingIntegrationOn)
                dt_gravkick -= get_gravkick_factor(tstart, tend);
              else
                dt_gravkick -= (tend - tstart) * All.Timebase_interval;
            }

          dt_gravsum += dt_gravkick;

          mpi_printf("KICKS: 1st gravity for hierarchical timebin=%d: %lld particles\n", timebin,
                     TimeBinsGravity.GlobalNActiveParticles);

          for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
            {
              int i = TimeBinsGravity.ActiveParticleList[idx];
              if(i < 0)
                continue;

              kick_particle(i, dt_gravkick, P[i].GravAccel);
            }
          Previous_GlobalNActiveGravity = TimeBinsGravity.GlobalNActiveParticles;
        }
    }

  /* reconstruct list of active particles because it is used for other things too (i.e. wind particles) */
  timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
  sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
#else /* #ifdef HIERARCHICAL_GRAVITY */

#ifdef FORCE_EQUAL_TIMESTEPS
  // gravity timebin is already set, and not anymore 0 as All.HighestActiveTimeBin, but all particles should receive a first half kick
  // in the 0-th timestep
  if(All.NumCurrentTiStep == 0)
    timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, TIMEBINS);
  else
#endif /* #ifdef FORCE_EQUAL_TIMESTEPS */
    timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
  sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);

  mpi_printf("KICKS: 1st gravity for highest active timebin=%d: particles %lld\n", All.HighestActiveTimeBin,
             TimeBinsGravity.GlobalNActiveParticles);

  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
    {
      i = TimeBinsGravity.ActiveParticleList[idx];
      if(i < 0)
        continue;

#ifndef FORCE_EQUAL_TIMESTEPS
      int binold = P[i].TimeBinGrav;
      int bin    = -1;

      /* determine the new gravity timebin of this particle */
      ti_step = get_timestep_gravity(i);
      timebins_get_bin_and_do_validity_checks(ti_step, &bin, P[i].TimeBinGrav);

      /* a gas cell never gets a gravity bin above its hydro bin */
      if(P[i].Type == 0)
        {
          int bin_hydro = P[i].TimeBinHydro;
          if(bin_hydro < bin)
            bin = bin_hydro;
        }

      ti_step = bin ? (((integertime)1) << bin) : 0;

      timebin_move_particle(&TimeBinsGravity, i, binold, bin);
      P[i].TimeBinGrav = bin;
#else  /* #ifndef FORCE_EQUAL_TIMESTEPS */
      int bin = P[i].TimeBinGrav;
      ti_step = bin ? (((integertime)1) << bin) : 0;
#endif /* #ifndef FORCE_EQUAL_TIMESTEPS #else */

      tstart = All.Ti_begstep[bin];  /* beginning of step */
      tend   = tstart + ti_step / 2; /* midpoint of step */

      if(All.ComovingIntegrationOn)
        dt_gravkick = get_gravkick_factor(tstart, tend);
      else
        dt_gravkick = (tend - tstart) * All.Timebase_interval;

      kick_particle(i, dt_gravkick, P[i].GravAccel);
    }
#endif /* #ifdef HIERARCHICAL_GRAVITY #else */

  TIMER_STOP(CPU_DRIFTS);
#endif
}

/*! \brief Performs the second half step kick operator.
 *
 *  This function applies a half step kick similar to
 *  find_gravity_timesteps_and_do_gravity_step_first_half(). First the short
 *  range kick due to the tree force is added. If we are on a PM step the
 *  kick due to the particle mesh's long range gravity is applied too. In
 *  both cases the momentum and energy for gas cells is updated (inside
 *  kick_particle()).
 *
 *  Task 0 additionally writes one step summary line to FdTimings.
 *
 *  \return void
 */
void do_gravity_step_second_half(void)
{
#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
  TIMER_START(CPU_DRIFTS);
  int idx;
  char fullmark[8];

  /* mark steps in which the highest occupied timebin is active with "(*)" */
  if(All.HighestActiveTimeBin == All.HighestOccupiedTimeBin)
    sprintf(fullmark, "(*)");
  else
    fullmark[0] = 0;

  if(ThisTask == 0)
    fprintf(FdTimings, "\nStep%s: %d, t: %g, dt: %g, highest active timebin: %d (lowest active: %d, highest occupied: %d)\n",
            fullmark, All.NumCurrentTiStep, All.Time, All.TimeStep, All.HighestActiveTimeBin, All.LowestActiveTimeBin,
            All.HighestOccupiedTimeBin);

  double dt_gravkick;
#ifdef PMGRID
  if(All.PM_Ti_endstep == All.Ti_Current) /* need to do long-range kick */
    {
      /* only the force is computed here; the PM kick itself is applied at the end of this function */
      TIMER_STOP(CPU_DRIFTS);
      long_range_force();
      TIMER_START(CPU_DRIFTS);
    }
#endif /* #ifdef PMGRID */
#ifdef HIERARCHICAL_GRAVITY
  /* go over all timebins from 0 up to the highest active one, so that we end up getting the cumulative force at the end */
  for(int timebin = 0; timebin <= All.HighestActiveTimeBin; timebin++)
    {
      if(TimeBinSynchronized[timebin])
        {
          /* need to make all timebins below the current one active */
          timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, timebin);
          sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);

          if(TimeBinsGravity.GlobalNActiveParticles)
            {
              TIMER_STOP(CPU_DRIFTS);

              /* a full tree is requested only for the highest active timebin */
              compute_grav_accelerations(timebin, (timebin == All.HighestActiveTimeBin) ? FLAG_FULL_TREE : FLAG_PARTIAL_TREE);

              TIMER_START(CPU_DRIFTS);

              mpi_printf("KICKS: 2nd gravity for hierarchical timebin=%d: particles %lld\n", timebin,
                         TimeBinsGravity.GlobalNActiveParticles);

              integertime ti_step = timebin ? (((integertime)1) << timebin) : 0;

              integertime tend = All.Ti_begstep[timebin]; /* end of step (Note: All.Ti_begstep[] has already been advanced for the next
                                                             step at this point) */
              integertime tstart = tend - ti_step / 2;    /* midpoint of step */

              if(All.ComovingIntegrationOn)
                dt_gravkick = get_gravkick_factor(tstart, tend);
              else
                dt_gravkick = (tend - tstart) * All.Timebase_interval;

              /* subtract the half-step kick factor of the next higher timebin */
              if(timebin < All.HighestActiveTimeBin)
                {
                  ti_step = (timebin + 1) ? (((integertime)1) << (timebin + 1)) : 0;

                  tend = All.Ti_begstep[timebin + 1]; /* end of step (Note: All.Ti_begstep[] has already been advanced for the next
                                                         step at this point) */
                  tstart = tend - ti_step / 2;        /* midpoint of step */

                  if(All.ComovingIntegrationOn)
                    dt_gravkick -= get_gravkick_factor(tstart, tend);
                  else
                    dt_gravkick -= (tend - tstart) * All.Timebase_interval;
                }

              for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
                {
                  int i = TimeBinsGravity.ActiveParticleList[idx];
                  if(i < 0)
                    continue;

                  kick_particle(i, dt_gravkick, P[i].GravAccel);

                  if(P[i].Type == 0)
                    {
                      /* for gas cells, store the acceleration computed at the highest occupied timebin */
                      if(All.HighestOccupiedTimeBin == timebin)
                        for(int j = 0; j < 3; j++)
                          SphP[i].FullGravAccel[j] = P[i].GravAccel[j];
                    }
                }
            }
        }
    }

#else /* #ifdef HIERARCHICAL_GRAVITY */
  timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
  sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);

  if(TimeBinsGravity.GlobalNActiveParticles)
    {
      TIMER_STOP(CPU_DRIFTS);

      /* calculate gravity for all active particles */
      compute_grav_accelerations(All.HighestActiveTimeBin, FLAG_FULL_TREE);

      TIMER_START(CPU_DRIFTS);

      mpi_printf("KICKS: 2nd gravity for highest active timebin=%d: particles %lld\n", All.HighestActiveTimeBin,
                 TimeBinsGravity.GlobalNActiveParticles);

      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
        {
          int i = TimeBinsGravity.ActiveParticleList[idx];
          if(i < 0)
            continue;

          /* kick over the second half of the particle's own step, which ends at Ti_begstep of its bin */
          integertime ti_step = P[i].TimeBinGrav ? (((integertime)1) << P[i].TimeBinGrav) : 0;
          integertime tend    = All.Ti_begstep[P[i].TimeBinGrav];
          integertime tstart  = tend - ti_step / 2; /* midpoint of step */

          if(All.ComovingIntegrationOn)
            dt_gravkick = get_gravkick_factor(tstart, tend);
          else
            dt_gravkick = (tend - tstart) * All.Timebase_interval;

          kick_particle(i, dt_gravkick, P[i].GravAccel);
        }
    }
#endif /* #ifdef HIERARCHICAL_GRAVITY #else */

#ifdef PMGRID
  if(All.PM_Ti_endstep == All.Ti_Current) /* need to do long-range kick */
    {
      /* second half of the PM step: from its midpoint to its end */
      integertime ti_step = All.PM_Ti_endstep - All.PM_Ti_begstep;
      integertime tstart  = All.PM_Ti_begstep + ti_step / 2;
      integertime tend    = tstart + ti_step / 2;

      if(All.ComovingIntegrationOn)
        dt_gravkick = get_gravkick_factor(tstart, tend);
      else
        dt_gravkick = (tend - tstart) * All.Timebase_interval;

      /* the PM kick is applied to all local particles */
      for(int i = 0; i < NumPart; i++)
        kick_particle(i, dt_gravkick, P[i].GravPM);
    }
#endif /* #ifdef PMGRID */

  TIMER_STOP(CPU_DRIFTS);
#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY)|| defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) */
}
/* Layout shared by the three tables below (see init_drift_table()): entry i
 * holds the integral of the respective integrand from exp(logTimeBegin) to
 * exp(logTimeBegin + (i + 1) * (logTimeMax - logTimeBegin) / DRIFT_TABLE_LENGTH),
 * i.e. the tables are cumulative and equally spaced in log(a).
 */

/*! table for the cosmological drift factors */
static double DriftTable[DRIFT_TABLE_LENGTH];

/*! table for the cosmological kick factor for gravitational forces */
static double GravKickTable[DRIFT_TABLE_LENGTH];

/*! table for the cosmological kick factor for hydrodynamical forces */
static double HydroKickTable[DRIFT_TABLE_LENGTH];

/*! log(All.TimeBegin), set in init_drift_table() */
static double logTimeBegin;
/*! log(All.TimeMax), set in init_drift_table() */
static double logTimeMax;
+ */ +double drift_integ(double a, void *param) +{ + double h; + + h = hubble_function(a); + + return 1 / (h * a * a * a); +} + +/*! \brief Integrand for gravitational kick factor calculation. + * + * For cosmological simulations. + * + * \param[in] a Scalefactor. + * \param[in] param (unused) + * + * \return Integrand for gravitational kick factor calculation. + */ +double gravkick_integ(double a, void *param) +{ + double h; + + h = hubble_function(a); + + return 1 / (h * a * a); +} + +/*! \brief Integrand for hydrodynamics kick factor calculation. + * + * For cosmological simulations. + * + * \param[in] a Scalefactor. + * \param[in] param (unused) + * + * \return Integrand for hydrodynamics kick factor calculation. + */ +double hydrokick_integ(double a, void *param) +{ + double h; + + h = hubble_function(a); + + return 1 / (h * pow(a, 3 * GAMMA_MINUS1) * a); +} + +/*! \brief Initializes lookup table for cosmological pre-factors for a drift. + * + * Numerical integrals using the integrand functions defined above. 
+ * + * \return void + */ +void init_drift_table(void) +{ +#define WORKSIZE 100000 + int i; + double result, abserr; + + gsl_function F; + gsl_integration_workspace *workspace; + + logTimeBegin = log(All.TimeBegin); + logTimeMax = log(All.TimeMax); + + workspace = gsl_integration_workspace_alloc(WORKSIZE); + + for(i = 0; i < DRIFT_TABLE_LENGTH; i++) + { + F.function = &drift_integ; + gsl_integration_qag(&F, exp(logTimeBegin), exp(logTimeBegin + ((logTimeMax - logTimeBegin) / DRIFT_TABLE_LENGTH) * (i + 1)), 0, + 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, workspace, &result, &abserr); + DriftTable[i] = result; + + F.function = &gravkick_integ; + gsl_integration_qag(&F, exp(logTimeBegin), exp(logTimeBegin + ((logTimeMax - logTimeBegin) / DRIFT_TABLE_LENGTH) * (i + 1)), 0, + 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, workspace, &result, &abserr); + GravKickTable[i] = result; + + F.function = &hydrokick_integ; + gsl_integration_qag(&F, exp(logTimeBegin), exp(logTimeBegin + ((logTimeMax - logTimeBegin) / DRIFT_TABLE_LENGTH) * (i + 1)), 0, + 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, workspace, &result, &abserr); + HydroKickTable[i] = result; + } + + gsl_integration_workspace_free(workspace); +} + +/*! \brief This function integrates the cosmological prefactor for a drift + * step between time0 and time1. A lookup-table is used for reasons + * of speed. + * + * \param[in] time0 Start time. + * \param[in] time1 End time. + * + * \return \f[ \int_{a_0}^{a_1} \frac{{\rm d}a}{H(a)} \f]. 
+ */ +double get_drift_factor(integertime time0, integertime time1) +{ + double a1, a2, df1, df2, u1, u2; + int i1, i2; + static integertime last_time0 = -1, last_time1 = -1; + static double last_value; + + if(time0 == last_time0 && time1 == last_time1) + return last_value; + + /* note: will only be called for cosmological integration */ + + a1 = logTimeBegin + time0 * All.Timebase_interval; + a2 = logTimeBegin + time1 * All.Timebase_interval; + + u1 = (a1 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; + i1 = (int)u1; + if(i1 >= DRIFT_TABLE_LENGTH) + i1 = DRIFT_TABLE_LENGTH - 1; + + if(i1 <= 1) + df1 = u1 * DriftTable[0]; + else + df1 = DriftTable[i1 - 1] + (DriftTable[i1] - DriftTable[i1 - 1]) * (u1 - i1); + + u2 = (a2 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; + i2 = (int)u2; + if(i2 >= DRIFT_TABLE_LENGTH) + i2 = DRIFT_TABLE_LENGTH - 1; + + if(i2 <= 1) + df2 = u2 * DriftTable[0]; + else + df2 = DriftTable[i2 - 1] + (DriftTable[i2] - DriftTable[i2 - 1]) * (u2 - i2); + + last_time0 = time0; + last_time1 = time1; + + return last_value = (df2 - df1); +} + +/*! \brief This function integrates the cosmological prefactor for a + * gravitational kick between time0 and time1. A lookup-table is used + * for reasons of speed. + * + * \param[in] time0 Start time. + * \param[in] time1 End time. + * + * \return Gravkick factor. 
+ */ +double get_gravkick_factor(integertime time0, integertime time1) +{ + double a1, a2, df1, df2, u1, u2; + int i1, i2; + static integertime last_time0 = -1, last_time1 = -1; + static double last_value; + + if(time0 == last_time0 && time1 == last_time1) + return last_value; + + /* note: will only be called for cosmological integration */ + + a1 = logTimeBegin + time0 * All.Timebase_interval; + a2 = logTimeBegin + time1 * All.Timebase_interval; + + u1 = (a1 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; + i1 = (int)u1; + if(i1 >= DRIFT_TABLE_LENGTH) + i1 = DRIFT_TABLE_LENGTH - 1; + + if(i1 <= 1) + df1 = u1 * GravKickTable[0]; + else + df1 = GravKickTable[i1 - 1] + (GravKickTable[i1] - GravKickTable[i1 - 1]) * (u1 - i1); + + u2 = (a2 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; + i2 = (int)u2; + if(i2 >= DRIFT_TABLE_LENGTH) + i2 = DRIFT_TABLE_LENGTH - 1; + + if(i2 <= 1) + df2 = u2 * GravKickTable[0]; + else + df2 = GravKickTable[i2 - 1] + (GravKickTable[i2] - GravKickTable[i2 - 1]) * (u2 - i2); + + last_time0 = time0; + last_time1 = time1; + + return last_value = (df2 - df1); +} + +/*! \brief This function integrates the cosmological prefactor for a + * hydrodynamical kick between time0 and time1. A lookup-table is + * used for reasons of speed. + * + * \param[in] time0 Start time + * \param[in] time1 End time + * + * \return Hydro kick factor. 
+ */ +double get_hydrokick_factor(integertime time0, integertime time1) +{ + double a1, a2, df1, df2, u1, u2; + int i1, i2; + static integertime last_time0 = -1, last_time1 = -1; + static double last_value; + + if(time0 == last_time0 && time1 == last_time1) + return last_value; + + /* note: will only be called for cosmological integration */ + + a1 = logTimeBegin + time0 * All.Timebase_interval; + a2 = logTimeBegin + time1 * All.Timebase_interval; + + u1 = (a1 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; + i1 = (int)u1; + if(i1 >= DRIFT_TABLE_LENGTH) + i1 = DRIFT_TABLE_LENGTH - 1; + + if(i1 <= 1) + df1 = u1 * HydroKickTable[0]; + else + df1 = HydroKickTable[i1 - 1] + (HydroKickTable[i1] - HydroKickTable[i1 - 1]) * (u1 - i1); + + u2 = (a2 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; + i2 = (int)u2; + if(i2 >= DRIFT_TABLE_LENGTH) + i2 = DRIFT_TABLE_LENGTH - 1; + + if(i2 <= 1) + df2 = u2 * HydroKickTable[0]; + else + df2 = HydroKickTable[i2 - 1] + (HydroKickTable[i2] - HydroKickTable[i2 - 1]) * (u2 - i2); + + last_time0 = time0; + last_time1 = time1; + + return last_value = (df2 - df1); +} diff --git a/src/amuse/community/arepo/src/time_integration/predict.c b/src/amuse/community/arepo/src/time_integration/predict.c new file mode 100644 index 0000000000..f377af5b5e --- /dev/null +++ b/src/amuse/community/arepo/src/time_integration/predict.c @@ -0,0 +1,506 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/time_integration/predict.c + * \date 05/2018 + * \brief Routines to find the next sync point, manage the list + * of active timebins/active particles and to drift particles. + * \details contains functions: + * void reconstruct_timebins(void) + * void find_next_sync_point(void) + * void mark_active_timebins(void) + * void drift_all_particles(void) + * void drift_particle(int i, integertime time1) + * static int int_compare(const void *a, const void *b) + * void make_list_of_active_particles(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 08.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief This function (re)builds the time bin lists. + * + * It counts the number of particles in each timebin and updates the + * linked lists containing the particles of each time bin. Afterwards the + * linked list of active particles is updated by + * make_list_of_active_particles(). + * + * The linked lists for each timebin are stored in 'FirstInTimeBin', + * 'LastInTimeBin', 'PrevInTimeBin' and 'NextInTimeBin'. The counters + * of particles per timebin are 'TimeBinCount' and 'TimeBinCountSph'. 
+ * + * \return void + */ +void reconstruct_timebins(void) +{ + TIMER_START(CPU_TIMELINE); + + int i, bin; + + for(bin = 0; bin < TIMEBINS; bin++) + { + TimeBinsHydro.TimeBinCount[bin] = 0; + TimeBinsHydro.FirstInTimeBin[bin] = -1; + TimeBinsHydro.LastInTimeBin[bin] = -1; + + TimeBinsGravity.TimeBinCount[bin] = 0; + TimeBinsGravity.FirstInTimeBin[bin] = -1; + TimeBinsGravity.LastInTimeBin[bin] = -1; + +#ifdef USE_SFR + TimeBinSfr[bin] = 0; +#endif + } + + for(i = 0; i < NumGas; i++) + { + if(P[i].ID == 0 && P[i].Mass == 0) + continue; + + if(P[i].Type != 0) + continue; + + bin = P[i].TimeBinHydro; + + if(TimeBinsHydro.TimeBinCount[bin] > 0) + { + TimeBinsHydro.PrevInTimeBin[i] = TimeBinsHydro.LastInTimeBin[bin]; + TimeBinsHydro.NextInTimeBin[i] = -1; + TimeBinsHydro.NextInTimeBin[TimeBinsHydro.LastInTimeBin[bin]] = i; + TimeBinsHydro.LastInTimeBin[bin] = i; + } + else + { + TimeBinsHydro.FirstInTimeBin[bin] = TimeBinsHydro.LastInTimeBin[bin] = i; + TimeBinsHydro.PrevInTimeBin[i] = TimeBinsHydro.NextInTimeBin[i] = -1; + } + TimeBinsHydro.TimeBinCount[bin]++; + +#ifdef USE_SFR + TimeBinSfr[bin] += SphP[i].Sfr; +#endif + } + + for(i = 0; i < NumPart; i++) + { + if(P[i].ID == 0 && P[i].Mass == 0) + continue; + + bin = P[i].TimeBinGrav; + + if(TimeBinsGravity.TimeBinCount[bin] > 0) + { + TimeBinsGravity.PrevInTimeBin[i] = TimeBinsGravity.LastInTimeBin[bin]; + TimeBinsGravity.NextInTimeBin[i] = -1; + TimeBinsGravity.NextInTimeBin[TimeBinsGravity.LastInTimeBin[bin]] = i; + TimeBinsGravity.LastInTimeBin[bin] = i; + } + else + { + TimeBinsGravity.FirstInTimeBin[bin] = TimeBinsGravity.LastInTimeBin[bin] = i; + TimeBinsGravity.PrevInTimeBin[i] = TimeBinsGravity.NextInTimeBin[i] = -1; + } + TimeBinsGravity.TimeBinCount[bin]++; + } + + make_list_of_active_particles(); + + TIMER_STOP(CPU_TIMELINE); +} + +/*! \brief This function finds the next synchronization point of the system. + * (i.e. the earliest point of time any of the particles needs a force + * computation). 
+ *
+ *  \return void
+ */
+void find_next_sync_point(void)
+{
+  int n;
+  integertime ti_next_kick, ti_next_kick_global, ti_next_for_bin, dt_bin;
+  double timeold;
+
+  TIMER_START(CPU_DRIFTS);
+
+  timeold = All.Time; /* remembered to compute All.TimeStep at the end */
+
+  All.NumCurrentTiStep++;
+
+  /* find the next kick time */
+  ti_next_kick = TIMEBASE; /* upper bound; lowered by every occupied bin below */
+
+  for(n = 0; n < TIMEBINS; n++)
+    {
+      int active = TimeBinsHydro.TimeBinCount[n];
+      /* gravity bins only take part if some form of gravity is compiled in (and MESHRELAX is not) */
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+      active += TimeBinsGravity.TimeBinCount[n];
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \
+        */
+      if(active)
+        {
+          if(n > 0)
+            {
+              dt_bin          = (((integertime)1) << n); /* bin n has a step of 2^n integer time units */
+              ti_next_for_bin = (All.Ti_Current / dt_bin) * dt_bin + dt_bin; /* next kick time for this timebin */
+            }
+          else
+            {
+              dt_bin          = 0; /* bin 0 is treated as zero step: its next kick is the current time */
+              ti_next_for_bin = All.Ti_Current;
+            }
+
+          if(ti_next_for_bin < ti_next_kick)
+            ti_next_kick = ti_next_for_bin;
+        }
+    }
+  /* take the minimum over all MPI tasks; the helper is used instead of MPI_INT when ENLARGE_DYNAMIC_RANGE_IN_TIME is set */
+#ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME
+  minimum_large_ints(1, &ti_next_kick, &ti_next_kick_global);
+#else  /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME */
+  MPI_Allreduce(&ti_next_kick, &ti_next_kick_global, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+#endif /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME #else */
+
+  All.Previous_Ti_Current = All.Ti_Current;
+  All.Ti_Current          = ti_next_kick_global;
+  /* map the integer time to All.Time: exponential in Ti_Current for comoving integration, linear otherwise */
+  if(All.ComovingIntegrationOn)
+    All.Time = All.TimeBegin * exp(All.Ti_Current * All.Timebase_interval);
+  else
+    All.Time = All.TimeBegin + All.Ti_Current * All.Timebase_interval;
+
+  set_cosmo_factors_for_current_time();
+
+  All.TimeStep = All.Time - timeold;
+
+  mark_active_timebins(); /* flags which bins are synchronized at the new All.Ti_Current */
+
+  TIMER_STOP(CPU_DRIFTS);
+}
+
+/*! \brief Sets active timebins for current time-step in global variables.
+ *
+ *  \return void
+ */
+void mark_active_timebins(void)
+{
+  int n;
+  int lowest_active_bin = TIMEBINS, highest_active_bin = 0;
+  int lowest_occupied_bin = TIMEBINS, highest_occupied_bin = 0;
+  int lowest_occupied_gravity_bin = TIMEBINS, highest_occupied_gravity_bin = 0;
+  int highest_synchronized_bin = 0;
+  int nsynchronized_gravity = 0, nsynchronized_hydro = 0;
+  integertime dt_bin;
+
+  /* mark the bins that will be synchronized/active */
+  /* "occupied" = bin holds local particles; "synchronized" = Ti_Current is a multiple of 2^n; "active" = both */
+  for(n = 0; n < TIMEBINS; n++)
+    {
+      if(TimeBinsGravity.TimeBinCount[n])
+        {
+          if(highest_occupied_gravity_bin < n)
+            highest_occupied_gravity_bin = n;
+
+          if(lowest_occupied_gravity_bin > n)
+            lowest_occupied_gravity_bin = n;
+        }
+
+      int active = TimeBinsHydro.TimeBinCount[n] + TimeBinsGravity.TimeBinCount[n];
+
+      if(active)
+        {
+          if(highest_occupied_bin < n)
+            highest_occupied_bin = n;
+
+          if(lowest_occupied_bin > n)
+            lowest_occupied_bin = n;
+        }
+
+      dt_bin = (((integertime)1) << n);
+
+      if((All.Ti_Current % dt_bin) == 0)
+        {
+          TimeBinSynchronized[n] = 1;
+          All.Ti_begstep[n]      = All.Ti_Current; /* a new step of this bin starts now */
+
+          nsynchronized_gravity += TimeBinsGravity.TimeBinCount[n];
+          nsynchronized_hydro += TimeBinsHydro.TimeBinCount[n];
+
+          if(highest_synchronized_bin < n)
+            highest_synchronized_bin = n;
+
+          if(active)
+            {
+              if(highest_active_bin < n)
+                highest_active_bin = n;
+
+              if(lowest_active_bin > n)
+                lowest_active_bin = n;
+            }
+        }
+      else
+        TimeBinSynchronized[n] = 0;
+    }
+  /* global minima of the three "lowest" bins in one reduction */
+  int lowest_in[3], lowest_out[3];
+  lowest_in[0] = lowest_occupied_bin;
+  lowest_in[1] = lowest_occupied_gravity_bin;
+  lowest_in[2] = lowest_active_bin;
+  MPI_Allreduce(lowest_in, lowest_out, 3, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+  All.LowestOccupiedTimeBin     = lowest_out[0];
+  All.LowestOccupiedGravTimeBin = lowest_out[1];
+  All.LowestActiveTimeBin       = lowest_out[2];
+  /* global maxima of the four "highest" bins in one reduction */
+  int highest_in[4], highest_out[4];
+  highest_in[0] = highest_occupied_bin;
+  highest_in[1] = highest_occupied_gravity_bin;
+  highest_in[2] = highest_active_bin;
+  highest_in[3] = highest_synchronized_bin;
+  MPI_Allreduce(highest_in, highest_out, 4, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  All.HighestOccupiedTimeBin     = highest_out[0];
+  All.HighestOccupiedGravTimeBin = highest_out[1];
+  All.HighestActiveTimeBin       = highest_out[2];
+  All.HighestSynchronizedTimeBin = highest_out[3];
+
+  /* note: the lowest synchronized bin is always 1 */
+  /* pack [n_sync_hydro, n_sync_grav, gravity counts per bin, hydro counts per bin] for one global sum */
+  int input_ints[2 + 2 * TIMEBINS];
+  long long output_longs[2 + 2 * TIMEBINS];
+
+  input_ints[0] = nsynchronized_hydro;
+  input_ints[1] = nsynchronized_gravity;
+  memcpy(input_ints + 2, TimeBinsGravity.TimeBinCount, TIMEBINS * sizeof(int));
+  memcpy(input_ints + 2 + TIMEBINS, TimeBinsHydro.TimeBinCount, TIMEBINS * sizeof(int));
+
+  sumup_large_ints(2 + 2 * TIMEBINS, input_ints, output_longs);
+
+  All.GlobalNSynchronizedHydro   = output_longs[0];
+  All.GlobalNSynchronizedGravity = output_longs[1];
+  long long *tot_count_grav      = output_longs + 2;
+  long long *tot_count_sph       = output_longs + 2 + TIMEBINS;
+
+  long long tot_grav = 0, tot_sph = 0;
+  /* totals over all bins; at the same time turn the per-bin counts into cumulative counts (bins 0..n) */
+  for(n = 0; n < TIMEBINS; n++)
+    {
+      tot_grav += tot_count_grav[n];
+      tot_sph += tot_count_sph[n];
+
+      if(n > 0)
+        {
+          tot_count_grav[n] += tot_count_grav[n - 1];
+          tot_count_sph[n] += tot_count_sph[n - 1];
+        }
+    }
+
+  All.SmallestTimeBinWithDomainDecomposition = All.HighestOccupiedTimeBin;
+  /* no break: the loop ends with the smallest occupied-range bin whose cumulative count exceeds the configured fraction */
+  for(n = All.HighestOccupiedTimeBin; n >= All.LowestOccupiedTimeBin; n--)
+    {
+      if(tot_count_grav[n] > All.ActivePartFracForNewDomainDecomp * tot_grav ||
+         tot_count_sph[n] > All.ActivePartFracForNewDomainDecomp * tot_sph)
+        All.SmallestTimeBinWithDomainDecomposition = n;
+    }
+}
+
+/*! \brief Applies drift operation to all particles to current time.
+ *         Calls drift_particle(i, All.Ti_Current) for every index i in [0, NumPart).
+ *  \return void
+ */
+void drift_all_particles(void)
+{
+  int i;
+
+  TIMER_START(CPU_DRIFTS);
+
+  for(i = 0; i < NumPart; i++)
+    drift_particle(i, All.Ti_Current);
+
+  TIMER_STOP(CPU_DRIFTS);
+}
+
+/*! \brief This function drifts drifts a particle i to time1.
+ *
+ *  \param[in] i Particle/cell index.
+ *  \param[in] time1 Time to which particles get drifted.
+ *
+ *  \return void
+ */
+void drift_particle(int i, integertime time1)
+{
+  int j;
+
+  if(i < 0)
+    terminate("i=%d NumPart=%d", i, NumPart);
+
+  integertime time0 = P[i].Ti_Current; /* integer time the particle position currently refers to */
+
+  if(time1 == time0) /* already at the requested time: nothing to do */
+    return;
+
+  if(time1 < time0)
+    terminate("no prediction into past allowed: time0=%lld time1=%lld\n", (long long)time0, (long long)time1);
+
+  double dt_drift;
+  /* convert the integer interval to a drift factor: helper for comoving runs, plain time difference otherwise */
+  if(All.ComovingIntegrationOn)
+    dt_drift = get_drift_factor(time0, time1);
+  else
+    dt_drift = (time1 - time0) * All.Timebase_interval;
+
+  if(P[i].Type == 0)
+    {
+      /* gas cells (Type 0) are moved with SphP[i].VelVertex instead of P[i].Vel */
+      for(j = 0; j < 3; j++)
+        {
+          P[i].Pos[j] += SphP[i].VelVertex[j] * dt_drift;
+        }
+    }
+  else
+    {
+#ifndef MESHRELAX
+      for(j = 0; j < 3; j++)
+        P[i].Pos[j] += P[i].Vel[j] * dt_drift;
+      /* reflective walls: mirror the position at 0 or boxSize and flip the velocity component */
+#if defined(REFLECTIVE_X)
+      if(P[i].Pos[0] < 0 || P[i].Pos[0] > boxSize_X)
+        {
+          P[i].Pos[0] = 2 * (P[i].Pos[0] > boxSize_X ? 1 : 0) * boxSize_X - P[i].Pos[0];
+          P[i].Vel[0] *= -1;
+        }
+#endif /* #if defined(REFLECTIVE_X) */
+#if defined(REFLECTIVE_Y)
+      if(P[i].Pos[1] < 0 || P[i].Pos[1] > boxSize_Y)
+        {
+          P[i].Pos[1] = 2 * (P[i].Pos[1] > boxSize_Y ? 1 : 0) * boxSize_Y - P[i].Pos[1];
+          P[i].Vel[1] *= -1;
+        }
+#endif /* #if defined(REFLECTIVE_Y) */
+#if defined(REFLECTIVE_Z)
+      if(P[i].Pos[2] < 0 || P[i].Pos[2] > boxSize_Z)
+        {
+          P[i].Pos[2] = 2 * (P[i].Pos[2] > boxSize_Z ? 1 : 0) * boxSize_Z - P[i].Pos[2];
+          P[i].Vel[2] *= -1;
+        }
+#endif /* #if defined(REFLECTIVE_Z) */
+
+#endif /* #ifndef MESHRELAX */
+    }
+
+  P[i].Ti_Current = time1; /* updated even when MESHRELAX compiled the position update out */
+}
+
+/*! \brief Comparison function for two integer values (comparator passed to mysort()).
+ *
+ *  \param[in] a Pointer to the first int value.
+ *  \param[in] b Pointer to the second int value.
+ *
+ *  \return -1 if *a < *b, +1 if *a > *b, 0 if both are equal.
+ */
+static int int_compare(const void *a, const void *b)
+{
+  if(*((int *)a) < *((int *)b))
+    return -1;
+
+  if(*((int *)a) > *((int *)b))
+    return +1;
+
+  return 0;
+}
+
+/*! \brief This function builds the linear list of active particles.
+ *
+ *  The list is stored in the array ActiveParticleList of the TimeBinData
+ *  structs.
+ *
+ *  \return void
+ */
+void make_list_of_active_particles(void)
+{
+  TIMER_START(CPU_DRIFTS);
+
+  int i, n;
+  /* collect the gas cells of all synchronized time bins into the linear array TimeBinsHydro.ActiveParticleList */
+  TimeBinsHydro.NActiveParticles = 0;
+
+  for(n = 0; n < TIMEBINS; n++)
+    {
+      if(TimeBinSynchronized[n])
+        {
+          for(i = TimeBinsHydro.FirstInTimeBin[n]; i >= 0; i = TimeBinsHydro.NextInTimeBin[i])
+            if((P[i].Type == 0) && !((P[i].ID == 0) && (P[i].Mass == 0)))
+              {
+                if(P[i].Ti_Current != All.Ti_Current) /* bring the cell to the current time before it is used */
+                  drift_particle(i, All.Ti_Current);
+
+                TimeBinsHydro.ActiveParticleList[TimeBinsHydro.NActiveParticles] = i;
+                TimeBinsHydro.NActiveParticles++;
+              }
+        }
+    }
+  /* same for gravity, without the Type restriction */
+  TimeBinsGravity.NActiveParticles = 0;
+
+  for(n = 0; n < TIMEBINS; n++)
+    {
+      if(TimeBinSynchronized[n])
+        {
+          for(i = TimeBinsGravity.FirstInTimeBin[n]; i >= 0; i = TimeBinsGravity.NextInTimeBin[i])
+            {
+              if(!((P[i].ID == 0) && (P[i].Mass == 0)))
+                {
+                  if(P[i].Ti_Current != All.Ti_Current)
+                    drift_particle(i, All.Ti_Current);
+
+                  TimeBinsGravity.ActiveParticleList[TimeBinsGravity.NActiveParticles] = i;
+                  TimeBinsGravity.NActiveParticles++;
+                }
+            }
+        }
+    }
+
+  /* sort both lists for better memory efficiency */
+  mysort(TimeBinsHydro.ActiveParticleList, TimeBinsHydro.NActiveParticles, sizeof(int), int_compare);
+  mysort(TimeBinsGravity.ActiveParticleList, TimeBinsGravity.NActiveParticles, sizeof(int), int_compare);
+  /* global sums of the local active counts; only the first two of the six array slots are used */
+  int in[6];
+  long long out[6];
+
+  n     = 2;
+  in[0] = TimeBinsGravity.NActiveParticles;
+  in[1] = TimeBinsHydro.NActiveParticles;
+
+  sumup_large_ints(n, in, out);
+
+  TimeBinsGravity.GlobalNActiveParticles = out[0];
+  TimeBinsHydro.GlobalNActiveParticles   = out[1];
+
+  TIMER_STOP(CPU_DRIFTS);
+}
diff --git a/src/amuse/community/arepo/src/time_integration/timestep.c b/src/amuse/community/arepo/src/time_integration/timestep.c
new file mode 100644
index 0000000000..4224b3cc8f
--- /dev/null
+++ b/src/amuse/community/arepo/src/time_integration/timestep.c
@@ -0,0 +1,980 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/time_integration/timestep.c
+ * \date        05/2018
+ * \brief       Routines for 'kicking' particles in
+ *              momentum space and assigning new timesteps.
+ * \details contains functions: + * void set_cosmo_factors_for_current_time(void) + * void find_timesteps_without_gravity(void) + * void update_timesteps_from_gravity(void) + * integertime get_timestep_pm(void) + * integertime get_timestep_gravity(int p) + * integertime get_timestep_hydro(int p) + * void validate_timestep(double dt, integertime ti_step, int p) + * int test_if_grav_timestep_is_too_large(int p, int bin) + * void find_long_range_step_constraint(void) + * int get_timestep_bin(integertime ti_step) + * double get_time_difference_in_Gyr(double a0, double a1) + * void timebins_init(struct TimeBinData *tbData, const char + * *name, int *MaxPart) + * void timebins_allocate(struct TimeBinData *tbData) + * void timebins_reallocate(struct TimeBinData *tbData) + * void timebins_get_bin_and_do_validity_checks(integertime + * ti_step, int *bin_new, int bin_old) + * void timebin_move_particle(struct TimeBinData *tbData, int p, + * int timeBin_old, int timeBin_new) + * void timebin_remove_particle(struct TimeBinData *tbData, + * int idx, int bin) + * void timebin_add_particle(struct TimeBinData *tbData, int + * i_new, int i_old, int timeBin, int + * addToListOfActiveParticles) + * void timebin_cleanup_list_of_active_particles(struct + * TimeBinData *tbData) + * void timebin_move_sfr(int p, int timeBin_old, int + * timeBin_new) + * void timebin_make_list_of_active_particles_up_to_timebin( + * struct TimeBinData *tbData, int timebin) + * void timebin_add_particles_of_timebin_to_list_of_active_ + * particles(struct TimeBinData *tbData, int timebin) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Sets various cosmological factors for the current simulation time. 
+ *
+ *  \return void
+ */
+void set_cosmo_factors_for_current_time(void)
+{
+  if(All.ComovingIntegrationOn)
+    {
+      /* All.Time is used as the scale factor a in every expression below */
+      All.cf_atime    = All.Time;
+      All.cf_a2inv    = 1 / (All.Time * All.Time);
+      All.cf_a3inv    = 1 / (All.Time * All.Time * All.Time);
+      All.cf_afac1    = pow(All.Time, 3 * GAMMA_MINUS1);
+      All.cf_afac2    = 1 / pow(All.Time, 3 * GAMMA - 2);
+      All.cf_afac3    = pow(All.Time, 3 * (1 - GAMMA) / 2.0);
+      All.cf_hubble_a = All.cf_H = All.cf_Hrate = hubble_function(All.Time);
+      All.cf_time_hubble_a                      = All.Time * All.cf_hubble_a;
+      All.cf_redshift                           = 1 / All.Time - 1;
+    }
+  else
+    {
+      /* non-comoving run: all conversion factors are neutral (1), redshift and Hubble rate are 0 */
+      All.cf_atime         = 1;
+      All.cf_a2inv         = 1;
+      All.cf_a3inv         = 1;
+      All.cf_afac1         = 1;
+      All.cf_afac2         = 1;
+      All.cf_afac3         = 1;
+      All.cf_hubble_a      = 1;
+      All.cf_H             = All.Hubble;
+      All.cf_time_hubble_a = 1;
+      All.cf_Hrate         = 0;
+      All.cf_redshift      = 0;
+    }
+}
+
+/*! \brief Finds hydrodynamic timesteps for all active gas cells.
+ *         Each entry of TimeBinsHydro.ActiveParticleList gets get_timestep_hydro(), is checked by
+ *         timebins_get_bin_and_do_validity_checks() and is moved to its new bin by timebin_move_particle().
+ *         With FORCE_EQUAL_TIMESTEPS the global minimum step (hydro and, if compiled in, gravity and PM) is
+ *         determined first and then assigned to all active gravity and hydro entries instead.
+ *  \return void
+ */
+void find_timesteps_without_gravity(void)
+{
+#ifdef TREE_BASED_TIMESTEPS
+  tree_based_timesteps();
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  TIMER_START(CPU_TIMELINE);
+
+  int idx, i, bin, binold;
+  integertime ti_step;
+
+#ifdef FORCE_EQUAL_TIMESTEPS
+  integertime globTimeStep = TIMEBASE; /* running minimum over all constraints on this task */
+
+#ifdef PMGRID
+  globTimeStep = get_timestep_pm();
+#endif /* #ifdef PMGRID */
+
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0) /* negative entries mark slots that were removed from the list */
+        continue;
+
+      ti_step = get_timestep_gravity(i);
+      if(ti_step < globTimeStep)
+        globTimeStep = ti_step;
+    }
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \
+        */
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      ti_step = get_timestep_hydro(i);
+      if(ti_step < globTimeStep)
+        globTimeStep = ti_step;
+    }
+  /* minimum over all MPI tasks becomes All.GlobalTimeStep */
+#ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME
+  minimum_large_ints(1, &globTimeStep, &All.GlobalTimeStep);
+#else  /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME */
+  MPI_Allreduce(&globTimeStep, &All.GlobalTimeStep, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+#endif /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME #else */
+  /* assign the bin of the common step to every active gravity entry ... */
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      timebins_get_bin_and_do_validity_checks(All.GlobalTimeStep, &bin, P[i].TimeBinGrav);
+      binold = P[i].TimeBinGrav;
+      timebin_move_particle(&TimeBinsGravity, i, binold, bin);
+      P[i].TimeBinGrav = bin;
+    }
+  /* ... and to every active hydro entry */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      timebins_get_bin_and_do_validity_checks(All.GlobalTimeStep, &bin, P[i].TimeBinHydro);
+      binold = P[i].TimeBinHydro;
+      timebin_move_particle(&TimeBinsHydro, i, binold, bin);
+      P[i].TimeBinHydro = bin;
+    }
+
+#else  /* #ifdef FORCE_EQUAL_TIMESTEPS */
+  /* Calculate and assign hydro timesteps */
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+
+      if(i < 0)
+        continue;
+
+      ti_step = get_timestep_hydro(i);
+
+      binold = P[i].TimeBinHydro;
+
+      timebins_get_bin_and_do_validity_checks(ti_step, &bin, binold);
+      /* list membership is updated first, the bin stored in P[] afterwards */
+      timebin_move_particle(&TimeBinsHydro, i, binold, bin);
+
+      P[i].TimeBinHydro = bin;
+    }
+#endif /* #ifdef FORCE_EQUAL_TIMESTEPS #else */
+
+  TIMER_STOP(CPU_TIMELINE);
+}
+
+/*! \brief Moves particles to lower timestep bin if required by gravity
+ *         timestep criterion.
+ *
+ *  \return void
+ */
+void update_timesteps_from_gravity(void)
+{
+#ifdef FORCE_EQUAL_TIMESTEPS
+  return; /* don't need to do this */
+#endif    /* #ifdef FORCE_EQUAL_TIMESTEPS */
+  /* also nothing to do when no gravity is compiled in or MESHRELAX is set */
+#if !((defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE))) || defined(MESHRELAX)
+  return;
+#endif /* #if !((defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE))) || defined(MESHRELAX) \
+        */
+
+  TIMER_START(CPU_TIMELINE);
+
+  int idx, i, binold;
+  /* an active gas cell must not sit in a larger hydro bin than its gravity bin */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(P[i].TimeBinGrav < P[i].TimeBinHydro)
+        {
+          binold = P[i].TimeBinHydro;
+          timebin_move_particle(&TimeBinsHydro, i, binold, P[i].TimeBinGrav);
+          P[i].TimeBinHydro = P[i].TimeBinGrav;
+        }
+    }
+
+  TIMER_STOP(CPU_TIMELINE);
+}
+
+#ifdef PMGRID
+/*! \brief Returns particle-mesh timestep as an integer-time variable.
+ *         TIMEBASE is halved until it does not exceed All.DtDisplacement / All.Timebase_interval.
+ *  \return Integer timestep of particle-mesh algorithm.
+ */
+integertime get_timestep_pm(void)
+{
+  integertime ti_step = TIMEBASE;
+  while(ti_step > (All.DtDisplacement / All.Timebase_interval))
+    ti_step >>= 1;
+
+  if(ti_step > (All.PM_Ti_endstep - All.PM_Ti_begstep)) /* PM-timestep wants to increase */
+    {
+      int bin    = get_timestep_bin(ti_step);
+      int binold = get_timestep_bin(All.PM_Ti_endstep - All.PM_Ti_begstep);
+
+      while(TimeBinSynchronized[bin] == 0 && bin > binold) /* make sure the new step is synchronized */
+        bin--;
+
+      ti_step = bin ? (((integertime)1) << bin) : 0; /* bin 0 stands for a zero step */
+    }
+
+  if(All.Ti_Current == TIMEBASE) /* we here finish the last timestep. */
+    ti_step = 0;
+
+  return ti_step;
+}
+#endif /* #ifdef PMGRID */
+
+/*! \brief Returns gravity timestep as an integer-time variable.
+ *
+ *  \param[in] p Index of particle in P array.
+ *
+ *  \return Integer timestep limited due to gravitational acceleration.
+ */
+integertime get_timestep_gravity(int p)
+{
+  double dt;
+  integertime ti_step;
+
+  double ax, ay, az, ac;
+  {
+    /* calculate total acceleration */
+    ax = All.cf_a2inv * P[p].GravAccel[0];
+    ay = All.cf_a2inv * P[p].GravAccel[1];
+    az = All.cf_a2inv * P[p].GravAccel[2];
+    /* the PM force is added unless explicitly excluded from the short-range criterion */
+#if defined(PMGRID) && !defined(NO_PMFORCE_IN_SHORT_RANGE_TIMESTEP)
+    ax += All.cf_a2inv * P[p].GravPM[0];
+    ay += All.cf_a2inv * P[p].GravPM[1];
+    az += All.cf_a2inv * P[p].GravPM[2];
+#endif /* #if defined(PMGRID) && !defined(NO_PMFORCE_IN_SHORT_RANGE_TIMESTEP) */
+
+    ac = sqrt(ax * ax + ay * ay + az * az); /* this is now the physical acceleration */
+
+    if(ac == 0) /* avoid a division by zero below */
+      ac = 1.0e-30;
+
+    switch(All.TypeOfTimestepCriterion)
+      {
+        case 0:
+          /* only type 0 implemented at the moment -> remove type ? */
+          dt = sqrt(2 * All.ErrTolIntAccuracy * All.cf_atime * All.ForceSoftening[P[p].SofteningType] / 2.8 / ac);
+          break;
+        default:
+          terminate("Undefined timestep criterion");
+          break;
+      }
+
+#ifdef EXTERNALGRAVITY
+    double dt_ext = sqrt(All.ErrTolIntAccuracy / P[p].dGravAccel); /* second limit based on P[p].dGravAccel */
+    if(dt_ext < dt)
+      dt = dt_ext;
+#endif
+  }
+
+  dt *= All.cf_hubble_a; /* factor is 1 for non-comoving runs (see set_cosmo_factors_for_current_time) */
+
+  if(P[p].Mass == 0 && P[p].ID == 0)
+    dt = All.MaxSizeTimestep; /* this particle has been swallowed or eliminated */
+
+  if(dt >= All.MaxSizeTimestep)
+    dt = All.MaxSizeTimestep;
+
+  if(dt < All.MinSizeTimestep)
+    {
+#ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP
+      dt = All.MinSizeTimestep;
+#else  /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP */
+      print_particle_info(p);
+      terminate("Timestep dt=%g below All.MinSizeTimestep=%g", dt, All.MinSizeTimestep);
+#endif /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP #else */
+    }
+
+#ifdef PMGRID
+  if(dt >= All.DtDisplacement) /* never exceed the long-range (PM) constraint */
+    dt = All.DtDisplacement;
+#endif /* #ifdef PMGRID */
+
+  ti_step = (integertime)(dt / All.Timebase_interval); /* truncating conversion to integer time units */
+
+  validate_timestep(dt, ti_step, p);
+
+  return ti_step;
+}
+
+/*! \brief Returns hydrodynamics timestep as an integer-time variable.
+ *
+ *  \param[in] p Index of particle in P and SphP array.
+ * + * \return Integer timestep limited due to CFL condition. + */ +integertime get_timestep_hydro(int p) +{ + double dt = 0, dt_courant = 0; + integertime ti_step; + + assert(P[p].Type == 0); + + double csnd = get_sound_speed(p); + +#if defined(VORONOI_STATIC_MESH) + csnd += sqrt(P[p].Vel[0] * P[p].Vel[0] + P[p].Vel[1] * P[p].Vel[1] + P[p].Vel[2] * P[p].Vel[2]) / All.cf_atime; +#endif /* #if defined(VORONOI_STATIC_MESH) */ + + double rad = get_cell_radius(p); + + if(csnd <= 0) + csnd = 1.0e-30; + + dt_courant = rad / csnd; + +#ifdef TREE_BASED_TIMESTEPS + if(dt_courant > SphP[p].CurrentMaxTiStep) + dt_courant = SphP[p].CurrentMaxTiStep; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + + dt_courant *= All.CourantFac; + + if(All.ComovingIntegrationOn) + dt_courant *= All.Time; + + dt = dt_courant; + +#if defined(USE_SFR) + + if(P[p].Type == 0) /* to protect using a particle that has been turned into a star */ + { + double sfr = get_starformation_rate(p); + + double dt_sfr = 0.1 * P[p].Mass / (sfr / ((All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR))); + if(dt_sfr < dt) + dt = dt_sfr; + } +#endif /* #if defined(USE_SFR) */ + +#ifdef MHD_POWELL_LIMIT_TIMESTEP + double b = sqrt(SphP[p].B[0] * SphP[p].B[0] + SphP[p].B[1] * SphP[p].B[1] + SphP[p].B[2] * SphP[p].B[2]); + double bmin = sqrt(2 * 0.01 * SphP[p].Utherm * SphP[p].Density * All.cf_atime); + double v = sqrt(P[p].Vel[0] * P[p].Vel[0] + P[p].Vel[1] * P[p].Vel[1] + P[p].Vel[2] * P[p].Vel[2]) / All.cf_atime; + double dt_powell = 0.5 * (b + bmin) / (fabs(SphP[p].DivB / All.cf_atime * v)); + + if(dt_powell < dt) + dt = dt_powell; +#endif /* #ifdef MHD_POWELL_LIMIT_TIMESTEP */ + + /* convert the physical timestep to dloga if needed. Note: If comoving integration has not been selected, + All.cf_hubble_a=1. 
+ */ + + dt *= All.cf_hubble_a; + + if(dt >= All.MaxSizeTimestep) + dt = All.MaxSizeTimestep; + +#ifdef TIMESTEP_OUTPUT_LIMIT + if(dt >= All.TimestepOutputLimit) + dt = All.TimestepOutputLimit; +#endif /* #ifdef TIMESTEP_OUTPUT_LIMIT */ + + if(dt < All.MinSizeTimestep) + { +#ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP + dt = All.MinSizeTimestep; +#else /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP */ + print_particle_info(p); + terminate("Timestep dt=%g below All.MinSizeTimestep=%g", dt, All.MinSizeTimestep); +#endif /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP #else */ + } + +#ifdef PMGRID + if(dt >= All.DtDisplacement) + dt = All.DtDisplacement; +#endif /* #ifdef PMGRID */ + + ti_step = (integertime)(dt / All.Timebase_interval); + + validate_timestep(dt, ti_step, p); + + return ti_step; +} + +/*! \brief Checks if timestep is a valid one. + * + * Terminates the simulation with error message otherwise. + * + * \return void + */ +void validate_timestep(double dt, integertime ti_step, int p) +{ + if(!(ti_step > 0 && ti_step < TIMEBASE)) + { + printf( + "\nError: An invalid timestep was assigned on the integer timeline!\n" + "We better stop.\n" + "Task=%d Part-ID=%lld type=%d", + ThisTask, (long long)P[p].ID, P[p].Type); + + printf("tibase=%g dt=%g ti_step=%d, xyz=(%g|%g|%g) vel=(%g|%g|%g) tree=(%g|%g|%g) mass=%g\n\n", All.Timebase_interval, dt, + (int)ti_step, P[p].Pos[0], P[p].Pos[1], P[p].Pos[2], P[p].Vel[0], P[p].Vel[1], P[p].Vel[2], P[p].GravAccel[0], + P[p].GravAccel[1], P[p].GravAccel[2], P[p].Mass); + + print_particle_info(p); + myflush(stdout); + terminate("integer timestep outside of allowed range"); + } + + if(ti_step == 1) + { + printf("Time-step of integer size 1 found for particle i=%d, pos=(%g|%g|%g), ID=%lld, dt=%g\n", p, P[p].Pos[0], P[p].Pos[1], + P[p].Pos[2], (long long)P[p].ID, dt); + print_particle_info(p); + } +} + +/*! 
\brief Checks if timestep according to its present timebin is too large
+ *         compared to the requirements from gravity and hydrodynamics
+ *
+ *  I.e. does the cell need to be moved to a finer timebin?
+ *
+ *  \param[in] p Index of particle/cell.
+ *  \param[in] bin Timebin to compare to.
+ *
+ *  \return 0: not too large; 1: too large.
+ */
+int test_if_grav_timestep_is_too_large(int p, int bin)
+{
+  integertime ti_step_bin = bin ? (((integertime)1) << bin) : 0; /* step of the bin to test; bin 0 counts as 0 */
+
+  integertime ti_step = get_timestep_gravity(p); /* step currently required by the gravity criterion */
+
+  if(P[p].Type == 0)
+    {
+      if((P[p].ID != 0) && (P[p].Mass != 0))
+        {
+          /* for gas cells the required step is additionally limited by the step of the present hydro bin */
+          int bin_hydro             = P[p].TimeBinHydro;
+          integertime ti_step_hydro = bin_hydro ? (((integertime)1) << bin_hydro) : 0;
+          if(ti_step_hydro < ti_step)
+            ti_step = ti_step_hydro;
+        }
+    }
+
+  if(ti_step < ti_step_bin)
+    return 1;
+  else
+    return 0;
+}
+
+#ifdef PMGRID
+/*! \brief Sets the global timestep for the long-range force calculation.
+ *
+ *  Evaluates timestep constraints due to long range force acceleration of all
+ *  simulation particles and finds its global minimum.
+ *
+ *  \return void
+ */
+void find_long_range_step_constraint(void)
+{
+  int p;
+  double ax, ay, az, ac;
+  double dt, dtmin = MAX_DOUBLE_NUMBER;
+
+  for(p = 0; p < NumPart; p++)
+    {
+      if(P[p].Type == 0) /* gas cells (Type 0) do not enter this constraint */
+        continue;
+      /* optional bit mask restricting the constraint to selected particle types */
+#ifdef PM_TIMESTEP_BASED_ON_TYPES
+      if(((1 << P[p].Type) & (PM_TIMESTEP_BASED_ON_TYPES)))
+#endif /* #ifdef PM_TIMESTEP_BASED_ON_TYPES */
+        {
+          /* calculate acceleration */
+          ax = All.cf_a2inv * P[p].GravPM[0];
+          ay = All.cf_a2inv * P[p].GravPM[1];
+          az = All.cf_a2inv * P[p].GravPM[2];
+
+          ac = sqrt(ax * ax + ay * ay + az * az); /* this is now the physical acceleration */
+
+          if(ac < MIN_FLOAT_NUMBER) /* avoid a division by zero below */
+            ac = MIN_FLOAT_NUMBER;
+
+          dt = sqrt(2.0 * All.ErrTolIntAccuracy * All.cf_atime * All.ForceSoftening[P[p].SofteningType] / (2.8 * ac));
+
+          dt *= All.cf_hubble_a;
+
+          if(dt < dtmin)
+            dtmin = dt;
+        }
+    }
+
+  dtmin *= 2.0; /* move it one timebin higher to prevent being too conservative */
+  /* global minimum over all MPI tasks is stored directly in All.DtDisplacement */
+  MPI_Allreduce(&dtmin, &All.DtDisplacement, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+
+  mpi_printf("TIMESTEPS: displacement time constraint: %g  (%g)\n", All.DtDisplacement, All.MaxSizeTimestep);
+  /* clamp the result to [All.MinSizeTimestep, All.MaxSizeTimestep] */
+  if(All.DtDisplacement > All.MaxSizeTimestep)
+    All.DtDisplacement = All.MaxSizeTimestep;
+
+  if(All.DtDisplacement < All.MinSizeTimestep)
+    All.DtDisplacement = All.MinSizeTimestep;
+}
+#endif /* #ifdef PMGRID */
+
+/*! \brief Converts an integer time to a time bin.
+ *         The bin is the position of the highest set bit of ti_step (2^bin <= ti_step < 2^(bin+1)).
+ *  \param[in] ti_step Timestep as integertime variable.
+ *         A step of 0 maps to bin 0; a step of exactly 1 terminates the run.
+ *  \return Associated time-bin.
+ */
+int get_timestep_bin(integertime ti_step)
+{
+  int bin = -1;
+
+  if(ti_step == 0)
+    return 0;
+
+  if(ti_step == 1)
+    terminate("time-step of integer size 1 not allowed\n");
+  /* count how often ti_step can be halved before it reaches zero */
+  while(ti_step)
+    {
+      bin++;
+      ti_step >>= 1;
+    }
+
+  return bin;
+}
+
+/*! \brief Calculates time difference in Gyr between two time integration unit
+ *         values.
+ *
+ *  If simulation non-cosmological, a0 and a1 are proper time in code units,
+ *  for cosmological simulation a0 and a1 are scalefactors.
+ *
+ *  \param[in] a0 First time or scalefactor.
+ *  \param[in] a1 Second time or scalefactor.
+ *
+ *  \return Time difference in Gyr.
+ */
+double get_time_difference_in_Gyr(double a0, double a1)
+{
+  double result, time_diff = 0, t0, t1, factor1, factor2, term1, term2;
+
+  if(All.ComovingIntegrationOn)
+    {
+      if(All.OmegaLambda + All.Omega0 != 1) /* only a warning: the flat-cosmology formula below is still evaluated */
+        printf("only implemented for flat cosmology so far.");
+      /* t(a) = 2 / (3 sqrt(OmegaLambda)) * log( sqrt(OmegaLambda/Omega0) a^1.5 + sqrt(1 + OmegaLambda/Omega0 a^3) ) */
+      factor1 = 2.0 / (3.0 * sqrt(All.OmegaLambda));
+
+      term1   = sqrt(All.OmegaLambda / All.Omega0) * pow(a0, 1.5);
+      term2   = sqrt(1 + All.OmegaLambda / All.Omega0 * pow(a0, 3));
+      factor2 = log(term1 + term2);
+
+      t0 = factor1 * factor2; /* t(a0) */
+
+      term1   = sqrt(All.OmegaLambda / All.Omega0) * pow(a1, 1.5);
+      term2   = sqrt(1 + All.OmegaLambda / All.Omega0 * pow(a1, 3));
+      factor2 = log(term1 + term2);
+
+      t1 = factor1 * factor2; /* t(a1) */
+
+      result = t1 - t0;
+
+      time_diff = result / (HUBBLE * All.HubbleParam); /* now in seconds */
+      time_diff /= SEC_PER_MEGAYEAR * 1000;            /* now in gigayears */
+    }
+  else
+    {
+      time_diff = (a1 - a0) * All.UnitTime_in_s / All.HubbleParam; /* now in seconds */
+      time_diff /= SEC_PER_MEGAYEAR * 1000;                        /* now in gigayears */
+    }
+
+  return time_diff;
+}
+
+/*! \brief Initializes time bin data.
+ *
+ *  Does not allocate anything! List pointers are set to 0 and every bin is marked empty (First/Last = -1).
+ *  The TimeBinCount array is not touched by this function.
+ *  \param[out] tbData Time bin data to be initialized.
+ *  \param[in] name Name stored in time bin data (at most 99 characters are copied).
+ *  \param[in] MaxPart Pointer to the maximum number of particles; the pointer itself is stored, not the value.
+ *
+ *  \return void
+ */
+void timebins_init(struct TimeBinData *tbData, const char *name, int *MaxPart)
+{
+  int i;
+  tbData->NActiveParticles   = 0;
+  tbData->ActiveParticleList = 0;
+
+  for(i = 0; i < TIMEBINS; i++)
+    {
+      tbData->FirstInTimeBin[i] = -1;
+      tbData->LastInTimeBin[i]  = -1;
+    }
+
+  tbData->NextInTimeBin = 0;
+  tbData->PrevInTimeBin = 0;
+
+  strncpy(tbData->Name, name, 99);
+  tbData->Name[99] = 0; /* strncpy does not terminate the string when name has 99 or more characters */
+  tbData->MaxPart  = MaxPart;
+}
+
+/*! \brief Allocates linked lists in time bin data.
+ *
+ *  With tbData->MaxPart elements.
+ *
+ *  \param[in, out] tbData Pointer to time bin data to be allocated.
+ *
+ *  \return void
+ */
+void timebins_allocate(struct TimeBinData *tbData)
+{
+  char Identifier[200];
+  Identifier[199] = 0; /* explicit terminator in the last byte of the label buffer */
+  /* each array gets *(tbData->MaxPart) ints and a label built from a fixed prefix plus tbData->Name */
+  snprintf(Identifier, 199, "NextActiveParticle%s", tbData->Name);
+  tbData->ActiveParticleList = (int *)mymalloc_movable(&tbData->ActiveParticleList, Identifier, *(tbData->MaxPart) * sizeof(int));
+
+  snprintf(Identifier, 199, "NextInTimeBin%s", tbData->Name);
+  tbData->NextInTimeBin = (int *)mymalloc_movable(&tbData->NextInTimeBin, Identifier, *(tbData->MaxPart) * sizeof(int));
+
+  snprintf(Identifier, 199, "PrevInTimeBin%s", tbData->Name);
+  tbData->PrevInTimeBin = (int *)mymalloc_movable(&tbData->PrevInTimeBin, Identifier, *(tbData->MaxPart) * sizeof(int));
+}
+
+/*! \brief Re-allocates linked lists in time bin data.
+ *
+ *  With tbData->MaxPart elements.
+ *  Resizes ActiveParticleList, NextInTimeBin and PrevInTimeBin to the current value of *(tbData->MaxPart).
+ *  \param[in, out] tbData Pointer to time bin data to be re-allocated.
+ *
+ *  \return void
+ */
+void timebins_reallocate(struct TimeBinData *tbData)
+{
+  tbData->ActiveParticleList = (int *)myrealloc_movable(tbData->ActiveParticleList, *(tbData->MaxPart) * sizeof(int));
+  tbData->NextInTimeBin      = (int *)myrealloc_movable(tbData->NextInTimeBin, *(tbData->MaxPart) * sizeof(int));
+  tbData->PrevInTimeBin      = (int *)myrealloc_movable(tbData->PrevInTimeBin, *(tbData->MaxPart) * sizeof(int));
+}
+
+/*! \brief Gets timebin and checks if bin is valid.
+ *
+ *  Checks for example if old bin is synchronized with the bin it should be
+ *  moved to.
+ *
+ *  \param[in] ti_step Timestep in integertime.
+ *  \param[out] bin_new New time bin.
+ *  \param[in] bin_old Old time bin.
+ *
+ *  \return void
+ */
+void timebins_get_bin_and_do_validity_checks(integertime ti_step, int *bin_new, int bin_old)
+{
+  /* make it a power 2 subdivision */
+  integertime ti_min = TIMEBASE;
+  while(ti_min > ti_step)
+    ti_min >>= 1;
+  ti_step = ti_min; /* largest value obtained by halving TIMEBASE that does not exceed the requested step */
+
+  /* get timestep bin */
+  int bin = -1;
+
+  if(ti_step == 0)
+    bin = 0;
+
+  if(ti_step == 1)
+    terminate("time-step of integer size 1 not allowed\n");
+  /* note: this loop shifts ti_step down to 0; it only becomes non-zero again in the branch below */
+  while(ti_step)
+    {
+      bin++;
+      ti_step >>= 1;
+    }
+
+  if(bin > bin_old) /* timestep wants to increase */
+    {
+      while(TimeBinSynchronized[bin] == 0 && bin > bin_old) /* make sure the new step is synchronized */
+        bin--;
+
+      ti_step = bin ? (((integertime)1) << bin) : 0;
+    }
+
+  if(All.Ti_Current >= TIMEBASE) /* we here finish the last timestep. */
+    {
+      ti_step = 0;
+      bin     = 0;
+    }
+  /* ti_step is 0 here unless the step was allowed to grow above, so the check only acts in that case */
+  if((TIMEBASE - All.Ti_Current) < ti_step) /* check that we don't run beyond the end */
+    {
+      terminate("we are beyond the end of the timeline"); /* should not happen */
+    }
+
+  *bin_new = bin;
+}
+
+/*! \brief Move particle from one time bin to another.
+ *
+ *  \param[in, out] tbData Time bin data structure to operate on.
+ *  \param[in] p Index of the particle to be moved.
+ *  \param[in] timeBin_old Old time bin of particle to be moved.
+ *  \param[in] timeBin_new New time bin of particle to be moved.
+ *
+ *  \return void
+ */
+void timebin_move_particle(struct TimeBinData *tbData, int p, int timeBin_old, int timeBin_new)
+{
+  if(timeBin_old == timeBin_new) /* nothing to move */
+    return;
+
+  tbData->TimeBinCount[timeBin_old]--;
+  /* step 1: unlink p from the doubly linked list of the old bin */
+  int prev = tbData->PrevInTimeBin[p];
+  int next = tbData->NextInTimeBin[p];
+
+  if(tbData->FirstInTimeBin[timeBin_old] == p)
+    tbData->FirstInTimeBin[timeBin_old] = next;
+  if(tbData->LastInTimeBin[timeBin_old] == p)
+    tbData->LastInTimeBin[timeBin_old] = prev;
+  if(prev >= 0)
+    tbData->NextInTimeBin[prev] = next;
+  if(next >= 0)
+    tbData->PrevInTimeBin[next] = prev;
+  /* step 2: append p to the tail of the new bin (or make it the only element of an empty bin) */
+  if(tbData->TimeBinCount[timeBin_new] > 0)
+    {
+      tbData->PrevInTimeBin[p]                                  = tbData->LastInTimeBin[timeBin_new];
+      tbData->NextInTimeBin[tbData->LastInTimeBin[timeBin_new]] = p;
+      tbData->NextInTimeBin[p]                                  = -1;
+      tbData->LastInTimeBin[timeBin_new]                        = p;
+    }
+  else
+    {
+      tbData->FirstInTimeBin[timeBin_new] = tbData->LastInTimeBin[timeBin_new] = p;
+      tbData->PrevInTimeBin[p] = tbData->NextInTimeBin[p] = -1;
+    }
+
+  tbData->TimeBinCount[timeBin_new]++;
+  /* keep the per-bin star formation rate sums in step, but only for gas cells in the hydro structure */
+#ifdef USE_SFR
+  if((P[p].Type == 0) && (tbData == &TimeBinsHydro))
+    timebin_move_sfr(p, timeBin_old, timeBin_new);
+#endif /* #ifdef USE_SFR */
+}
+
+/*! \brief Removes a particle from time bin structure.
+ *
+ *  Can only be done with active particles.
+ *
+ *  \param[in, out] tbData Time bin structure to be operated on.
+ *  \param[in] idx Index of particle in ActiveParticleList.
+ *  \param[in] bin Timebin in which particle is currently. If left -1, function
+ *                 will determine bin by itself.
+ * + * \return void + */ +void timebin_remove_particle(struct TimeBinData *tbData, int idx, int bin) +{ + int p = tbData->ActiveParticleList[idx]; + tbData->ActiveParticleList[idx] = -1; /* mark the slot as removed; loops over the list skip negative entries */ + + if(bin == -1) /* NOTE(review): P[p] is read here before the p >= 0 check below -- assumes the slot still held a valid particle; confirm with callers */ + { + if(tbData == &TimeBinsGravity) + bin = P[p].TimeBinGrav; + else + bin = P[p].TimeBinHydro; + } + + tbData->TimeBinCount[bin]--; + + if(p >= 0) /* unlink p from the doubly linked list of its bin */ + { + int prev = tbData->PrevInTimeBin[p]; + int next = tbData->NextInTimeBin[p]; + + if(prev >= 0) + tbData->NextInTimeBin[prev] = next; + if(next >= 0) + tbData->PrevInTimeBin[next] = prev; + + if(tbData->FirstInTimeBin[bin] == p) + tbData->FirstInTimeBin[bin] = next; + if(tbData->LastInTimeBin[bin] == p) + tbData->LastInTimeBin[bin] = prev; + } +} + +/*! \brief Inserts a particle into the timebin struct behind another already + * existing particle. + * + * \param[in, out] tbData Time bin structure to be operated on. + * \param[in] i_new New index in linked lists of time bin data. + * \param[in] i_old Index of the existing entry to insert behind; if negative, the last entry of the bin is used. + * \param[in] timeBin Time bin to which it should be added. + * \param[in] addToListOfActiveParticles Flag if particle should be added as + * an active particle. 
+ * + * \return void + */ +void timebin_add_particle(struct TimeBinData *tbData, int i_new, int i_old, int timeBin, int addToListOfActiveParticles) +{ + tbData->TimeBinCount[timeBin]++; + + if(i_old < 0) + { + /* if we don't have an existing particle to add it after, let's take the last one in this timebin */ + i_old = tbData->LastInTimeBin[timeBin]; + + if(i_old < 0) + { + /* the timebin is empty at the moment, so just add the new particle */ + tbData->FirstInTimeBin[timeBin] = i_new; + tbData->LastInTimeBin[timeBin] = i_new; + tbData->NextInTimeBin[i_new] = -1; + tbData->PrevInTimeBin[i_new] = -1; + } + } + + if(i_old >= 0) + { + /* splice i_new in right behind i_old; if i_old is still negative, the empty-bin case above already added it */ + tbData->PrevInTimeBin[i_new] = i_old; + tbData->NextInTimeBin[i_new] = tbData->NextInTimeBin[i_old]; + if(tbData->NextInTimeBin[i_old] >= 0) + tbData->PrevInTimeBin[tbData->NextInTimeBin[i_old]] = i_new; + tbData->NextInTimeBin[i_old] = i_new; + if(tbData->LastInTimeBin[timeBin] == i_old) + tbData->LastInTimeBin[timeBin] = i_new; + } + + if(addToListOfActiveParticles) + { + tbData->ActiveParticleList[tbData->NActiveParticles] = i_new; + tbData->NActiveParticles++; + } +} + +/*! \brief Removes active particles that have ID and Mass 0, i.e. that were + * flagged as deleted from time bin data structure. + * + * \param[in, out] tbData Time bin data structure to be operated on. + * + * \return void + */ +void timebin_cleanup_list_of_active_particles(struct TimeBinData *tbData) +{ + int idx, i; + for(idx = 0; idx < tbData->NActiveParticles; idx++) + { + i = tbData->ActiveParticleList[idx]; + if(i < 0) + continue; /* slot was already emptied */ + + if(P[i].ID == 0 && P[i].Mass == 0) + timebin_remove_particle(tbData, idx, -1); /* bin = -1: the callee looks up the particle's bin itself */ + } +} + +#ifdef USE_SFR +/*! \brief Updates TimeBinSfr when a gas cell changes timebin. + * + * \param[in] p Index of cell in SphP array. 
+ * \param[in] timeBin_old Old time bin. + * \param[in] timeBin_new New time bin. 
+ * + * \return void + */ +void timebin_move_sfr(int p, int timeBin_old, int timeBin_new) +{ + TimeBinSfr[timeBin_old] -= SphP[p].Sfr; + TimeBinSfr[timeBin_new] += SphP[p].Sfr; +} +#endif /* #ifdef USE_SFR */ + +/*! \brief Creates list of active particles up to a specified timebin. + * + * \param[in, out] tbData Time bin data to be operated on. + * \param[in] timebin Highest timebin to include; bins timebin down to 0 are added. + * + * \return void + */ +void timebin_make_list_of_active_particles_up_to_timebin(struct TimeBinData *tbData, int timebin) +{ + int tbin; + tbData->NActiveParticles = 0; /* start from an empty list */ + for(tbin = timebin; tbin >= 0; tbin--) + timebin_add_particles_of_timebin_to_list_of_active_particles(tbData, tbin); +} + +/*! \brief Add particles of a specific timebin to active particle list. + * + * \param[in, out] tbData Time bin data to be operated on. + * \param[in] timebin Time bin which should be included. + * + * \return void + */ +void timebin_add_particles_of_timebin_to_list_of_active_particles(struct TimeBinData *tbData, int timebin) +{ + int i; + for(i = tbData->FirstInTimeBin[timebin]; i >= 0; i = tbData->NextInTimeBin[i]) + if(!(P[i].ID == 0 && P[i].Mass == 0)) /* skip particles with ID and Mass both 0, i.e. flagged as deleted */ + { + tbData->ActiveParticleList[tbData->NActiveParticles] = i; + tbData->NActiveParticles++; + } +} diff --git a/src/amuse/community/arepo/src/time_integration/timestep.h b/src/amuse/community/arepo/src/time_integration/timestep.h new file mode 100644 index 0000000000..c110178a31 --- /dev/null +++ b/src/amuse/community/arepo/src/time_integration/timestep.h @@ -0,0 +1,88 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/time_integration/timestep.h + * \date 05/2018 + * \brief Header for timestep criteria. + * \details + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 29.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef TIMESTEP_H +#define TIMESTEP_H + +#include "../main/allvars.h" + +#ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME +typedef long long integertime; +#define TIMEBINS 60 +#define TIMEBASE \ + (((long long)1) << TIMEBINS) /* The simulated timespan is mapped onto the integer interval [0,TIMEBASE], \ + * where TIMEBASE needs to be a power of 2. */ +#else /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME */ +typedef int integertime; +#define TIMEBINS 29 +#define TIMEBASE (1 << TIMEBINS) +#endif /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME #else */ + +/*! \brief Linked list for particles in specific timebin. 
+ */ +struct TimeBinData +{ + int NActiveParticles; /* number of entries used in ActiveParticleList */ + long long GlobalNActiveParticles; /* presumably NActiveParticles summed over all tasks -- TODO confirm, not set in the code shown here */ + int *ActiveParticleList; /* particle indices; an entry of -1 marks a removed particle */ + int TimeBinCount[TIMEBINS]; /* number of particles in each timebin */ + + int FirstInTimeBin[TIMEBINS]; /* head of each bin's linked list, negative if the bin is empty */ + int LastInTimeBin[TIMEBINS]; /* tail of each bin's linked list, negative if the bin is empty */ + int *NextInTimeBin; /* per particle: successor within its bin, -1 at the end */ + int *PrevInTimeBin; /* per particle: predecessor within its bin, -1 at the start */ + char Name[100]; /* label passed to timebins_init() */ + int *MaxPart; /* pointer passed to timebins_init(); presumably the particle limit used to size the per-particle arrays -- TODO confirm */ +}; + +void find_timesteps_without_gravity(void); +void update_timesteps_from_gravity(void); +integertime get_timestep_gravity(int p); +integertime get_timestep_hydro(int p); +integertime get_timestep_pm(void); +int test_if_grav_timestep_is_too_large(int p, int bin); +void validate_timestep(double dt, integertime ti_step, int p); +int get_timestep_bin(integertime ti_step); +double get_time_difference_in_Gyr(double a0, double a1); + +/* TimeBinData stuff */ +void timebins_init(struct TimeBinData *tbData, const char *name, int *MaxPart); +void timebins_allocate(struct TimeBinData *tbData); +void timebins_reallocate(struct TimeBinData *tbData); +void timebins_get_bin_and_do_validity_checks(integertime ti_step, int *bin_new, int bin_old); +void timebin_move_particle(struct TimeBinData *tbData, int p, int timeBin_old, int timeBin_new); +void timebin_add_particle(struct TimeBinData *tbData, int i_new, int i_old, int timeBin, int addToListOfActiveParticles); +void timebin_remove_particle(struct TimeBinData *tbData, int idx, int bin); 
+void timebin_cleanup_list_of_active_particles(struct TimeBinData *tbData); +void timebin_move_sfr(int p, int timeBin_old, int timeBin_new); /* only defined when USE_SFR is set */ +void timebin_make_list_of_active_particles_up_to_timebin(struct TimeBinData *tbData, int timebin); +void timebin_add_particles_of_timebin_to_list_of_active_particles(struct TimeBinData *tbData, int timebin); + +#endif /* #ifndef TIMESTEP_H */ diff --git a/src/amuse/community/arepo/src/time_integration/timestep_treebased.c b/src/amuse/community/arepo/src/time_integration/timestep_treebased.c new file mode 100644 index 0000000000..1e58a240cb --- /dev/null +++ b/src/amuse/community/arepo/src/time_integration/timestep_treebased.c @@ 
-0,0 +1,494 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/time_integration/timestep_treebased.c + * \date 05/2018 + * \brief Algorithm to compute non-local time-step criterion. + * \details This is necessary for local time-stepping if material that + * would require a short time-step is arriving in cells that + * would formally be integrated at a large time-step. + * contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void tree_based_timesteps(void) + * int timestep_evaluate(int target, int mode, int threadid) + * void tree_based_timesteps_setsoundspeeds(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef TREE_BASED_TIMESTEPS +/*! 
\brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Vel[3]; + MyFloat Csnd; /* copied from SphP[i].Csnd */ + MyFloat cellrad; /* result of get_cell_radius(i) */ + MyFloat CurrentMaxTiStep; /* copied from SphP[i].CurrentMaxTiStep */ + + int Firstnode; /* this is needed as part of the communication algorithm */ +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First node of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + int k; + + for(k = 0; k < 3; k++) + { + in->Pos[k] = P[i].Pos[k]; + in->Vel[k] = P[i].Vel[k]; + } + + in->Csnd = SphP[i].Csnd; + in->cellrad = get_cell_radius(i); + in->CurrentMaxTiStep = SphP[i].CurrentMaxTiStep; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyFloat CurrentMaxTiStep; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + SphP[i].CurrentMaxTiStep = out->CurrentMaxTiStep; + } + else /* combine: keep the smaller of the stored and the incoming limit */ + { + if(SphP[i].CurrentMaxTiStep > out->CurrentMaxTiStep) + SphP[i].CurrentMaxTiStep = out->CurrentMaxTiStep; + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls timestep_evaluate() in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int idx; + + /* do local particles */ + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + idx = NextParticle++; + + if(idx >= TimeBinsHydro.NActiveParticles) + break; + + int i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; /* slot of a removed particle */ + + if(P[i].Mass == 0 && P[i].ID == 0) + continue; /* skip eliminated cells */ + + timestep_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls timestep_evaluate() in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + timestep_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); /* here i indexes the import buffer (DataGet), not the particle array */ + } + } +} + +/*! \brief Main function to call tree-based timesteps. + * + * This function is called in find_timesteps_without_gravity() (timestep.c). 
+ * + * \return void + */ +void tree_based_timesteps(void) +{ + CPU_Step[CPU_MISC] += measure_time(); + + tree_based_timesteps_setsoundspeeds(); + + generic_set_MaxNexport(); + + double t0 = second(); + + generic_comm_pattern(TimeBinsHydro.NActiveParticles, kernel_local, kernel_imported); + + double t1 = second(); + + mpi_printf("TIMESTEPS: timestep-treewalk: sec=%g\n", timediff(t0, t1)); + + CPU_Step[CPU_TREE_TIMESTEPS] += measure_time(); +} + +/*! \brief The 'core' of the tree-based timestep computation. + * + * A target particle is specified which may either be local, or reside in the + * communication buffer. + * + * \param[in] target Index of particle/cell. + * \param[in] mode Flag if it operates on local or imported data. + * \param[in] threadid ID of thread. + * + * \return cost, i.e. number of nodes that had to be opened. + */ +int timestep_evaluate(int target, int mode, int threadid) +{ + int k, cost = 0, numnodes, *firstnode; + MyDouble *pos; + MyFloat *vel; + double dxp, dxm, dyp, dym, dzp, dzm, pos_m[3], pos_p[3]; + int no, p; + struct NgbNODE *current; + double dx, dy, dz, dist, csnd, cellrad, xtmp, ytmp, ztmp; + + data_out out; + data_in local, *target_data; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->Pos; + vel = target_data->Vel; + csnd = target_data->Csnd; + cellrad = target_data->cellrad; + + out.CurrentMaxTiStep = target_data->CurrentMaxTiStep; + + pos_m[0] = pos[0] - boxSize_X; + pos_p[0] = pos[0] + boxSize_X; + pos_m[1] = pos[1] - boxSize_Y; + pos_p[1] = pos[1] + boxSize_Y; + pos_m[2] = pos[2] - boxSize_Z; + pos_p[2] = pos[2] + boxSize_Z; + + double atimeinv; + if(All.ComovingIntegrationOn) + atimeinv = 1 / All.Time; + else + atimeinv = 1.0; + + /* Now start the actual tree-walk computation for this particle */ + + for(k = 
0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = Ngb_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = Ngb_Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + cost++; + + if(no < Ngb_MaxPart) /* single particle */ + { + p = no; + no = Ngb_Nextnode[no]; + + if(P[p].Type > 0) + continue; + + if(P[p].Mass == 0 && P[p].ID == 0) /* skip eliminated cells */ + continue; + + if(P[p].Ti_Current != All.Ti_Current) + { + drift_particle(p, All.Ti_Current); + } + + dx = NEAREST_X(P[p].Pos[0] - pos[0]); + dy = NEAREST_Y(P[p].Pos[1] - pos[1]); + dz = NEAREST_Z(P[p].Pos[2] - pos[2]); + + dist = sqrt(dx * dx + dy * dy + dz * dz); + + if(dist > 0) + { + double vsig = csnd + SphP[p].Csnd - + ((P[p].Vel[0] - vel[0]) * dx + (P[p].Vel[1] - vel[1]) * dy + (P[p].Vel[2] - vel[2]) * dz) / dist; + + if(vsig > 0) + { + dist += cellrad; /* take one cell radius as minimum distance in order to protect against unreasonably small steps + if two mesh-generating points are extremely close */ + if(out.CurrentMaxTiStep > dist / vsig) + out.CurrentMaxTiStep = dist / vsig; + } + } + } + else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal */ + { + if(mode == MODE_IMPORTED_PARTICLES) + { + if(no < + Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + break; + } + + current = &Ngb_Nodes[no]; + + if(current->Ti_Current != All.Ti_Current) + { + drift_node(current, All.Ti_Current); + } + + if(pos[0] > current->u.d.range_max[0] && pos_m[0] < current->u.d.range_min[0]) + { + dxp = pos[0] - current->u.d.range_max[0]; + dxm = pos_m[0] - current->u.d.range_min[0]; /* negative */ + } + else if(pos_p[0] > current->u.d.range_max[0] && pos[0] < current->u.d.range_min[0]) + { + dxp = pos_p[0] - current->u.d.range_max[0]; + dxm = pos[0] - current->u.d.range_min[0]; /* negative */ + } + else + dxp = dxm = 0; + + if(pos[1] > current->u.d.range_max[1] && pos_m[1] < current->u.d.range_min[1]) + { + dyp = 
pos[1] - current->u.d.range_max[1]; + dym = pos_m[1] - current->u.d.range_min[1]; /* negative */ + } + else if(pos_p[1] > current->u.d.range_max[1] && pos[1] < current->u.d.range_min[1]) + { + dyp = pos_p[1] - current->u.d.range_max[1]; + dym = pos[1] - current->u.d.range_min[1]; /* negative */ + } + else + dyp = dym = 0; + + if(pos[2] > current->u.d.range_max[2] && pos_m[2] < current->u.d.range_min[2]) + { + dzp = pos[2] - current->u.d.range_max[2]; + dzm = pos_m[2] - current->u.d.range_min[2]; /* negative */ + } + else if(pos_p[2] > current->u.d.range_max[2] && pos[2] < current->u.d.range_min[2]) + { + dzp = pos_p[2] - current->u.d.range_max[2]; + dzm = pos[2] - current->u.d.range_min[2]; /* negative */ + } + else + dzp = dzm = 0; + + double vsig = csnd + ExtNgb_Nodes[no].MaxCsnd; + + int flag = 0; + + if(dxp + cellrad < out.CurrentMaxTiStep * (vsig + (ExtNgb_Nodes[no].vmax[0] * atimeinv - vel[0]))) + flag++; + else if(-dxm + cellrad < out.CurrentMaxTiStep * (vsig - (ExtNgb_Nodes[no].vmin[0] * atimeinv - vel[0]))) + flag++; + + if(dyp + cellrad < out.CurrentMaxTiStep * (vsig + (ExtNgb_Nodes[no].vmax[1] * atimeinv - vel[1]))) + flag++; + else if(-dym + cellrad < out.CurrentMaxTiStep * (vsig - (ExtNgb_Nodes[no].vmin[1] * atimeinv - vel[1]))) + flag++; + + if(dzp + cellrad < out.CurrentMaxTiStep * (vsig + (ExtNgb_Nodes[no].vmax[2] * atimeinv - vel[2]))) + flag++; + else if(-dzm + cellrad < out.CurrentMaxTiStep * (vsig - (ExtNgb_Nodes[no].vmin[2] * atimeinv - vel[2]))) + flag++; + + if(flag >= 3) + { + /* need to open */ + no = current->u.d.nextnode; + continue; + } + + /* in this case the node can be discarded */ + no = current->u.d.sibling; + continue; + } + else /* pseudo particle */ + { + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == 1"); + + if(target >= 0) /* if no target is given, export will not occur */ + ngb_treefind_export_node_threads(no, target, threadid, 0); + + no = Ngb_Nextnode[no - Ngb_MaxNodes]; + continue; + } + } + } + + /* Now collect 
the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return cost; +} + +/*! \brief Sets local sound speed and time-step limits from local conditions. + * + * This is a sort of initialization of the tree-based time-steps algorithm. + * + * \return void + */ +void tree_based_timesteps_setsoundspeeds(void) +{ + int idx, i; + double rad, csnd; + double hubble_a, atime; + + if(All.ComovingIntegrationOn) + { + hubble_a = hubble_function(All.Time); + atime = All.Time; + } + else + hubble_a = atime = 1; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + csnd = get_sound_speed(i); + + if(csnd <= 1.0e-30) + csnd = 1.0e-30; + + SphP[i].Csnd = csnd; + + rad = get_cell_radius(i); + +#ifdef VORONOI_STATIC_MESH + csnd += sqrt(P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]) / All.cf_atime; +#else /* #ifdef VORONOI_STATIC_MESH */ + csnd += sqrt((P[i].Vel[0] - SphP[i].VelVertex[0]) * (P[i].Vel[0] - SphP[i].VelVertex[0]) + + (P[i].Vel[1] - SphP[i].VelVertex[1]) * (P[i].Vel[1] - SphP[i].VelVertex[1]) + + (P[i].Vel[2] - SphP[i].VelVertex[2]) * (P[i].Vel[2] - SphP[i].VelVertex[2])) / + All.cf_atime; +#endif /* #ifdef VORONOI_STATIC_MESH #else */ + + SphP[i].CurrentMaxTiStep = rad / csnd; + + /* note: for cosmological integration, CurrentMaxTiStep stores 1/a times the maximum allowed physical timestep */ + + if(SphP[i].CurrentMaxTiStep >= All.MaxSizeTimestep / (atime * hubble_a) / All.CourantFac) + SphP[i].CurrentMaxTiStep = All.MaxSizeTimestep / (atime * hubble_a) / All.CourantFac; + } +} + +#endif /* #ifdef TREE_BASED_TIMESTEPS */ diff --git a/src/amuse/community/arepo/src/utils/allocate.c b/src/amuse/community/arepo/src/utils/allocate.c new file mode 100644 index 0000000000..234ec06041 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/allocate.c @@ -0,0 
+1,133 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/allocate.c + * \date 05/2018 + * \brief Functions to allocate and reallocate global arrays. + * \details contains functions + * void allocate_memory(void) + * void reallocate_memory_maxpart(void) + * void reallocate_memory_maxpartsph(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 03.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Allocates memory for global arrays. + * + * This routine allocates memory for + * - particle storage, both the collisionless and the cells (SPH particles), + * - the ordered binary tree of the timeline, + * - communication buffers. 
+ * + * \return void + */ +void allocate_memory(void) +{ + int NTaskTimesThreads; + + NTaskTimesThreads = MaxThreads * NTask; + + Exportflag = (int *)mymalloc("Exportflag", NTaskTimesThreads * sizeof(int)); + Exportindex = (int *)mymalloc("Exportindex", NTaskTimesThreads * sizeof(int)); + Exportnodecount = (int *)mymalloc("Exportnodecount", NTaskTimesThreads * sizeof(int)); + + Send = (struct send_recv_counts *)mymalloc("Send", sizeof(struct send_recv_counts) * NTask); + Recv = (struct send_recv_counts *)mymalloc("Recv", sizeof(struct send_recv_counts) * NTask); + + TasksThatSend = (int *)mymalloc("TasksThatSend", sizeof(int) * NTask); + TasksThatRecv = (int *)mymalloc("TasksThatRecv", sizeof(int) * NTask); + + Send_count = (int *)mymalloc("Send_count", sizeof(int) * NTaskTimesThreads); + Send_offset = (int *)mymalloc("Send_offset", sizeof(int) * NTaskTimesThreads); + Recv_count = (int *)mymalloc("Recv_count", sizeof(int) * NTask); + Recv_offset = (int *)mymalloc("Recv_offset", sizeof(int) * NTask); + + Send_count_nodes = (int *)mymalloc("Send_count_nodes", sizeof(int) * NTask); + Send_offset_nodes = (int *)mymalloc("Send_offset_nodes", sizeof(int) * NTask); + Recv_count_nodes = (int *)mymalloc("Recv_count_nodes", sizeof(int) * NTask); + Recv_offset_nodes = (int *)mymalloc("Recv_offset_nodes", sizeof(int) * NTask); + + Mesh_Send_count = (int *)mymalloc("Mesh_Send_count", sizeof(int) * NTask); + Mesh_Send_offset = (int *)mymalloc("Mesh_Send_offset", sizeof(int) * NTask); + Mesh_Recv_count = (int *)mymalloc("Mesh_Recv_count", sizeof(int) * NTask); + Mesh_Recv_offset = (int *)mymalloc("Mesh_Recv_offset", sizeof(int) * NTask); + + Force_Send_count = (int *)mymalloc("Force_Send_count", sizeof(int) * NTask); + Force_Send_offset = (int *)mymalloc("Force_Send_offset", sizeof(int) * NTask); + Force_Recv_count = (int *)mymalloc("Force_Recv_count", sizeof(int) * NTask); + Force_Recv_offset = (int *)mymalloc("Force_Recv_offset", sizeof(int) * NTask); + + mpi_printf("ALLOCATE: 
initial allocation for MaxPart = %d\n", All.MaxPart); + P = (struct particle_data *)mymalloc_movable(&P, "P", All.MaxPart * sizeof(struct particle_data)); + + mpi_printf("ALLOCATE: initial allocation for MaxPartSph = %d\n", All.MaxPartSph); + SphP = (struct sph_particle_data *)mymalloc_movable(&SphP, "SphP", All.MaxPartSph * sizeof(struct sph_particle_data)); + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + PartSpecialListGlobal = (struct special_particle_data *)mymalloc_movable(&PartSpecialListGlobal, "PartSpecialListGlobal", + All.MaxPartSpecial * sizeof(struct special_particle_data)); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + + timebins_allocate(&TimeBinsHydro); + timebins_allocate(&TimeBinsGravity); + + /* set to zero */ + memset(P, 0, All.MaxPart * sizeof(struct particle_data)); + memset(SphP, 0, All.MaxPartSph * sizeof(struct sph_particle_data)); +} + +/*! \brief Reallocates memory for particle data. + * + * Reallocates memory for P and TimeBinsGravity arrays. + * + * \return void + */ +void reallocate_memory_maxpart(void) +{ + mpi_printf("ALLOCATE: Changing to MaxPart = %d\n", All.MaxPart); + + P = (struct particle_data *)myrealloc_movable(P, All.MaxPart * sizeof(struct particle_data)); + timebins_reallocate(&TimeBinsGravity); +} + +/*! \brief Reallocate memory for cell data. + * + * Reallocates memory for cells in SphP and TimeBinsHydro arrays. + * + * \return void + */ +void reallocate_memory_maxpartsph(void) +{ + mpi_printf("ALLOCATE: Changing to MaxPartSph = %d\n", All.MaxPartSph); + + SphP = (struct sph_particle_data *)myrealloc_movable(SphP, All.MaxPartSph * sizeof(struct sph_particle_data)); + timebins_reallocate(&TimeBinsHydro); +} diff --git a/src/amuse/community/arepo/src/utils/debug.c b/src/amuse/community/arepo/src/utils/debug.c new file mode 100644 index 0000000000..c425ce7d40 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/debug.c @@ -0,0 +1,148 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/debug.c + * \date 05/2018 + * \brief Print relevant information about a particle / face for + * debugging. + * \details The functions contained in this file are mostly called when a + * condition, that causes the abort of the run, is met. In that + * case, the information about the state of the particle / face + * which triggered that condition is printed to the standard + * output. + * contains functions: + * void print_particle_info(int i) + * void print_particle_info_from_ID(MyIDType ID) + * void print_state_info(struct state *st) + * void print_state_face_info(struct state_face *st) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 03.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Prints particle / cell information to standard output. + * + * \param[in] i Index of particle / cell. 
+ * + * \return void + */ +void print_particle_info(int i) +{ + printf("Task=%d, ID=%llu, Type=%d, TimeBinGrav=%d, TimeBinHydro=%d, Mass=%g, pos=%g|%g|%g, vel=%g|%g|%g\n", ThisTask, + (unsigned long long)P[i].ID, P[i].Type, P[i].TimeBinGrav, P[i].TimeBinHydro, P[i].Mass, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2], + P[i].Vel[0], P[i].Vel[1], P[i].Vel[2]); +#ifdef PMGRID + printf("GravAccel=%g|%g|%g, GravPM=%g|%g|%g, Soft=%g, SoftType=%d, OldAcc=%g\n", P[i].GravAccel[0], P[i].GravAccel[1], + P[i].GravAccel[2], P[i].GravPM[0], P[i].GravPM[1], P[i].GravPM[2], All.ForceSoftening[P[i].SofteningType], P[i].SofteningType, + P[i].OldAcc); +#else /* #ifdef PMGRID */ + printf("GravAccel=%g|%g|%g, Soft=%g, SoftType=%d, OldAcc=%g\n", P[i].GravAccel[0], P[i].GravAccel[1], P[i].GravAccel[2], + All.ForceSoftening[P[i].SofteningType], P[i].SofteningType, P[i].OldAcc); +#endif /* #ifdef PMGRID #else */ + + if(P[i].Type == 0) + { + printf("Vol=%g, rad=%g, rho=%g, p=%g,u=%g, velVertex=%g|%g|%g, csnd=%g\n", SphP[i].Volume, get_cell_radius(i), SphP[i].Density, + SphP[i].Pressure, SphP[i].Utherm, SphP[i].VelVertex[0], SphP[i].VelVertex[1], SphP[i].VelVertex[2], get_sound_speed(i)); + printf("Center-Pos=%g|%g|%g\n", SphP[i].Center[0] - P[i].Pos[0], SphP[i].Center[1] - P[i].Pos[1], + SphP[i].Center[2] - P[i].Pos[2]); +#ifndef MHD + printf("Mom=%g|%g|%g, Energy=%g, EInt=%g, EKin=%g\n", SphP[i].Momentum[0], SphP[i].Momentum[1], SphP[i].Momentum[2], + SphP[i].Energy, SphP[i].Utherm * P[i].Mass, + 0.5 * P[i].Mass * + ((SphP[i].Momentum[0] / P[i].Mass) * (SphP[i].Momentum[0] / P[i].Mass) + + (SphP[i].Momentum[1] / P[i].Mass) * (SphP[i].Momentum[1] / P[i].Mass) + + (SphP[i].Momentum[2] / P[i].Mass) * (SphP[i].Momentum[2] / P[i].Mass))); +#else /* #ifndef MHD */ + printf("Mom=%g|%g|%g, Energy=%g, EInt=%g, EKin=%g, EB=%g\n", SphP[i].Momentum[0], SphP[i].Momentum[1], SphP[i].Momentum[2], + SphP[i].Energy, SphP[i].Utherm * P[i].Mass, + 0.5 * P[i].Mass * + ((SphP[i].Momentum[0] / P[i].Mass) * 
(SphP[i].Momentum[0] / P[i].Mass) + + (SphP[i].Momentum[1] / P[i].Mass) * (SphP[i].Momentum[1] / P[i].Mass) + + (SphP[i].Momentum[2] / P[i].Mass) * (SphP[i].Momentum[2] / P[i].Mass)), + 0.5 * SphP[i].Volume * (SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2])); +#endif /* #ifndef MHD #else */ + +#ifdef MHD + double err = pow(SphP[i].Volume, 1. / 3.) * fabs(SphP[i].DivB) / + sqrt(SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2]); + printf("B=%g|%g|%g, divb=%g, err=%g\n", SphP[i].B[0], SphP[i].B[1], SphP[i].B[2], SphP[i].DivB, err); +#endif /* #ifdef MHD */ + +#ifdef TREE_BASED_TIMESTEPS + printf("ID=%llu SphP[p].CurrentMaxTiStep=%g\n", (unsigned long long)P[i].ID, SphP[i].CurrentMaxTiStep); +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } +} + +/*! \brief Prints particle / cell information of every entry below NumPart whose ID matches. + * + * \param[in] ID particle / cell ID. + * + * \return void + */ +void print_particle_info_from_ID(MyIDType ID) +{ + int i; + for(i = 0; i < NumPart; i++) + if(P[i].ID == ID) + print_particle_info(i); /* no break: every match is printed */ +} + +/*! \brief Prints information of the left or right state of a face to standard + * output. + * + * \param[in] st Structure containing the left or right state of a face. + * + * \return void + */ +void print_state_info(struct state *st) +{ + printf("Task=%d, ID=%llu rho=%g, p=%g, vel=%g|%g|%g, velVertex=%g|%g|%g\n", ThisTask, (unsigned long long)st->ID, st->rho, st->press, + st->velx, st->vely, st->velz, st->velVertex[0], st->velVertex[1], st->velVertex[2]); + printf("dx=%g, dy=%g, dz=%g, dt_half=%g\n", st->dx, st->dy, st->dz, st->dt_half); + printf("timeBin=%d, volume=%g, activearea=%g, surfacearea=%g, csnd=%g\n", st->timeBin, st->volume, st->activearea, st->surfacearea, + st->csnd); +#ifdef MHD + printf("B=%g|%g|%g\n", st->Bx, st->By, st->Bz); +#endif /* #ifdef MHD */ +} + +/*! 
\brief Prints information of the state of a face as determined by + * the Riemann solver to standard output. + * + * \param[in] st Structure containing the state of a face after the solution + * of the Riemann problem. + * + * \return void + */ +void print_state_face_info(struct state_face *st) +{ + printf("rho=%g, p=%g, vel=%g|%g|%g\n", st->rho, st->press, st->velx, st->vely, st->velz); +} diff --git a/src/amuse/community/arepo/src/utils/dtypes.h b/src/amuse/community/arepo/src/utils/dtypes.h new file mode 100644 index 0000000000..816412b529 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/dtypes.h @@ -0,0 +1,195 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/dtypes.h + * \date 05/2018 + * \brief Definition of intrinsic datatypes. 
+ * \details + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 28.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef DTYPES_H +#define DTYPES_H + +#ifndef FFTW +#define CONCAT(prefix, name) prefix##name +#ifdef DOUBLEPRECISION_FFTW +#define FFTW(x) CONCAT(fftw_, x) +#else /* #ifdef DOUBLEPRECISION_FFTW */ +#define FFTW(x) CONCAT(fftwf_, x) +#endif /* #ifdef DOUBLEPRECISION_FFTW #else */ +#endif /* #ifndef FFTW */ + +#ifndef LONGIDS +typedef unsigned int MyIDType; +#define MPI_MYIDTYPE MPI_UNSIGNED +#else /* #ifndef LONGIDS */ +typedef unsigned long long MyIDType; +#define MPI_MYIDTYPE MPI_UNSIGNED_LONG_LONG +#endif /* #ifndef LONGIDS #else */ + +#ifndef DOUBLEPRECISION /* default is single-precision */ +typedef float MySingle; +typedef float MyFloat; +typedef float MyDouble; +#define MPI_MYFLOAT MPI_FLOAT +#define MPI_MYDOUBLE MPI_FLOAT +#else /* #ifndef DOUBLEPRECISION */ +#if(DOUBLEPRECISION == 2) /* mixed precision */ +typedef float MySingle; +typedef float MyFloat; +typedef double MyDouble; +#define MPI_MYFLOAT MPI_FLOAT +#define MPI_MYDOUBLE MPI_DOUBLE +#else /* #if (DOUBLEPRECISION == 2) */ +#if(DOUBLEPRECISION == 3) /* mixed precision, fewer single precision variables */ +typedef float MySingle; +typedef double MyFloat; +typedef double MyDouble; +#define MPI_MYFLOAT MPI_FLOAT +#define MPI_MYDOUBLE MPI_DOUBLE +#else /* #if (DOUBLEPRECISION == 3) */ +/* everything double-precision */ +typedef double MySingle; +typedef double MyFloat; +typedef double MyDouble; +#define MPI_MYFLOAT MPI_DOUBLE +#define MPI_MYDOUBLE MPI_DOUBLE +#endif /* #if (DOUBLEPRECISION == 3) #else */ +#endif /* #if (DOUBLEPRECISION == 2) #else */ +#endif /* #ifndef DOUBLEPRECISION #else */ + +#ifdef OUTPUT_IN_DOUBLEPRECISION +typedef double MyOutputFloat; +#else /* #ifdef OUTPUT_IN_DOUBLEPRECISION */ +typedef float MyOutputFloat; +#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */ + +#ifdef INPUT_IN_DOUBLEPRECISION +typedef 
double MyInputFloat; +#else /* #ifdef INPUT_IN_DOUBLEPRECISION */ +typedef float MyInputFloat; +#endif /* #ifdef INPUT_IN_DOUBLEPRECISION #else */ + +#ifndef NGB_TREE_DOUBLEPRECISION +typedef float MyNgbTreeFloat; +#define MAX_NGBRANGE_NUMBER MAX_FLOAT_NUMBER +#else /* #ifndef NGB_TREE_DOUBLEPRECISION */ +typedef double MyNgbTreeFloat; +#define MAX_NGBRANGE_NUMBER MAX_DOUBLE_NUMBER +#endif /* #ifndef NGB_TREE_DOUBLEPRECISION #else */ + +#if defined(PMGRID) +#include + +#ifdef DOUBLEPRECISION_FFTW +typedef double fft_real; +typedef fftw_complex fft_complex; +#else /* #ifdef DOUBLEPRECISION_FFTW */ +typedef float fft_real; +typedef fftwf_complex fft_complex; +#endif /* #ifdef DOUBLEPRECISION_FFTW #else */ +typedef ptrdiff_t fft_ptrdiff_t; + +typedef struct +{ + int NgridX, NgridY, NgridZ; + int Ngridz, Ngrid2; + + FFTW(plan) forward_plan_zdir; + FFTW(plan) forward_plan_xdir; + FFTW(plan) forward_plan_ydir; + + FFTW(plan) backward_plan_zdir; + FFTW(plan) backward_plan_ydir; + FFTW(plan) backward_plan_xdir; + +#ifndef FFT_COLUMN_BASED + + int *slab_to_task; /*!< Maps a slab index to the task responsible for the slab */ + int *slabs_x_per_task; + int *first_slab_x_of_task; /*!< Array containing the index of the first slab of each task */ + int *slabs_y_per_task; /*!< Array containing the number of slabs each task is responsible for */ + int *first_slab_y_of_task; /*!< Array containing the index of the first slab of each task */ + + int nslab_x, slabstart_x, nslab_y, slabstart_y; + int largest_x_slab; /*!< size of the largest slab in x direction */ + int largest_y_slab; /*!< size of the largest slab in y direction */ + +#else /* #ifndef FFT_COLUMN_BASED */ + + size_t max_datasize; + size_t fftsize; + + int base_firstcol, base_ncol, base_lastcol; + int transposed_firstcol, transposed_ncol; + int second_transposed_firstcol, second_transposed_ncol; + size_t second_transposed_ncells; + + int firstcol_XZ, ncol_XZ; + int firstcol_YZ, ncol_YZ; + + int pivotcol; /* to go from 
column number to task */ + int avg; + int tasklastsection; + + size_t *offsets_send_A; + size_t *offsets_recv_A; + size_t *offsets_send_B; + size_t *offsets_recv_B; + size_t *offsets_send_C; + size_t *offsets_recv_C; + size_t *offsets_send_D; + size_t *offsets_recv_D; + size_t *offsets_send_13; + size_t *offsets_recv_13; + size_t *offsets_send_23; + size_t *offsets_recv_23; + size_t *offsets_send_13back; + size_t *offsets_recv_13back; + size_t *offsets_send_23back; + size_t *offsets_recv_23back; + + size_t *count_send_A; + size_t *count_recv_A; + size_t *count_send_B; + size_t *count_recv_B; + size_t *count_send_C; + size_t *count_recv_C; + size_t *count_send_D; + size_t *count_recv_D; + size_t *count_send_13; + size_t *count_recv_13; + size_t *count_send_23; + size_t *count_recv_23; + size_t *count_send_13back; + size_t *count_recv_13back; + size_t *count_send_23back; + size_t *count_recv_23back; + +#endif /* #ifndef FFT_COLUMN_BASED */ +} fft_plan; + +#endif /* #if defined(PMGRID) */ + +#endif /* #ifndef DTYPES_H */ diff --git a/src/amuse/community/arepo/src/utils/generic_comm_helpers2.h b/src/amuse/community/arepo/src/utils/generic_comm_helpers2.h new file mode 100644 index 0000000000..a159b17045 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/generic_comm_helpers2.h @@ -0,0 +1,724 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/generic_comm_helpers.h + * \date 05/2018 + * \brief Generic 'template' MPI communication structure used in many + * parts of the code. + * \details Usage: + * see e.g. src/init/density.c + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifdef USE_SUBCOMM_COMMUNICATOR +#define MYCOMMUNICATOR SubComm +#define MyThisTask SubThisTask +#define MyNTask SubNTask +#else /* #ifdef USE_SUBCOMM_COMMUNICATOR */ +#define MYCOMMUNICATOR MPI_COMM_WORLD +#define MyThisTask ThisTask +#define MyNTask NTask +#endif /* #ifdef USE_SUBCOMM_COMMUNICATOR #else */ + +#define EXTRA_SPACE 16384 + +typedef struct datanodelist datanodelist; +typedef struct data_partlist data_partlist; + +static size_t ExportSpace; +static size_t MinSpace; +static int NextParticle; +static int Nexport, Nimport; +static int NexportNodes, NimportNodes; +static long long SumNexport; +static int *NodeDataIn; +static int *NodeDataGet; + +static char callorigin[1000]; + +#ifdef USE_DSDE +static void generic_prepare_import_counts_ibarrier(void); +#endif /* #ifdef USE_DSDE */ + +#ifdef USE_INLINED_IBARRIER +static void generic_prepare_import_counts_inlined_ibarrier(void); +#endif /* #ifdef USE_INLINED_IBARRIER */ + +#define generic_set_MaxNexport(...) \ + { \ + generic_set_info(__FUNCTION__, __FILE__, __LINE__); \ + } + +/*! 
\brief This function determines how much buffer space we may use based on + * the memory that is locally still free, and it computes how much + * memory may at most be needed to process a single particle. We will + * only continue with a particle if this can still be safely processed. + */ +static void generic_set_info(const char *func, const char *file, int line) +{ + /* func, file, line: call site, as passed in by the generic_set_MaxNexport() macro */ ExportSpace = 0.3 * (FreeBytes); /* we just grab at most 30% of the still available memory here */ + ExportSpace /= NUM_THREADS; + /* to account for the neighbor list buffer that every thread allocated; clamped at zero because ExportSpace is unsigned and would otherwise wrap around to a huge value */ ExportSpace = (ExportSpace > NumPart * sizeof(int)) ? ExportSpace - NumPart * sizeof(int) : 0; + + /* make the size a multiple both of data_partlist and datanodelist */ + ExportSpace /= (sizeof(data_partlist) * sizeof(datanodelist)); + ExportSpace *= (sizeof(data_partlist) * sizeof(datanodelist)); + + /* worst case for one particle: one export entry per other task plus one node entry per top-level leaf */ MinSpace = + (MyNTask - 1) * (sizeof(data_partlist) + sizeof(data_in) + sizeof(data_out)) + NTopleaves * (sizeof(datanodelist) + sizeof(int)); + + snprintf(callorigin, sizeof(callorigin), "%s|%d|", file, line); /* bounded write: a long file path must not overflow callorigin */ + +#ifdef VERBOSE + mpi_printf( + "GENERIC: function %s(), file %s, line %d: MinSpace = %g MB NTopleaves = %d ExportSpace = %g MB sizeof(data_in)=%d " + "sizeof(data_out)=%d\n", + func, file, line, MinSpace / (1024.0 * 1024.0), NTopleaves, ExportSpace / (1024.0 * 1024.0), + (int)sizeof(data_in), (int)sizeof(data_out)); +#endif /* #ifdef VERBOSE */ + + if(ExportSpace < MinSpace) + terminate( + "Bummer. Can't even safely process a single particle for the available memory. FreeBytes=%lld ExportSpace=%lld " + "MinSpace=%lld MyNTask=%d NTopleaves=%d", + (long long)FreeBytes, (long long)ExportSpace, (long long)MinSpace, MyNTask, NTopleaves); +} + +/*! \brief This function does the memory allocation at the beginning of a loop + * over the remaining local particles. The fields PartList[] and + * NodeList[] share the buffer space of size "ExportSpace" (in bytes). + * Here PartList will be filled in from the beginning, while NodeList + * will be filled in from the end. 
Since we do not know a priori the + * relative share of these two fields, we can make optimum use of + * the available space in this way. + */ +static void generic_alloc_partlist_nodelist_ngblist_threadbufs(void) +{ + for(int i = 0; i < NUM_THREADS; i++) + { + Thread[i].Nexport = 0; + Thread[i].NexportNodes = 0; + Thread[i].ExportSpace = ExportSpace; + Thread[i].InitialSpace = ExportSpace; + Thread[i].ItemSize = (sizeof(data_partlist) + sizeof(data_in) + sizeof(data_out)); + + Thread[i].PartList = (struct data_partlist *)mymalloc_movable_g(&Thread[i].PartList, "PartList", ExportSpace); + /* note: the NodeList array will be attached to the end of this buffer, growing backwards */ + /* Thread[i].NodeList = (struct datanodelist *) (((char *) Thread[i].PartList) + InitialSpace); + */ + Thread[i].Ngblist = (int *)mymalloc_movable_g(&Thread[i].Ngblist, "Ngblist", NumPart * sizeof(int)); + Thread[i].R2list = (double *)mymalloc_movable_g(&Thread[i].R2list, "R2list", NumPart * sizeof(double)); + /* every thread gets its own slice of Exportflag; the slice length is MyNTask rounded up to a multiple of 16 */ Thread[i].Exportflag = Exportflag + i * ((((MyNTask - 1) / 16) + 1) * 16); + } +} + +/*! \brief The corresponding deallocation routine. 
+ */ +static void generic_free_partlist_nodelist_ngblist_threadbufs(void) +{ + /* buffers are released in the reverse order of their allocation in generic_alloc_partlist_nodelist_ngblist_threadbufs() */ for(int i = NUM_THREADS - 1; i >= 0; i--) + { + myfree(Thread[i].R2list); + myfree(Thread[i].Ngblist); + myfree(Thread[i].PartList); + Thread[i].R2list = NULL; + Thread[i].Ngblist = NULL; + Thread[i].PartList = NULL; + } +} + /*! \brief Counts, for every destination task, the particles and nodes that the threads have queued for export (Send[].Count, Send[].CountNodes), sets the totals Nexport and NexportNodes, and adds Nexport to SumNexport. */ +static void generic_prepare_export_counts(void) +{ + for(int j = 0; j < MyNTask; j++) + { + Send[j].Count = 0; + Send[j].CountNodes = 0; + } + + Nexport = 0; + NexportNodes = 0; + + for(int i = 0; i < NUM_THREADS; i++) + { + for(int j = 0; j < Thread[i].Nexport; j++) + Send[Thread[i].PartList[j].Task].Count++; + + /* the node list starts at the end of the PartList buffer and grows backwards, hence the negative indices below */ struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[i].PartList) + Thread[i].InitialSpace); + + for(int j = 0; j < Thread[i].NexportNodes; j++) + Send[nodelist[-1 - j].Task].CountNodes++; + + Nexport += Thread[i].Nexport; + NexportNodes += Thread[i].NexportNodes; + } + + SumNexport += Nexport; +} + +/*! \brief Establishes the Recv counts from the Send counts (effectively a big + * transpose). + */ +static void generic_prepare_import_counts(void) +{ + /* our standard approach for this is to use an all-to-all communication. For very large processor counts, + * this in principle becomes inefficient since mostly zeros need to be communicated. + * we also have two optional experimental communication routines that use a sparse communication pattern instead. + */ +#ifdef USE_DSDE + generic_prepare_import_counts_ibarrier(); +#else /* #ifdef USE_DSDE */ +#ifdef USE_INLINED_IBARRIER + generic_prepare_import_counts_inlined_ibarrier(); +#else /* #ifdef USE_INLINED_IBARRIER */ + /* the default */ + MPI_Alltoall(Send, sizeof(struct send_recv_counts), MPI_BYTE, Recv, sizeof(struct send_recv_counts), MPI_BYTE, MYCOMMUNICATOR); +#endif /* #ifdef USE_INLINED_IBARRIER #else */ +#endif /* #ifdef USE_DSDE #else */ +} + +/*! \brief Initializes offset tables that we need for the communication. 
+ */ +static void generic_prepare_export_offsets(void) +{ + Send_offset[0] = 0; + Send_offset_nodes[0] = 0; + + /* exclusive prefix sums of the per-task send counts */ for(int j = 1; j < MyNTask; j++) + { + Send_offset[j] = Send_offset[j - 1] + Send[j - 1].Count; + Send_offset_nodes[j] = Send_offset_nodes[j - 1] + Send[j - 1].CountNodes; + } +} + +/*! \brief Organizes the particle and node data for export in contiguous + * memory regions. + */ +static void generic_prepare_particle_data_for_export(void) +{ + /* rel_node_index[task] counts the nodes already queued for 'task'; it is handed to particle2in() for each exported particle */ int *rel_node_index = (int *)mymalloc_g("rel_node_index", MyNTask * sizeof(int)); + + /* the send counters are reset and then re-used as running fill positions within each task's segment */ for(int j = 0; j < MyNTask; j++) + { + Send[j].Count = 0; + Send[j].CountNodes = 0; + rel_node_index[j] = 0; + } + + for(int i = 0; i < NUM_THREADS; i++) + { + /* the node list starts at the end of the PartList buffer and grows backwards, hence the negative indices below */ struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[i].PartList) + Thread[i].InitialSpace); + + for(int j = 0, jj = 0; j < Thread[i].Nexport; j++) + { + int task = Thread[i].PartList[j].Task; + int off = Send_offset[task] + Send[task].Count++; + + int target = Thread[i].PartList[j].Index; + + particle2in(&DataIn[off], target, rel_node_index[task]); + + /* the node entries of a particle are copied only after its last consecutive PartList entry */ if(j < Thread[i].Nexport - 1) + if(Thread[i].PartList[j].Index == Thread[i].PartList[j + 1].Index) + continue; + + while(jj < Thread[i].NexportNodes && Thread[i].PartList[j].Index == nodelist[-1 - jj].Index) + { + /* note: 'task' and 'off' declared here shadow the variables of the enclosing loop body */ int task = nodelist[-1 - jj].Task; + int off = Send_offset_nodes[task] + Send[task].CountNodes++; + + NodeDataIn[off] = nodelist[-1 - jj].Node; + + rel_node_index[task]++; + jj++; + } + } + } + + myfree(rel_node_index); +} + +/*! \brief Driver routine to process the results that we obtained for a + * particle from a remote processor by working on it with the supplied + * out2particle() routine. 
+ */ +static void generic_add_results_to_local(void) +{ + /* the counters are reset and the export lists walked in the same order as in generic_prepare_particle_data_for_export(), which reproduces the offsets used there */ for(int j = 0; j < MyNTask; j++) + Send[j].Count = 0; + + for(int i = 0; i < NUM_THREADS; i++) + for(int j = 0; j < Thread[i].Nexport; j++) + { + int task = Thread[i].PartList[j].Task; + int off = Send_offset[task] + Send[task].Count++; + + int target = Thread[i].PartList[j].Index; + + out2particle(&DataOut[off], target, MODE_IMPORTED_PARTICLES); + } +} + +/*! \brief This function is called in the actual tree walk routine to find out + * the number and starting index of the section in the node-list + * that needs to be processed for the imported particle. + */ +static void generic_get_numnodes(int target, int *numnodes, int **firstnode) +{ + /* target: index into DataGet; numnodes / firstnode: outputs (length and start of the node section); the section of the last imported particle extends to NimportNodes */ if(target == Nimport - 1) + *numnodes = NimportNodes - DataGet[target].Firstnode; + else + *numnodes = DataGet[target + 1].Firstnode - DataGet[target].Firstnode; + + *firstnode = &NodeDataGet[DataGet[target].Firstnode]; +} + +/*! \brief Calculates how much space we need to allocate to safely process a + * certain number of nodes and particles that are imported. + */ +static size_t generic_calc_import_storage(int nimport, int nimportnodes) +{ + /* returns the size in bytes: one data_in and one data_out per imported particle plus one int per imported node */ size_t needed = nimport * sizeof(data_in) + nimportnodes * sizeof(int) + nimport * sizeof(data_out); + + /* add some extra space to not go to the last byte */ + needed += EXTRA_SPACE; + + return needed; +} + +/*! \brief This routine carries out the communication step in several phases + * if needed. 
+ */ +static void generic_multiple_phases(void (*kernel)(void)) +{ + /* kernel: routine that processes the imported particles; it is called once per phase */ int ncycles; + + for(int ngrpstart = 1; ngrpstart < (1 << PTask); ngrpstart += ncycles) + { + /* now decide how many cycles we can process in this iteration */ + ncycles = (1 << PTask) - ngrpstart; + + do + { + Nimport = 0; + NimportNodes = 0; + + for(int ngrp = ngrpstart; ngrp < ngrpstart + ncycles; ngrp++) + { + /* exchange partner of this task in group ngrp */ int recvTask = MyThisTask ^ ngrp; + + if(recvTask < MyNTask) + { + if(Recv[recvTask].Count > 0) + { + Nimport += Recv[recvTask].Count; + NimportNodes += Recv[recvTask].CountNodes; + } + } + } + + int flag = 0, flagall; + + if(generic_calc_import_storage(Nimport, NimportNodes) > FreeBytes) + flag = 1; + + /* the decision is taken collectively: flagall is set if any task lacks the memory */ MPI_Allreduce(&flag, &flagall, 1, MPI_INT, MPI_MAX, MYCOMMUNICATOR); + + if(flagall) + ncycles /= 2; + else + break; + } + while(ncycles > 0); + + if(ncycles == 0) + terminate( + "Seems like we can't even do one cycle: ncycles=%d ngrpstart=%d Nimport=%d NimportNodes=%d FreeBytes=%lld needed " + "storage=%lld", + ncycles, ngrpstart, Nimport, NimportNodes, (long long)FreeBytes, + (long long)generic_calc_import_storage(Nimport, NimportNodes)); + + if(ngrpstart == 1 && ncycles != ((1 << PTask) - ngrpstart) && MyThisTask == 0) + warn("need multiple import/export phases to avoid memory overflow"); + + /* now allocate the import and results buffers */ + + DataGet = (data_in *)mymalloc_movable_g(&DataGet, "DataGet", Nimport * sizeof(data_in)); + NodeDataGet = (int *)mymalloc_movable_g(&NodeDataGet, "NodeDataGet", NimportNodes * sizeof(int)); + DataResult = (data_out *)mymalloc_movable_g(&DataResult, "DataResult", Nimport * sizeof(data_out)); + + /* the counters are restarted and now serve as fill positions in the import buffers */ Nimport = 0; + NimportNodes = 0; + + /* exchange particle data */ + for(int ngrp = ngrpstart; ngrp < ngrpstart + ncycles; ngrp++) + { + int recvTask = MyThisTask ^ ngrp; + + if(recvTask < MyNTask) + { + if(Send[recvTask].Count > 0 || Recv[recvTask].Count > 0) + { + size_t len = sizeof(data_in); + + /* get the particles */ + 
/* NOTE(review): the byte counts below are passed to MPI as int; presumably the memory budget keeps them below 2 GB -- confirm */ MPI_Sendrecv(&DataIn[Send_offset[recvTask]], Send[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_A, + &DataGet[Nimport], Recv[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_A, MYCOMMUNICATOR, + MPI_STATUS_IGNORE); + + /* get the nodes */ + MPI_Sendrecv(&NodeDataIn[Send_offset_nodes[recvTask]], Send[recvTask].CountNodes, MPI_INT, recvTask, TAG_GRAV_B, + &NodeDataGet[NimportNodes], Recv[recvTask].CountNodes, MPI_INT, recvTask, TAG_GRAV_B, MYCOMMUNICATOR, + MPI_STATUS_IGNORE); + + /* shift the received Firstnode values by the number of nodes imported so far, so that they index into NodeDataGet */ for(int k = 0; k < Recv[recvTask].Count; k++) + DataGet[Nimport + k].Firstnode += NimportNodes; + + Nimport += Recv[recvTask].Count; + NimportNodes += Recv[recvTask].CountNodes; + } + } + } + + /* now do the actual work for the imported points */ + kernel(); + + /* send the results */ + Nimport = 0; + NimportNodes = 0; + + for(int ngrp = ngrpstart; ngrp < ngrpstart + ncycles; ngrp++) + { + int recvTask = MyThisTask ^ ngrp; + if(recvTask < MyNTask) + { + if(Send[recvTask].Count > 0 || Recv[recvTask].Count > 0) + { + size_t len = sizeof(data_out); + + /* exchange the results */ + MPI_Sendrecv(&DataResult[Nimport], Recv[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_B, + &DataOut[Send_offset[recvTask]], Send[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_B, + MYCOMMUNICATOR, MPI_STATUS_IGNORE); + + Nimport += Recv[recvTask].Count; + NimportNodes += Recv[recvTask].CountNodes; + } + } + } + + /* the import buffers are released in reverse order of allocation */ myfree(DataResult); + myfree(NodeDataGet); + myfree(DataGet); + } +} + +/*! \brief This function deals with the communication step, and then processes + * the imported particles, and finally communicates the results back. If + * there is not enough memory available to hold all the data sent to + * us from other processors, we process the incoming data in multiple + * stages, which will always be possible. 
+ */ +static void generic_exchange(void (*kernel)(void)) +{ + /* kernel: routine that processes the imported particles; it is handed on to generic_multiple_phases() */ /* set up Sendcount table */ + generic_prepare_export_counts(); + + /* do the all-to-all exchange so that we have the Recvcount table as well */ + generic_prepare_import_counts(); + + /* prepare offsets in export tables */ + generic_prepare_export_offsets(); + + /* allocate particle data buffers */ + DataIn = (data_in *)mymalloc_movable_g(&DataIn, "DataIn", Nexport * sizeof(data_in)); + NodeDataIn = (int *)mymalloc_movable_g(&NodeDataIn, "NodeDataIn", NexportNodes * sizeof(int)); + DataOut = (data_out *)mymalloc_movable_g(&DataOut, "DataOut", Nexport * sizeof(data_out)); + + /* prepare particle data for export */ + generic_prepare_particle_data_for_export(); + + /* export particles and process them, if needed in several installments */ + generic_multiple_phases(kernel); + + /* add the results to the local particles */ + generic_add_results_to_local(); + + /* the export buffers are released in reverse order of allocation */ myfree(DataOut); + myfree(NodeDataIn); + myfree(DataIn); +} + +/*! \brief Implements a repeated loop over the local particles in the list, + * processing them with the local kernel function, until we're done or + * the export buffer is full. Then we exchange the data, and process + * the imported ones with the provided kernel. We repeat if needed until + * all processors are done. 
+ */ +static int generic_comm_pattern(int nactive, void (*kernel_loc)(void), void (*kernel_imp)(void)) +{ + /* nactive: number of local particles to process; kernel_loc / kernel_imp: kernels for the local and the imported particles; returns the number of iterations that were needed */ int ndone_flag, ndone, iter = 0; + + SumNexport = 0; /* can be queried as a book-keeping variable */ + + NextParticle = 0; /* first particle index for this task */ + + do + { + iter++; + + /* allocate buffers to arrange communication */ + generic_alloc_partlist_nodelist_ngblist_threadbufs(); + + /* do local particles */ + kernel_loc(); + + /* do all necessary bookkeeping, data exchange, and processing of imported particles */ + generic_exchange(kernel_imp); + + /* free the rest of the buffers */ + generic_free_partlist_nodelist_ngblist_threadbufs(); + + /* check whether we are done */ + /* NextParticle is not modified in this function; presumably kernel_loc() advances it -- confirm */ if(NextParticle >= nactive) + ndone_flag = 1; + else + ndone_flag = 0; + + /* ndone is the number of tasks that have finished; all tasks keep iterating until every one of them is done */ MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MYCOMMUNICATOR); + } + while(ndone < MyNTask); + + return iter; +} + +/*! \brief Same as generic_comm_pattern but you can pass the indices of the + * particles to be processed. 
+ */ +static int generic_comm_pattern_for_given_particles(int nactive, int indices[], void (*kernel_loc)(int, int *), + void (*kernel_imp)(void)) +{ + /* nactive, indices: passed unchanged to kernel_loc() in every iteration; kernel_imp: kernel for the imported particles; returns the number of iterations that were needed */ int ndone_flag, ndone, iter = 0; + + SumNexport = 0; /* can be queried as a book-keeping variable */ + + NextParticle = 0; /* first particle index for this task */ + + do + { + iter++; + + /* allocate buffers to arrange communication */ + generic_alloc_partlist_nodelist_ngblist_threadbufs(); + + /* do local particles */ + kernel_loc(nactive, indices); + + /* do all necessary bookkeeping, data exchange, and processing of imported particles */ + generic_exchange(kernel_imp); + + /* free the rest of the buffers */ + generic_free_partlist_nodelist_ngblist_threadbufs(); + + /* check whether we are done */ + /* NextParticle is not modified in this function; presumably kernel_loc() advances it -- confirm */ if(NextParticle >= nactive) + ndone_flag = 1; + else + ndone_flag = 0; + + /* ndone is the number of tasks that have finished; all tasks keep iterating until every one of them is done */ MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MYCOMMUNICATOR); + } + while(ndone < MyNTask); + + return iter; +} + +#ifdef USE_INLINED_IBARRIER +/*! \brief Can replace + * MPI_Alltoall(Send, sizeof(struct send_recv_counts), MPI_BYTE, Recv, + * sizeof(struct send_recv_counts), MPI_BYTE, MYCOMMUNICATOR); + * with a sparse communication pattern that effectively involves a + * home-grown non-blocking barrier to establish that we can stop + * listening. 
+ */ +static void generic_prepare_import_counts_inlined_ibarrier(void) +{ + /* number of stages of the hand-written barrier; received_levels / sent_levels are bit masks with one bit per stage */ int nLevels = my_fls(MyNTask - 1); + int received_levels = 0, sent_levels = 0; + + /* stagelist[j] == j provides stable send buffers for the stage numbers */ int *stagelist = (int *)mymalloc("stagelist", nLevels * sizeof(int)); + for(int j = 0; j < nLevels; j++) + stagelist[j] = j; + + MPI_Request *level_requests = (MPI_Request *)mymalloc("level_requests", nLevels * sizeof(MPI_Request)); + + MPI_Request *requests = (MPI_Request *)mymalloc("requests", MyNTask * sizeof(MPI_Request)); + int n_requests = 0; + + /* only non-zero counts are sent; synchronous sends complete once the receiver has started to receive them */ for(int j = 0; j < MyNTask; j++) + { + if(Send[j].Count > 0) + MPI_Issend(&Send[j], sizeof(struct send_recv_counts), MPI_BYTE, j, TAG_N, MYCOMMUNICATOR, &requests[n_requests++]); + + Recv[j].Count = 0; + Recv[j].CountNodes = 0; + } + + int barrier_active = 0; + + while(1) + { + int flag; + MPI_Status status; + + /* pick up a pending count message, if any */ MPI_Iprobe(MPI_ANY_SOURCE, TAG_N, MYCOMMUNICATOR, &flag, &status); + + if(flag) + { + int source = status.MPI_SOURCE; + int tag = status.MPI_TAG; + + MPI_Recv(&Recv[source], sizeof(struct send_recv_counts), MPI_BYTE, source, tag, MYCOMMUNICATOR, MPI_STATUS_IGNORE); + } + + /* pick up a pending barrier message, if any */ MPI_Iprobe(MPI_ANY_SOURCE, TAG_BARRIER, MYCOMMUNICATOR, &flag, &status); + + if(flag) + { + int source = status.MPI_SOURCE; + int tag = status.MPI_TAG; + + int stage; + MPI_Recv(&stage, 1, MPI_INT, source, tag, MYCOMMUNICATOR, MPI_STATUS_IGNORE); + received_levels |= (1 << stage); + } + + if(barrier_active) + { + /* a stage is sent once all lower stages have been received */ for(int stage = 0; stage < nLevels; stage++) + if(!(sent_levels & (1 << stage))) + { + int mask = ((1 << stage) - 1); + + if((mask & received_levels) == mask) + { + sent_levels |= (1 << stage); + + int target = (MyThisTask + (1 << stage)) % MyNTask; + + MPI_Issend(&stagelist[stage], 1, MPI_INT, target, TAG_BARRIER, MYCOMMUNICATOR, &level_requests[stage]); + } + } + + /* done once every stage has been both received and sent (the original tested the undeclared name 'send_levels') */ if(received_levels == ((1 << nLevels) - 1) && sent_levels == ((1 << nLevels) - 1)) + break; + } + else + { + /* the barrier is entered only after all of our own count messages have been matched */ MPI_Testall(n_requests, requests, &flag, MPI_STATUSES_IGNORE); + + if(flag) + barrier_active = 1; + } + 
} + + MPI_Waitall(nLevels, level_requests, MPI_STATUSES_IGNORE); /* as we are going to free stagelist */ + + myfree(requests); + myfree(level_requests); + myfree(stagelist); +} +#endif /* #ifdef USE_INLINED_IBARRIER */ + +#ifdef USE_DSDE +/*! \brief Can replace + * MPI_Alltoall(Send, sizeof(struct send_recv_counts), MPI_BYTE, Recv, + * sizeof(struct send_recv_counts), MPI_BYTE, MYCOMMUNICATOR); + * with a sparse communication pattern that involves a non-blocking + * barrier (requires MPI-3.0). + */ +static void generic_prepare_import_counts_ibarrier(void) +{ + /* the return type is void, matching the forward declaration of this routine and its only use as a statement */ MPI_Request barrier_request; + MPI_Request *requests = (MPI_Request *)mymalloc_movable(&requests, "requests", MyNTask * sizeof(MPI_Request)); + int n_requests = 0; + + /* only non-zero counts are sent; synchronous sends complete once the receiver has started to receive them */ for(int j = 0; j < MyNTask; j++) + { + if(Send[j].Count > 0) + MPI_Issend(&Send[j], sizeof(struct send_recv_counts), MPI_BYTE, j, TAG_N, MYCOMMUNICATOR, &requests[n_requests++]); + + Recv[j].Count = 0; + Recv[j].CountNodes = 0; + } + + int barrier_active = 0; + + while(1) + { + int flag; + MPI_Status status; + + MPI_Iprobe(MPI_ANY_SOURCE, TAG_N, MYCOMMUNICATOR, &flag, &status); + + if(flag) + { + int source = status.MPI_SOURCE; + int tag = status.MPI_TAG; + + int count; + MPI_Get_count(&status, MPI_BYTE, &count); + + if(tag == TAG_N && source != MyThisTask) + { + /* the message must be exactly one send_recv_counts record (the original hard-coded its size as 8) */ if(count != (int)sizeof(struct send_recv_counts)) + terminate("count=%d\n", count); + + MPI_Recv(&Recv[source], sizeof(struct send_recv_counts), MPI_BYTE, source, tag, MYCOMMUNICATOR, MPI_STATUS_IGNORE); + } + } + + if(barrier_active) + { + int flag2; + + /* once the barrier has completed, no further count messages can be in flight */ MPI_Test(&barrier_request, &flag2, &status); + + if(flag2 != 0) + break; + } + else + { + MPI_Testall(n_requests, requests, &flag, MPI_STATUSES_IGNORE); + + if(flag) + { + barrier_active = 1; + + /* all of our own sends have been matched: enter the non-blocking barrier */ MPI_Ibarrier(MYCOMMUNICATOR, &barrier_request); + } + } + } + + myfree(requests); +} +#endif /* #ifdef USE_DSDE */ diff --git a/src/amuse/community/arepo/src/utils/mpz_extension.c b/src/amuse/community/arepo/src/utils/mpz_extension.c new file mode 100644 index 
0000000000..87ba79dc9a --- /dev/null +++ b/src/amuse/community/arepo/src/utils/mpz_extension.c @@ -0,0 +1,119 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/mpz_extension.c + * \date 05/2018 + * \brief Auxiliary functions to facilitate usage of mpz functions. + * \details Integer arithmetic used by Voronoi mesh construction. + * contains functions: + * void MY_mpz_set_si(mpz_t dest, signed long long int val) + * void MY_mpz_mul_si(mpz_t prod, mpz_t mult, signed long long + * int val) + * void MY_mpz_sub_ui(mpz_t prod, mpz_t mult, + * unsigned long long int val) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 20.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#if USEDBITS > 31 + +/*! \brief Sets mpz variable from signed long long int. + * + * For Arepo-internal use of mpz. + * + * \param[out] dest Variable to be set. + * \param[in] val Value in signed long long int. 
+ * + * \return void + */ +void MY_mpz_set_si(mpz_t dest, signed long long int val) +{ + mpz_t tmp, tmp2; + + /* split val into an unsigned lower and a signed upper 32-bit half, so that each half fits into a long even where long has only 32 bits */ unsigned long int lower = (unsigned long int)(val & 0xffffffffL); + signed long int higher = (signed long int)(val >> 32); + + mpz_init(tmp); + mpz_init(tmp2); + + /* dest = higher * 2^32 + lower */ mpz_set_si(tmp, higher); + mpz_mul_2exp(tmp2, tmp, 32); + mpz_add_ui(dest, tmp2, lower); + + mpz_clear(tmp2); + mpz_clear(tmp); +} + +/*! \brief Multiplies an mpz type with a signed long long int. + * + * \param[out] prod Result of multiplication. + * \param[in] mult Multiplicator (mpz_t). + * \param[in] val Multiplicand (signed long long int). + * + * \return void + */ +void MY_mpz_mul_si(mpz_t prod, mpz_t mult, signed long long int val) +{ + mpz_t tmp; + + mpz_init(tmp); + + MY_mpz_set_si(tmp, val); + + mpz_mul(prod, mult, tmp); + + mpz_clear(tmp); +} + +/*! \brief Subtracts 'val' from 'mult'. + * + * \param[out] prod Result of subtraction. + * \param[in] mult Minuend (mpz_t). + * \param[in] val Subtrahend (unsigned long long int). + * + * \return void + */ +void MY_mpz_sub_ui(mpz_t prod, mpz_t mult, unsigned long long int val) +{ + mpz_t tmp; + + mpz_init(tmp); + + /* import val as one unsigned native-endian word; going through the signed MY_mpz_set_si() would turn values >= 2^63 negative */ mpz_import(tmp, 1, 1, sizeof(val), 0, 0, &val); + + mpz_sub(prod, mult, tmp); + + mpz_clear(tmp); +} + +#endif diff --git a/src/amuse/community/arepo/src/utils/mymalloc.c b/src/amuse/community/arepo/src/utils/mymalloc.c new file mode 100644 index 0000000000..f3173883e6 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/mymalloc.c @@ -0,0 +1,792 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/mymalloc.c + * \date 05/2018 + * \brief Manager for dynamic memory allocation. + * \details This module handles the dynamic memory allocation for AREPO. + * To avoid memory allocation/dellocation overhead a big chunk of + * memory (which will be the maximum amount of dinamically + * allocatable memory) is allocated upon initialization. This + * chunk is then filled by the memory blocks as in a stack + * structure. The blocks are automatically aligned to a 64 bit + * boundary. Memory blocks come in two flavours: movable and + * non-movable. In non-movable blocks the starting address is + * fixed once the block is allocated and cannot be changed. + * Due to the stack structure of the dynamic memory, this implies + * that the last (non-movable) block allocated must be the first + * block to be deallocated. If this condition is not met, an abort + * condition is triggered. If more flexibility is needed, movable + * memory blocks can be used. In this case, the starting address + * of the block is again fixed upon allocation but the block can + * be shifted (therefore its initial address changes) according + * to needs. For a movable block to be successfully shifted it is + * required that all the subsequent allocated blocks are movable. + * Again, an abort condition is triggered if this condition is + * not met. 
Movable blocks can be deallocated in any order + * provided that the condition just described holds. The gap + * resulting form the deallocation of a block that is not in + * the last position will be automatically filled by shifting all + * the blocks coming after the deallocated block. + * + * contains functions: + * static void *hmalloc(size_t size) (HUGEPAGES) + * static void *hmalloc(size_t size) + * void mymalloc_init(void) + * void report_memory_usage(int rank, char *tabbuf) + * void report_detailed_memory_usage_of_largest_task(void) + * void dump_memory_table(void) + * int dump_memory_table_buffer(char *p) + * void *mymalloc_fullinfo + * void *mymalloc_movable_fullinfo + * size_t roundup_to_multiple_of_cacheline_size(size_t n) + * void myfree_fullinfo(void *p, const char *func, const char + * *file, int line) + * void *myfree_query_last_block(void) + * void myfree_movable_fullinfo(void *p, const char *func, + * const char *file, int line) + * void *myrealloc_fullinfo(void *p, size_t n, const char + * *func, const char *file, int line) + * void *myrealloc_movable_fullinfo(void *p, size_t n, + * const char *func, const char *file, int line) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 07.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#define CACHELINESIZE 64 + +#define MAXBLOCKS 5000 +#define MAXCHARS 40 + +static size_t AllocatedBytesGeneric; + +static size_t HighMarkBytes; +static size_t HighMarkBytesWithoutGeneric; + +static double OldGlobHighMarkMB; +static double OldGlobHighMarkMBWithoutGeneric; + +static size_t TotBytes; /*!< The total dimension (in bytes) of dynamic memory available to the current task. */ +static void *Base; /*!< Base pointer (initial memory address) of the stack. 
*/ + +static unsigned long Nblocks; /*!< The current number of allocated memory blocks. */ + +static void **Table; /*!< Table containing the initial addresses of the allocated memory blocks. */ +static size_t *BlockSize; /*!< Array containing the size (in bytes) of all the allocated memory blocks. */ +static char *MovableFlag; /*!< Identifies whether a block is movable. */ +static char *GenericFlag; /*!< Identifies whether a block has been identified in the generic allocation routines. */ +static void ***BasePointers; /*!< Base pointers containing the initial addresses of movable memory blocks */ +static char *VarName; /*!< The name of the variable with which the block has been allocated. */ +static char *FunctionName; /*!< The function name that has allocated the memory block. */ +static char *ParentFileName; /*!< The location from which the generich routines were called */ +static char *FileName; /*!< The file name where the function that has allocated the block is called. */ +static int *LineNumber; /*!< The line number in FileName where the function that allocated the block has been called. */ +static char *HighMarkTabBuf; /*!< This is a buffer that holds the log-file output corresponding to the largest memory use that has + occurred on this task */ +static char *HighMarkTabBufWithoutGeneric; /*!< This is a buffer that holds the log-file output corresponding to the largest memory use + that has occurred on this task */ + +#ifdef HUGEPAGES +#include +/*! \brief Allocation function wrapper for hugepages usage. + * + * \param[in] size Size of the allocated memory. + * + * \return void pointer to address in memory. 
+ */ +static void *hmalloc(size_t size) +{ + void *p = get_hugepage_region(size, GHR_STRICT); + + if(!p) + { + warn("Failed to get_hugepage_region of size %g\n", size / (1024.0 * 1024)); + + p = malloc(size); + + if(!p) + terminate("Failed to allocate memory of size %g\n", size / (1024.0 * 1024)); + } + + memset(p, 255, size); + memset(p, 0, size); + + return p; +} +#else /* #ifdef HUGEPAGES */ + +/*! \brief Allocation function wrapper without hugepages usage. + * + * \param[in] size Size of the allocated memory. + * + * \return void pointer to address in memory. + */ +static void *hmalloc(size_t size) { return malloc(size); } +#endif /* #ifdef HUGEPAGES #else */ + +/*! \brief Initializes memory manager. + * + * This function initializes the memory manager. In particular, it sets + * the global variables of the module to their initial value and allocates + * the memory for the stack. + * + * \return void + */ +void mymalloc_init(void) +{ + BlockSize = (size_t *)hmalloc(MAXBLOCKS * sizeof(size_t)); + Table = (void **)hmalloc(MAXBLOCKS * sizeof(void *)); + MovableFlag = (char *)hmalloc(MAXBLOCKS * sizeof(char)); + GenericFlag = (char *)hmalloc(MAXBLOCKS * sizeof(char)); + BasePointers = (void ***)hmalloc(MAXBLOCKS * sizeof(void **)); + VarName = (char *)hmalloc(MAXBLOCKS * MAXCHARS * sizeof(char)); + FunctionName = (char *)hmalloc(MAXBLOCKS * MAXCHARS * sizeof(char)); + ParentFileName = (char *)hmalloc(MAXBLOCKS * MAXCHARS * sizeof(char)); + FileName = (char *)hmalloc(MAXBLOCKS * MAXCHARS * sizeof(char)); + LineNumber = (int *)hmalloc(MAXBLOCKS * sizeof(int)); + HighMarkTabBuf = (char *)hmalloc((100 + 4 * MAXCHARS) * (MAXBLOCKS + 10)); + HighMarkTabBufWithoutGeneric = (char *)hmalloc((100 + 4 * MAXCHARS) * (MAXBLOCKS + 10)); + + memset(VarName, 0, MAXBLOCKS * MAXCHARS); + memset(FunctionName, 0, MAXBLOCKS * MAXCHARS); + memset(ParentFileName, 0, MAXBLOCKS * MAXCHARS); + memset(FileName, 0, MAXBLOCKS * MAXCHARS); + + size_t n = All.MaxMemSize * ((size_t)1024 * 1024); 
+ + n = roundup_to_multiple_of_cacheline_size(n); + + if(!(Base = hmalloc(n))) + terminate("Failed to allocate memory for `Base' (%d Mbytes).\n", All.MaxMemSize); + + TotBytes = FreeBytes = n; + + AllocatedBytes = 0; + Nblocks = 0; + HighMarkBytes = 0; + HighMarkBytesWithoutGeneric = 0; + OldGlobHighMarkMB = 0; + OldGlobHighMarkMBWithoutGeneric = 0; +} + +/*! \brief Writes memory usage in FdMemory. + * + * \param[in] rank Number of tasks involved. + * \param[in] tabbuf Header message written in FdMemory. + * + * \return void + */ +void report_memory_usage(int rank, char *tabbuf) +{ + if(ThisTask == rank) + { + char *buf = mymalloc("buf", (100 + 4 * MAXCHARS) * (Nblocks + 10)); + int cc = 0; + cc += sprintf(buf + cc, "\nMEMORY: Largest Allocation = %g Mbyte | Largest Allocation Without Generic = %g Mbyte\n\n", + OldGlobHighMarkMB, OldGlobHighMarkMBWithoutGeneric); + + cc += sprintf(buf + cc, "%s", tabbuf); + if(ThisTask == 0) + { + if(RestartFlag <= 2) + { + fprintf(FdMemory, "%s", buf); + fflush(FdMemory); + } + } + else + { + MPI_Send(&cc, 1, MPI_INT, 0, TAG_N, MPI_COMM_WORLD); + MPI_Send(buf, cc + 1, MPI_BYTE, 0, TAG_PDATA, MPI_COMM_WORLD); + } + myfree(buf); + } + + if(ThisTask == 0 && rank > 0) + { + int cc; + MPI_Recv(&cc, 1, MPI_INT, rank, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + char *buf = mymalloc("buf", cc + 1); + MPI_Recv(buf, cc + 1, MPI_BYTE, rank, TAG_PDATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + if(RestartFlag <= 2) + { + fprintf(FdMemory, "%s", buf); + fflush(FdMemory); + } + myfree(buf); + } +} + +/*! \brief Writes memory useage of largest task in FdMemory. 
+ * + * \return void + */ +void report_detailed_memory_usage_of_largest_task(void) +{ + int flag = 0; + + struct + { + double mem; + int rank; + } local, global; + + local.mem = HighMarkBytes / (1024.0 * 1024.0); + local.rank = ThisTask; + + MPI_Allreduce(&local, &global, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD); + + if(global.mem >= 1.05 * OldGlobHighMarkMB) + { + OldGlobHighMarkMB = global.mem; + flag |= 1; + } + + local.mem = HighMarkBytesWithoutGeneric / (1024.0 * 1024.0); + local.rank = ThisTask; + + MPI_Allreduce(&local, &global, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD); + + if(global.mem >= 1.05 * OldGlobHighMarkMBWithoutGeneric) + { + OldGlobHighMarkMBWithoutGeneric = global.mem; + flag |= 2; + } + + if(flag & 2) + report_memory_usage(global.rank, HighMarkTabBufWithoutGeneric); + + if(flag & 1) + report_memory_usage(global.rank, HighMarkTabBuf); +} + +/*! \brief Dumps the buffer where the memory information is stored to the + * standard output. + * + * \return void + */ +void dump_memory_table(void) +{ + char *buf = malloc(200 * (Nblocks + 10)); + dump_memory_table_buffer(buf); + printf("%s", buf); + free(buf); +} + +/*! \brief Fills the output buffer with the memory log. + * + * \param[out] p Output buffer. + * + * \return The number of charcter written to p. 
+ */ +int dump_memory_table_buffer(char *p) +{ + int cc = 0; + size_t totBlocksize = 0; + + cc += + sprintf(p + cc, "-------------------------- Allocated Memory Blocks---- ( Step %8d )------------------\n", All.NumCurrentTiStep); + cc += sprintf(p + cc, "Task Nr F Variable MBytes Cumulative Function|File|Linenumber\n"); + cc += sprintf(p + cc, "------------------------------------------------------------------------------------------\n"); + for(int i = 0; i < Nblocks; i++) + { + totBlocksize += BlockSize[i]; + + cc += sprintf(p + cc, "%4d %5d %d %40s %10.4f %10.4f %s%s()|%s|%d\n", ThisTask, i, MovableFlag[i], VarName + i * MAXCHARS, + BlockSize[i] / (1024.0 * 1024.0), totBlocksize / (1024.0 * 1024.0), ParentFileName + i * MAXCHARS, + FunctionName + i * MAXCHARS, FileName + i * MAXCHARS, LineNumber[i]); + } + cc += sprintf(p + cc, "------------------------------------------------------------------------------------------\n"); + + return cc; +} + +/*! \brief Allocates a non-movable memory block and store the relative + * information. + * + * \param[in] varname Name of the variable to be stored in the allocated + * block. + * \param[in] n Size of the memory block in bytes. + * \param[in] func Name of function that has called the allocation routine + * (usually given by the __FUNCTION__ macro). + * \param[in] file File where the function that has called the allocation + * routine resides (usually given by the __FILE__ macro). + * \param[in] line Line number of file where the allocation routine was + * called (usually given by the __LINE__ macro). + * + * \return A pointer to the beginning of the allocated memory block. 
+ */ +void *mymalloc_fullinfo(const char *varname, size_t n, const char *func, const char *file, int line, int clear_flag, char *callorigin) +{ + if((n % CACHELINESIZE) > 0) + n = (n / CACHELINESIZE + 1) * CACHELINESIZE; + + if(n < CACHELINESIZE) + n = CACHELINESIZE; + + if(Nblocks >= MAXBLOCKS) + terminate("Task=%d: No blocks left in mymalloc_fullinfo() at %s()/%s/line %d. MAXBLOCKS=%d\n", ThisTask, func, file, line, + MAXBLOCKS); + + if(n > FreeBytes) + { + dump_memory_table(); + terminate( + "\nTask=%d: Not enough memory in mymalloc_fullinfo() to allocate %g MB for variable '%s' at %s()/%s/line %d (FreeBytes=%g " + "MB).\n", + ThisTask, n / (1024.0 * 1024.0), varname, func, file, line, FreeBytes / (1024.0 * 1024.0)); + } + Table[Nblocks] = Base + (TotBytes - FreeBytes); + FreeBytes -= n; + + strncpy(VarName + Nblocks * MAXCHARS, varname, MAXCHARS - 1); + if(callorigin) + { + strncpy(ParentFileName + Nblocks * MAXCHARS, callorigin, MAXCHARS - 1); + GenericFlag[Nblocks] = 1; + AllocatedBytesGeneric += n; + } + else + { + memset(ParentFileName + Nblocks * MAXCHARS, 0, MAXCHARS); + GenericFlag[Nblocks] = 0; + } + strncpy(FunctionName + Nblocks * MAXCHARS, func, MAXCHARS - 1); + strncpy(FileName + Nblocks * MAXCHARS, file, MAXCHARS - 1); + LineNumber[Nblocks] = line; + + AllocatedBytes += n; + BlockSize[Nblocks] = n; + MovableFlag[Nblocks] = 0; + + Nblocks += 1; + + if(AllocatedBytes - AllocatedBytesGeneric > HighMarkBytesWithoutGeneric) + { + HighMarkBytesWithoutGeneric = AllocatedBytes - AllocatedBytesGeneric; + dump_memory_table_buffer(HighMarkTabBufWithoutGeneric); + } + + if(AllocatedBytes > HighMarkBytes) + { + HighMarkBytes = AllocatedBytes; + dump_memory_table_buffer(HighMarkTabBuf); + } + + if(clear_flag) + memset(Table[Nblocks - 1], 0, n); + + return Table[Nblocks - 1]; +} + +/*! \brief Allocates a movable memory block and store the relative information. + * + * \param[in] ptr Pointer to the initial memory address of the block. 
+ * \param[in] varname Name of the variable to be stored in the allocated block. + * \param[in] n Size of the memory block in bytes. + * \param[in] func Name of function that has called the allocation routine + * (usually given by the __FUNCTION__ macro). + * \param[in] file File where the function that has called the allocation + * routine resides (usually given by the __FILE__ macro). + * \param[in] line Line number of file where the allocation routine was + * called (usually given by the __LINE__ macro). + * + * \return A pointer to the beginning of the allocated memory block. + */ +void *mymalloc_movable_fullinfo(void *ptr, const char *varname, size_t n, const char *func, const char *file, int line, + char *callorigin) +{ + if((n % CACHELINESIZE) > 0) + n = (n / CACHELINESIZE + 1) * CACHELINESIZE; + + if(n < CACHELINESIZE) + n = CACHELINESIZE; + + if(Nblocks >= MAXBLOCKS) + terminate("Task=%d: No blocks left in mymalloc_fullinfo() at %s()/%s/line %d. MAXBLOCKS=%d\n", ThisTask, func, file, line, + MAXBLOCKS); + + if(n > FreeBytes) + { + dump_memory_table(); + terminate( + "\nTask=%d: Not enough memory in mymalloc_fullinfo() to allocate %g MB for variable '%s' at %s()/%s/line %d (FreeBytes=%g " + "MB).\n", + ThisTask, n / (1024.0 * 1024.0), varname, func, file, line, FreeBytes / (1024.0 * 1024.0)); + } + Table[Nblocks] = Base + (TotBytes - FreeBytes); + FreeBytes -= n; + + strncpy(VarName + Nblocks * MAXCHARS, varname, MAXCHARS - 1); + if(callorigin) + { + strncpy(ParentFileName + Nblocks * MAXCHARS, callorigin, MAXCHARS - 1); + GenericFlag[Nblocks] = 1; + AllocatedBytesGeneric += n; + } + else + { + memset(ParentFileName + Nblocks * MAXCHARS, 0, MAXCHARS); + GenericFlag[Nblocks] = 0; + } + strncpy(FunctionName + Nblocks * MAXCHARS, func, MAXCHARS - 1); + strncpy(FileName + Nblocks * MAXCHARS, file, MAXCHARS - 1); + LineNumber[Nblocks] = line; + + AllocatedBytes += n; + BlockSize[Nblocks] = n; + MovableFlag[Nblocks] = 1; + BasePointers[Nblocks] = ptr; + + Nblocks 
+= 1; + + if(AllocatedBytes - AllocatedBytesGeneric > HighMarkBytesWithoutGeneric) + { + HighMarkBytesWithoutGeneric = AllocatedBytes - AllocatedBytesGeneric; + dump_memory_table_buffer(HighMarkTabBufWithoutGeneric); + } + + if(AllocatedBytes > HighMarkBytes) + { + HighMarkBytes = AllocatedBytes; + dump_memory_table_buffer(HighMarkTabBuf); + } + + return Table[Nblocks - 1]; +} + +/*! \brief Rounds up size to cachline size. + * + * \param[in] n Size. + * + * \return Rounded up size. + */ +size_t roundup_to_multiple_of_cacheline_size(size_t n) +{ + if((n % CACHELINESIZE) > 0) + n = (n / CACHELINESIZE + 1) * CACHELINESIZE; + + return n; +} + +/*! \brief Deallocates a non-movable memory block. + * + * For this operation to be successful the block that has to be deallocated + * must be the last allocated one. + * + * \param[in] p Pointer to the memory block to be deallocated. + * \param[in] func Name of function that has called the deallocation routine + * (usually given by the __FUNCTION__ macro). + * \param[in] file File where the function that has called the deallocation + * routine resides (usually given by the __FILE__ macro). + * \param[in] line Line number of file where the deallocation routine was + * called (usually given by the __LINE__ macro). + */ +void myfree_fullinfo(void *p, const char *func, const char *file, int line) +{ + if(Nblocks == 0) + terminate("no allocated blocks that could be freed"); + + if(p != Table[Nblocks - 1]) + { + dump_memory_table(); + terminate("Task=%d: Wrong call of myfree() at %s()/%s/line %d: not the last allocated block!\n", ThisTask, func, file, line); + } + + Nblocks -= 1; + AllocatedBytes -= BlockSize[Nblocks]; + + if(GenericFlag[Nblocks]) + AllocatedBytesGeneric -= BlockSize[Nblocks]; + + FreeBytes += BlockSize[Nblocks]; +} + +/*! \brief Finds last allocated block. + * + * \return void pointer to last allocated block. 
 */
void *myfree_query_last_block(void)
{
  /* terminates the run if nothing is allocated */
  if(Nblocks == 0)
    terminate("no allocated blocks that could be returned");

  return Table[Nblocks - 1];
}

/*! \brief Deallocates a movable memory block.
 *
 *  For this operation to be successful all the blocks allocated after the
 *  block that has to be freed must be of movable type. The data of these
 *  later blocks is shifted down by the size of the freed block, and the
 *  pointers registered for them in BasePointers are adjusted accordingly.
 *
 *  \param[in] p pointer to the memory block to be deallocated.
 *  \param[in] func name of function that has called the deallocation routine
 *             (usually given by the __FUNCTION__ macro).
 *  \param[in] file file where the function that has called the deallocation
 *             routine resides (usually given by the __FILE__ macro).
 *  \param[in] line line number of file where the deallocation routine was
 *             called (usually given by the __LINE__ macro).
 *
 *  \return void
 */
void myfree_movable_fullinfo(void *p, const char *func, const char *file, int line)
{
  int i;

  if(Nblocks == 0)
    terminate("no allocated blocks that could be freed");

  /* first, let's find the block (search from the most recent allocation backwards) */
  int nr;

  for(nr = Nblocks - 1; nr >= 0; nr--)
    if(p == Table[nr])
      break;

  /* nr == -1 means the loop ran through without a match */
  if(nr < 0)
    {
      dump_memory_table();
      terminate("Task=%d: Wrong call of myfree_movable() from %s()/%s/line %d - this block has not been allocated!\n", ThisTask, func,
                file, line);
    }

  if(nr < Nblocks - 1) /* the block is not the last allocated block */
    {
      /* check that all subsequent blocks are actually movable */
      for(i = nr + 1; i < Nblocks; i++)
        if(MovableFlag[i] == 0)
          {
            dump_memory_table();
            myflush(stdout);
            terminate(
                "Task=%d: Wrong call of myfree_movable() from %s()/%s/line %d - behind block=%d there are subsequent non-movable "
                "allocated blocks\n",
                ThisTask, func, file, line, nr);
          }
    }

  /* give the bytes of the freed block back to the pool */
  if(GenericFlag[nr])
    AllocatedBytesGeneric -= BlockSize[nr];

  AllocatedBytes -= BlockSize[nr];
  FreeBytes += BlockSize[nr];

  /* all later blocks move towards lower addresses by the size of the freed block */
  ptrdiff_t offset = -BlockSize[nr];
  size_t length    = 0;

  /* total number of bytes stored behind the freed block */
  for(i = nr + 1; i < Nblocks; i++)
    length += BlockSize[i];

  /* source and destination overlap, hence memmove() */
  if(nr < Nblocks - 1)
    memmove(Table[nr + 1] + offset, Table[nr + 1], length);

  /* update the table addresses and the pointers registered at allocation time */
  for(i = nr + 1; i < Nblocks; i++)
    {
      Table[i] += offset;
      *BasePointers[i] = *BasePointers[i] + offset;
    }

  /* close the gap in the bookkeeping tables: entry i moves to entry i - 1 */
  for(i = nr + 1; i < Nblocks; i++)
    {
      Table[i - 1]        = Table[i];
      BasePointers[i - 1] = BasePointers[i];
      BlockSize[i - 1]    = BlockSize[i];
      MovableFlag[i - 1]  = MovableFlag[i];
      GenericFlag[i - 1]  = GenericFlag[i];

      strncpy(VarName + (i - 1) * MAXCHARS, VarName + i * MAXCHARS, MAXCHARS - 1);
      strncpy(FunctionName + (i - 1) * MAXCHARS, FunctionName + i * MAXCHARS, MAXCHARS - 1);
      strncpy(ParentFileName + (i - 1) * MAXCHARS, ParentFileName + i * MAXCHARS, MAXCHARS - 1);
      strncpy(FileName + (i - 1) * MAXCHARS, FileName + i * MAXCHARS, MAXCHARS - 1);
      LineNumber[i - 1] = LineNumber[i];
    }

  Nblocks -= 1;
}

/*! \brief Reallocates an existing non-movable memory block.
 *
 *  For this operation to be successful this must be the last allocated block.
 *
 *  \param[in] p Pointer to the existing memory block to be reallocated.
 *  \param[in] n The new size of the memory block in bytes.
 *  \param[in] func Name of function that has called the reallocation routine
 *             (usually given by the __FUNCTION__ macro).
 *  \param[in] file File where the function that has called the reallocation
 *             routine resides (usually given by the __FILE__ macro).
 *  \param[in] line Line number of file where the reallocation routine was
 *             called (usually given by the __LINE__ macro).
 *
 *  \return A pointer to the beginning of the newly allocated memory block.
+ */ +void *myrealloc_fullinfo(void *p, size_t n, const char *func, const char *file, int line) +{ + if((n % CACHELINESIZE) > 0) + n = (n / CACHELINESIZE + 1) * CACHELINESIZE; + + if(n < CACHELINESIZE) + n = CACHELINESIZE; + + if(Nblocks == 0) + terminate("no allocated blocks that could be reallocated"); + + if(p != Table[Nblocks - 1]) + { + dump_memory_table(); + terminate("Task=%d: Wrong call of myrealloc() at %s()/%s/line %d - not the last allocated block!\n", ThisTask, func, file, line); + } + + if(GenericFlag[Nblocks - 1]) + AllocatedBytesGeneric -= BlockSize[Nblocks - 1]; + + AllocatedBytes -= BlockSize[Nblocks - 1]; + FreeBytes += BlockSize[Nblocks - 1]; + + if(n > FreeBytes) + { + dump_memory_table(); + terminate("Task=%d: Not enough memory in myremalloc(n=%g MB) at %s()/%s/line %d. previous=%g FreeBytes=%g MB\n", ThisTask, + n / (1024.0 * 1024.0), func, file, line, BlockSize[Nblocks - 1] / (1024.0 * 1024.0), FreeBytes / (1024.0 * 1024.0)); + } + Table[Nblocks - 1] = Base + (TotBytes - FreeBytes); + FreeBytes -= n; + + AllocatedBytes += n; + BlockSize[Nblocks - 1] = n; + + if(AllocatedBytes > HighMarkBytes) + { + HighMarkBytes = AllocatedBytes; + dump_memory_table_buffer(HighMarkTabBuf); + } + + return Table[Nblocks - 1]; +} + +/*! \brief Reallocates an existing movable memory block. + * + * For this operation to be successful all the blocks allocated after the + * block that has to be reallocated must be of movable type. + * + * \param[in] p Pointer to the existing memory block to be reallocated. + * \param[in] n The new size of the memory block in bytes. + * \param[in] func Name of function that has called the reallocation routine + * (usually given by the __FUNCTION__ macro). + * \param[in] file File where the function that has called the reallocation + * routine resides (usually given by the __FILE__ macro). + * \param[in] line Line number of file where the reallocation routine was + * called (usually given by the __LINE__ macro). 
 *
 *  \return A pointer to the beginning of the newly allocated memory block.
 */
void *myrealloc_movable_fullinfo(void *p, size_t n, const char *func, const char *file, int line)
{
  int i;

  /* pad the request to whole cache lines; an empty request still takes one */
  if((n % CACHELINESIZE) > 0)
    n = (n / CACHELINESIZE + 1) * CACHELINESIZE;

  if(n < CACHELINESIZE)
    n = CACHELINESIZE;

  if(Nblocks == 0)
    terminate("no allocated blocks that could be reallocated");

  /* first, let's find the block (search from the most recent allocation backwards) */
  int nr;

  for(nr = Nblocks - 1; nr >= 0; nr--)
    if(p == Table[nr])
      break;

  /* nr == -1 means the loop ran through without a match */
  if(nr < 0)
    {
      dump_memory_table();
      terminate("Task=%d: Wrong call of myrealloc_movable() from %s()/%s/line %d - this block has not been allocated!\n", ThisTask,
                func, file, line);
    }

  if(nr < Nblocks - 1) /* the block is not the last allocated block */
    {
      /* check that all subsequent blocks are actually movable */
      for(i = nr + 1; i < Nblocks; i++)
        if(MovableFlag[i] == 0)
          {
            dump_memory_table();
            terminate(
                "Task=%d: Wrong call of myrealloc_movable() from %s()/%s/line %d - behind block=%d there are subsequent non-movable "
                "allocated blocks\n",
                ThisTask, func, file, line, nr);
          }
    }

  /* blocks flagged as generic are rejected by this routine */
  if(GenericFlag[nr])
    terminate("unexpected");

  /* take the old size out of the counters before checking whether the new size fits */
  AllocatedBytes -= BlockSize[nr];
  FreeBytes += BlockSize[nr];

  if(n > FreeBytes)
    {
      dump_memory_table();
      terminate("Task=%d: at %s()/%s/line %d: Not enough memory in myremalloc_movable(n=%g MB). previous=%g FreeBytes=%g MB\n",
                ThisTask, func, file, line, n / (1024.0 * 1024.0), BlockSize[nr] / (1024.0 * 1024.0), FreeBytes / (1024.0 * 1024.0));
    }

  /* positive when the block grows, negative when it shrinks: all later blocks are shifted by this amount */
  ptrdiff_t offset = n - BlockSize[nr];
  size_t length    = 0;

  /* total number of bytes stored behind the resized block */
  for(i = nr + 1; i < Nblocks; i++)
    length += BlockSize[i];

  /* source and destination may overlap, hence memmove() */
  if(nr < Nblocks - 1)
    memmove(Table[nr + 1] + offset, Table[nr + 1], length);

  /* update the table addresses and the pointers registered at allocation time */
  for(i = nr + 1; i < Nblocks; i++)
    {
      Table[i] += offset;

      *BasePointers[i] = *BasePointers[i] + offset;
    }

  /* book the new size; Table[nr] itself is not modified, the block keeps its start address */
  FreeBytes -= n;
  AllocatedBytes += n;
  BlockSize[nr] = n;

  if(AllocatedBytes > HighMarkBytes)
    {
      HighMarkBytes = AllocatedBytes;
      dump_memory_table_buffer(HighMarkTabBuf);
    }

  return Table[nr];
}
diff --git a/src/amuse/community/arepo/src/utils/parallel_sort.c b/src/amuse/community/arepo/src/utils/parallel_sort.c new file mode 100644 index 0000000000..f825a9f220 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/parallel_sort.c @@ -0,0 +1,743 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/parallel_sort.c + * \date 05/2018 + * \brief MPI parallel sorting routine.
+ * \details contains functions: + * int parallel_sort_indirect_compare(const void *a, + * const void *b) + * double parallel_sort(void *base, size_t nmemb, size_t size, + * int (*compar) (const void *, const void *)) + * double parallel_sort_comm(void *base, size_t nmemb, size_t + * size, int (*compar) (const void *, const void *), + * MPI_Comm comm) + * static void get_local_rank(char *element, size_t + * tie_braking_rank, char *base, size_t nmemb, size_t size, + * size_t noffs_thistask, long long left, long long right, + * size_t * loc, int (*compar) (const void *, const void *)) + * static void check_local_rank(char *element, size_t + * tie_braking_rank, char *base, size_t nmemb, size_t size, + * size_t noffs_thistask, long long left, long long right, + * size_t loc, int (*compar) (const void *, const void *)) + * static void serial_sort(char *base, size_t nmemb, size_t + * size, int (*compar) (const void *, const void *)) + * static void msort_serial_with_tmp(char *base, size_t n, + * size_t s, int (*compar) (const void *, const void *), + * char *t) + * void parallel_sort_test_order(char *base, size_t nmemb, + * size_t size, int (*compar) (const void *, const void *)) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#define TRANSFER_SIZE_LIMIT 1000000000 +#define MAX_ITER_PARALLEL_SORT 500 + +/* Note: For gcc-4.1.2, I found that the compiler produces incorrect code for this routune if optimization level O1 or higher is used. + * In gcc-4.3.4, this problem is absent. 
+ */ + +#define TAG_TRANSFER 100 + +static void serial_sort(char *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)); +static void msort_serial_with_tmp(char *base, size_t n, size_t s, int (*compar)(const void *, const void *), char *t); +static void get_local_rank(char *element, size_t tie_braking_rank, char *base, size_t nmemb, size_t size, size_t noffs_thistask, + long long left, long long right, size_t *loc, int (*compar)(const void *, const void *)); + +static int (*comparfunc)(const void *, const void *); +static char *median_element_list; +static size_t element_size; + +/*! \brief Wrapper for comparison of two elements. + * + * \param[in] a First element. + * \param[in] b Second element. + * + * \return (-1,0,+1) -1 if a < b. + */ +int parallel_sort_indirect_compare(const void *a, const void *b) +{ + return (*comparfunc)(median_element_list + *((int *)a) * element_size, median_element_list + *((int *)b) * element_size); +} + +/*! \brief Main function to perform a parallel sort. + * + * Using MPI_COMM_WORLD as communicator. + * + * \param[in, out] base Array to be sorted. + * \param nmemb Number of entries in array. + * \param[in] size Size of an element in array to be sorted. + * \param[in] compar Comparison function. + * + * \return Time it took to sort array. + */ +double parallel_sort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)) +{ + return parallel_sort_comm(base, nmemb, size, compar, MPI_COMM_WORLD); +} + +/*! \brief Function to perform a parallel sort with specified MPI communicator. + * + * \param[in, out] base Array to be sorted. + * \param[in] nmemb Number of entries in array. + * \param[in] size Size of an element in array to be sorted. + * \param[in] compar Comparison function. + * \param[in] comm MPI communicator. + * + * \return Time it took to sort array. 
+ */ +double parallel_sort_comm(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *), MPI_Comm comm) +{ + int i, j, ranks_not_found, Local_ThisTask, Local_NTask, Local_PTask, Color, new_max_loc; + size_t tie_braking_rank, new_tie_braking_rank, rank; + MPI_Comm MPI_CommLocal; + + double ta = second(); + + /* do a serial sort of the local data up front */ + serial_sort((char *)base, nmemb, size, compar); + + /* we create a communicator that contains just those tasks with nmemb > 0. This makes + * it easier to deal with CPUs that do not hold any data. + */ + if(nmemb) + Color = 1; + else + Color = 0; + + MPI_Comm_split(comm, Color, ThisTask, &MPI_CommLocal); + MPI_Comm_rank(MPI_CommLocal, &Local_ThisTask); + MPI_Comm_size(MPI_CommLocal, &Local_NTask); + + if(Local_NTask > 1 && Color == 1) + { + for(Local_PTask = 0; Local_NTask > (1 << Local_PTask); Local_PTask++) + ; + + size_t *nlist = (size_t *)mymalloc("nlist", Local_NTask * sizeof(size_t)); + size_t *noffs = (size_t *)mymalloc("noffs", Local_NTask * sizeof(size_t)); + + MPI_Allgather(&nmemb, sizeof(size_t), MPI_BYTE, nlist, sizeof(size_t), MPI_BYTE, MPI_CommLocal); + + for(i = 1, noffs[0] = 0; i < Local_NTask; i++) + noffs[i] = noffs[i - 1] + nlist[i - 1]; + + char *element_guess = mymalloc("element_guess", Local_NTask * size); + size_t *element_tie_braking_rank = mymalloc("element_tie_braking_rank", Local_NTask * sizeof(size_t)); + size_t *desired_glob_rank = mymalloc("desired_glob_rank", Local_NTask * sizeof(size_t)); + size_t *current_glob_rank = mymalloc("current_glob_rank", Local_NTask * sizeof(size_t)); + size_t *current_loc_rank = mymalloc("current_loc_rank", Local_NTask * sizeof(size_t)); + long long *range_left = mymalloc("range_left", Local_NTask * sizeof(long long)); + long long *range_right = mymalloc("range_right", Local_NTask * sizeof(long long)); + int *max_loc = mymalloc("max_loc", Local_NTask * sizeof(int)); + + size_t *list = mymalloc("list", Local_NTask * 
sizeof(size_t)); + size_t *range_len_list = mymalloc("range_len_list", Local_NTask * sizeof(long long)); + char *median_element = mymalloc("median_element", size); + median_element_list = mymalloc("median_element_list", Local_NTask * size); + size_t *tie_braking_rank_list = mymalloc("tie_braking_rank_list", Local_NTask * sizeof(size_t)); + int *index_list = mymalloc("index_list", Local_NTask * sizeof(int)); + int *max_loc_list = mymalloc("max_loc_list", Local_NTask * sizeof(int)); + size_t *source_range_len_list = mymalloc("source_range_len_list", Local_NTask * sizeof(long long)); + size_t *source_tie_braking_rank_list = mymalloc("source_tie_braking_rank_list", Local_NTask * sizeof(long long)); + char *source_median_element_list = mymalloc("source_median_element_list", Local_NTask * size); + char *new_element_guess = mymalloc("new_element_guess", size); + + for(i = 0; i < Local_NTask - 1; i++) + { + desired_glob_rank[i] = noffs[i + 1]; + current_glob_rank[i] = 0; + range_left[i] = 0; /* first element that it can be */ + range_right[i] = nmemb; /* first element that it can not be */ + } + + /* now we determine the first split element guess, which is the same for all divisions in the first iteration */ + + /* find the median of each processor, and then take the median among those values. 
+ * This should work reasonably well even for extremely skewed distributions + */ + long long range_len = range_right[0] - range_left[0]; + + if(range_len >= 1) + { + long long mid = (range_left[0] + range_right[0]) / 2; + memcpy(median_element, (char *)base + mid * size, size); + tie_braking_rank = mid + noffs[Local_ThisTask]; + } + + MPI_Gather(&range_len, sizeof(long long), MPI_BYTE, range_len_list, sizeof(long long), MPI_BYTE, 0, MPI_CommLocal); + MPI_Gather(median_element, size, MPI_BYTE, median_element_list, size, MPI_BYTE, 0, MPI_CommLocal); + MPI_Gather(&tie_braking_rank, sizeof(size_t), MPI_BYTE, tie_braking_rank_list, sizeof(size_t), MPI_BYTE, 0, MPI_CommLocal); + + if(Local_ThisTask == 0) + { + for(j = 0; j < Local_NTask; j++) + max_loc_list[j] = j; + + /* eliminate the elements that are undefined because the corresponding CPU has zero range left */ + int nleft = Local_NTask; + + for(j = 0; j < nleft; j++) + { + if(range_len_list[j] < 1) + { + range_len_list[j] = range_len_list[nleft - 1]; + if(range_len_list[nleft - 1] >= 1 && j != (nleft - 1)) + { + memcpy(median_element_list + j * size, median_element_list + (nleft - 1) * size, size); + memcpy(tie_braking_rank_list + j, tie_braking_rank_list + (nleft - 1), sizeof(size_t)); + max_loc_list[j] = max_loc_list[nleft - 1]; + } + + nleft--; + j--; + } + } + + /* do a serial sort of the remaining elements (indirectly, so that we have the order of tie braking list as well) */ + comparfunc = compar; + element_size = size; + for(j = 0; j < nleft; j++) + index_list[j] = j; + qsort(index_list, nleft, sizeof(int), parallel_sort_indirect_compare); + + /* now select the median of the medians */ + int mid = nleft / 2; + memcpy(&element_guess[0], median_element_list + index_list[mid] * size, size); + element_tie_braking_rank[0] = tie_braking_rank_list[index_list[mid]]; + max_loc[0] = max_loc_list[index_list[mid]]; + } + + MPI_Bcast(element_guess, size, MPI_BYTE, 0, MPI_CommLocal); + 
MPI_Bcast(&element_tie_braking_rank[0], sizeof(size_t), MPI_BYTE, 0, MPI_CommLocal); + MPI_Bcast(&max_loc[0], 1, MPI_INT, 0, MPI_CommLocal); + + for(i = 1; i < Local_NTask - 1; i++) + { + memcpy(element_guess + i * size, element_guess, size); + element_tie_braking_rank[i] = element_tie_braking_rank[0]; + max_loc[i] = max_loc[0]; + } + + int iter = 0; + + do + { + for(i = 0; i < Local_NTask - 1; i++) + { + if(current_glob_rank[i] != desired_glob_rank[i]) + { + get_local_rank(element_guess + i * size, element_tie_braking_rank[i], (char *)base, nmemb, size, + noffs[Local_ThisTask], range_left[i], range_right[i], ¤t_loc_rank[i], compar); + } + } + + /* now compute the global ranks by summing the local ranks */ + /* Note: the last element in current_loc_rank is not defined. It will be summed by the last processor, and stored in the last + * element of current_glob_rank */ + MPI_Alltoall(current_loc_rank, sizeof(size_t), MPI_BYTE, list, sizeof(size_t), MPI_BYTE, MPI_CommLocal); + for(j = 0, rank = 0; j < Local_NTask; j++) + rank += list[j]; + MPI_Allgather(&rank, sizeof(size_t), MPI_BYTE, current_glob_rank, sizeof(size_t), MPI_BYTE, MPI_CommLocal); + + for(i = 0, ranks_not_found = 0; i < Local_NTask - 1; i++) + { + if(current_glob_rank[i] != desired_glob_rank[i]) /* here we're not yet done */ + { + ranks_not_found++; + + if(current_glob_rank[i] < desired_glob_rank[i]) + { + range_left[i] = current_loc_rank[i]; + + if(Local_ThisTask == max_loc[i]) + range_left[i]++; + } + + if(current_glob_rank[i] > desired_glob_rank[i]) + range_right[i] = current_loc_rank[i]; + } + } + + /* now we need to determine new element guesses */ + for(i = 0; i < Local_NTask - 1; i++) + { + if(current_glob_rank[i] != desired_glob_rank[i]) /* here we're not yet done */ + { + /* find the median of each processor, and then take the median among those values. 
+ * This should work reasonably well even for extremely skewed distributions + */ + source_range_len_list[i] = range_right[i] - range_left[i]; + + if(source_range_len_list[i] >= 1) + { + long long middle = (range_left[i] + range_right[i]) / 2; + memcpy(source_median_element_list + i * size, (char *)base + middle * size, size); + source_tie_braking_rank_list[i] = middle + noffs[Local_ThisTask]; + } + } + } + + MPI_Alltoall(source_range_len_list, sizeof(long long), MPI_BYTE, range_len_list, sizeof(long long), MPI_BYTE, MPI_CommLocal); + MPI_Alltoall(source_median_element_list, size, MPI_BYTE, median_element_list, size, MPI_BYTE, MPI_CommLocal); + MPI_Alltoall(source_tie_braking_rank_list, sizeof(size_t), MPI_BYTE, tie_braking_rank_list, sizeof(size_t), MPI_BYTE, + MPI_CommLocal); + + if(Local_ThisTask < Local_NTask - 1) + { + if(current_glob_rank[Local_ThisTask] != + desired_glob_rank[Local_ThisTask]) /* in this case we're not yet done for this split point */ + { + for(j = 0; j < Local_NTask; j++) + max_loc_list[j] = j; + + /* eliminate the elements that are undefined because the corresponding CPU has zero range left */ + int nleft = Local_NTask; + + for(j = 0; j < nleft; j++) + { + if(range_len_list[j] < 1) + { + range_len_list[j] = range_len_list[nleft - 1]; + if(range_len_list[nleft - 1] >= 1 && j != (nleft - 1)) + { + memcpy(median_element_list + j * size, median_element_list + (nleft - 1) * size, size); + memcpy(tie_braking_rank_list + j, tie_braking_rank_list + (nleft - 1), sizeof(size_t)); + max_loc_list[j] = max_loc_list[nleft - 1]; + } + + nleft--; + j--; + } + } + + if((iter & 1)) + { + int max_range, maxj; + + for(j = 0, maxj = 0, max_range = 0; j < nleft; j++) + if(range_len_list[j] > max_range) + { + max_range = range_len_list[j]; + maxj = j; + } + + /* now select the median element from the task which has the largest range */ + memcpy(new_element_guess, median_element_list + maxj * size, size); + new_tie_braking_rank = tie_braking_rank_list[maxj]; + 
new_max_loc = max_loc_list[maxj]; + } + else + { + /* do a serial sort of the remaining elements (indirectly, so that we have the order of tie braking list as well) + */ + comparfunc = compar; + element_size = size; + for(j = 0; j < nleft; j++) + index_list[j] = j; + qsort(index_list, nleft, sizeof(int), parallel_sort_indirect_compare); + + /* now select the median of the medians */ + int mid = nleft / 2; + memcpy(new_element_guess, median_element_list + index_list[mid] * size, size); + new_tie_braking_rank = tie_braking_rank_list[index_list[mid]]; + new_max_loc = max_loc_list[index_list[mid]]; + } + } + else + { + /* in order to preserve existing guesses */ + memcpy(new_element_guess, element_guess + Local_ThisTask * size, size); + new_tie_braking_rank = element_tie_braking_rank[Local_ThisTask]; + new_max_loc = max_loc[Local_ThisTask]; + } + } + + MPI_Allgather(new_element_guess, size, MPI_BYTE, element_guess, size, MPI_BYTE, MPI_CommLocal); + MPI_Allgather(&new_tie_braking_rank, sizeof(size_t), MPI_BYTE, element_tie_braking_rank, sizeof(size_t), MPI_BYTE, + MPI_CommLocal); + MPI_Allgather(&new_max_loc, 1, MPI_INT, max_loc, 1, MPI_INT, MPI_CommLocal); + + iter++; + + if(iter > (MAX_ITER_PARALLEL_SORT - 100) && Local_ThisTask == 0) + { + printf("PSORT: iter=%d: ranks_not_found=%d Local_NTask=%d\n", iter, ranks_not_found, Local_NTask); + myflush(stdout); + if(iter > MAX_ITER_PARALLEL_SORT) + terminate("can't find the split points. 
That's odd"); + } + } + while(ranks_not_found); + + myfree(new_element_guess); + myfree(source_median_element_list); + myfree(source_tie_braking_rank_list); + myfree(source_range_len_list); + myfree(max_loc_list); + myfree(index_list); + myfree(tie_braking_rank_list); + myfree(median_element_list); + myfree(median_element); + + /* At this point we have found all the elements corresponding to the desired split points */ + /* we can now go ahead and determine how many elements of the local CPU have to go to each other CPU */ + + if(nmemb * size > (1LL << 31)) + terminate("currently, local data must be smaller than 2 GB"); + /* note: to restrict this limitation, the send/recv count arrays have to made 64-bit, + * and the MPI data exchange though MPI_Alltoall has to be modified such that buffers > 2 GB become possible + */ + + int *send_count = mymalloc("send_count", Local_NTask * sizeof(int)); + int *recv_count = mymalloc("recv_count", Local_NTask * sizeof(int)); + int *send_offset = mymalloc("send_offset", Local_NTask * sizeof(int)); + int *recv_offset = mymalloc("recv_offset", Local_NTask * sizeof(int)); + + for(i = 0; i < Local_NTask; i++) + send_count[i] = 0; + + int target = 0; + + for(i = 0; i < nmemb; i++) + { + while(target < Local_NTask - 1) + { + int cmp = compar((char *)base + i * size, element_guess + target * size); + if(cmp == 0) + { + if(i + noffs[Local_ThisTask] < element_tie_braking_rank[target]) + cmp = -1; + else if(i + noffs[Local_ThisTask] > element_tie_braking_rank[target]) + cmp = +1; + } + if(cmp >= 0) + target++; + else + break; + } + send_count[target]++; + } + + MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_CommLocal); + + size_t nimport; + + for(j = 0, nimport = 0, recv_offset[0] = 0, send_offset[0] = 0; j < Local_NTask; j++) + { + nimport += recv_count[j]; + + if(j > 0) + { + send_offset[j] = send_offset[j - 1] + send_count[j - 1]; + recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1]; + } + } + + if(nimport != 
nmemb) + terminate("nimport != nmemb"); + + for(j = 0; j < Local_NTask; j++) + { + send_count[j] *= size; + recv_count[j] *= size; + + send_offset[j] *= size; + recv_offset[j] *= size; + } + + char *basetmp = mymalloc("basetmp", nmemb * size); + + /* exchange the data */ + MPI_Alltoallv(base, send_count, send_offset, MPI_BYTE, basetmp, recv_count, recv_offset, MPI_BYTE, MPI_CommLocal); + + memcpy(base, basetmp, nmemb * size); + myfree(basetmp); + + serial_sort((char *)base, nmemb, size, compar); + + myfree(recv_offset); + myfree(send_offset); + myfree(recv_count); + myfree(send_count); + + myfree(range_len_list); + myfree(list); + myfree(max_loc); + myfree(range_right); + myfree(range_left); + myfree(current_loc_rank); + myfree(current_glob_rank); + myfree(desired_glob_rank); + myfree(element_tie_braking_rank); + myfree(element_guess); + myfree(noffs); + myfree(nlist); + } + + MPI_Comm_free(&MPI_CommLocal); + + double tb = second(); + return timediff(ta, tb); +} + +/*! \brief Get rank of an element. + * + * \param[in] element Element of which we want the rank. + * \param[in] tie_braking_rank The inital global rank of this element (needed + * for braking ties). + * \param[in] base Base address of local data. + * \param[in] nmemb Number of elements in array. + * \param[in] size Size of local data. + * \param[in] noffs_thistask Cumulative length of data on lower tasks. + * \param[in] left Range of elements on local task that may hold the element. + * \param[in] right Range of elements on local task that may hold the element. + * \param[out] loc Local rank of the element. + * \param[in] compar User-specified comparison function. 
+ *
+ *  \return void; the result is written to *loc.
+ */
+static void get_local_rank(char *element, size_t tie_braking_rank, char *base, size_t nmemb, size_t size, size_t noffs_thistask,
+                           long long left, long long right, size_t *loc, int (*compar)(const void *, const void *))
+{
+  if(right < left)
+    terminate("right < left");
+  /* NOTE(review): the callers visible in this file start with right = nmemb and only shrink it, so this shortcut looks unreachable - confirm */
+  if(left == 0 && right == nmemb + 1)
+    {
+      if(compar(base + (nmemb - 1) * size, element) < 0)
+        {
+          *loc = nmemb;
+          return;
+        }
+      else if(compar(base, element) > 0)
+        {
+          *loc = 0;
+          return;
+        }
+    }
+
+  if(right == left) /* looks like we already converged to the proper rank */
+    {
+      *loc = left;
+    }
+  else
+    {
+      if(compar(base + (right - 1) * size, element) < 0) /* the last element is smaller, hence all elements are on the left */
+        *loc = (right - 1) + 1;
+      else if(compar(base + left * size, element) > 0) /* the first element is already larger, hence no element is on the left */
+        *loc = left;
+      else
+        {
+          while(right > left) /* bisection over the half-open index range [left, right) */
+            {
+              long long mid = ((right - 1) + left) / 2;
+
+              int cmp = compar(base + mid * size, element);
+              if(cmp == 0) /* equal keys: decide by comparing the global position of mid with tie_braking_rank */
+                {
+                  if(mid + noffs_thistask < tie_braking_rank)
+                    cmp = -1;
+                  else if(mid + noffs_thistask > tie_braking_rank)
+                    cmp = +1;
+                }
+
+              if(cmp == 0) /* element has exactly been found */
+                {
+                  *loc = mid;
+                  break;
+                }
+
+              if((right - 1) == left) /* element is not on this CPU */
+                {
+                  if(cmp < 0)
+                    *loc = mid + 1;
+                  else
+                    *loc = mid;
+                  break;
+                }
+
+              if(cmp < 0)
+                {
+                  left = mid + 1;
+                }
+              else
+                {
+                  if((right - 1) == left + 1) /* two candidates left, so mid has to coincide with left */
+                    {
+                      if(mid != left)
+                        terminate("Can't be: -->left=%lld right=%lld\n", left, right);
+
+                      *loc = left;
+                      break;
+                    }
+
+                  right = mid;
+                }
+            }
+        }
+    }
+}
+
+/*! \brief Wrapper for serial sorting algorithm.
+ *
+ *  Calls a merge sort algorithm.
+ *
+ *  \param[in, out] base Array to be sorted.
+ *  \param[in] nmemb Number of elements in array.
+ *  \param[in] size Size of each element.
+ *  \param[in] compar Comparison function.
+ *
+ *  \return void
+ */
+static void serial_sort(char *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *))
+{
+  /* scratch area of the same size as the data, needed by the merge step */
+  char *scratch = (char *)mymalloc("tmp", nmemb * size);
+
+  msort_serial_with_tmp(base, nmemb, size, compar, scratch);
+
+  myfree(scratch);
+}
+
+/*! \brief Recursive top-down merge sort (serial).
+ *
+ *  \param[in, out] base Array to be sorted.
+ *  \param[in] n Number of elements.
+ *  \param[in] s Size of each element in bytes.
+ *  \param[in] compar Comparison function.
+ *  \param[in, out] t Scratch storage; the merge step writes up to n elements of s bytes into it.
+ *
+ *  \return void
+ */
+static void msort_serial_with_tmp(char *base, size_t n, size_t s, int (*compar)(const void *, const void *), char *t)
+{
+  size_t nleft, nright;
+  char *left, *right;
+  char *out = t;
+
+  if(n <= 1)
+    return;
+
+  /* split into two halves and sort each of them recursively, sharing the scratch area t */
+  nleft  = n / 2;
+  nright = n - nleft;
+  left   = base;
+  right  = base + nleft * s;
+
+  msort_serial_with_tmp(left, nleft, s, compar, t);
+  msort_serial_with_tmp(right, nright, s, compar, t);
+
+  /* merge into t; for equal keys the element of the right half is emitted first */
+  for(; nleft > 0 && nright > 0; out += s)
+    {
+      if(compar(left, right) < 0)
+        {
+          memcpy(out, left, s);
+          left += s;
+          nleft--;
+        }
+      else
+        {
+          memcpy(out, right, s);
+          right += s;
+          nright--;
+        }
+    }
+
+  /* flush what remains of the left half; a remainder of the right half already sits at the end of base */
+  if(nleft > 0)
+    memcpy(out, left, nleft * s);
+
+  /* copy back the n - nright elements that went through t */
+  memcpy(base, t, (n - nright) * s);
+}
+
+/*! \brief Test function for parallel sort.
+ *
+ *  \param[in] base Array to be checked.
+ *  \param[in] nmemb Number of elements in array.
+ *  \param[in] size Size of each element.
+ *  \param[in] compar Comparison function.
+ *
+ *  \return void; terminate() is called if a local element compares smaller than the last element of the preceding non-empty task.
+ */
+void parallel_sort_test_order(char *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *))
+{
+  int i, recv, send;
+  size_t *nlist;
+  /* nlist[t] receives the element count of task t */
+  nlist = (size_t *)mymalloc("nlist", NTask * sizeof(size_t));
+
+  MPI_Allgather(&nmemb, sizeof(size_t), MPI_BYTE, nlist, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+  /* recv: closest lower task that holds data; stays -1 if there is none or if this task is empty */
+  for(i = 0, recv = -1; i < ThisTask && nmemb > 0; i++)
+    if(nlist[i] > 0)
+      recv = i;
+  /* send: closest higher task that holds data; stays -1 if there is none or if this task is empty */
+  for(i = ThisTask + 1, send = -1; nmemb > 0 && i < NTask; i++)
+    if(nlist[i] > 0)
+      {
+        send = i;
+        break;
+      }
+
+  char *element = mymalloc("element", size);
+
+  MPI_Request requests[2]; /* at most one send and one receive are posted */
+  int nreq = 0;
+  /* pass the last local element on to the next task with data */
+  if(send >= 0)
+    MPI_Isend(base + (nmemb - 1) * size, size, MPI_BYTE, send, TAG_TRANSFER, MPI_COMM_WORLD, &requests[nreq++]);
+
+  if(recv >= 0)
+    MPI_Irecv(element, size, MPI_BYTE, recv, TAG_TRANSFER, MPI_COMM_WORLD, &requests[nreq++]);
+
+  MPI_Waitall(nreq, requests, MPI_STATUSES_IGNORE);
+  /* no local element may compare smaller than the element received from the lower task */
+  if(recv >= 0)
+    {
+      for(i = 0; i < nmemb; i++)
+        {
+          if(compar(element, base + i * size) > 0)
+            terminate("wrong order");
+        }
+    }
+
+  myfree(element);
+  myfree(nlist);
+}
diff --git a/src/amuse/community/arepo/src/utils/predicates.c b/src/amuse/community/arepo/src/utils/predicates.c
new file mode 100644
index 0000000000..bd06b00166
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/predicates.c
@@ -0,0 +1,4292 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/utils/predicates.c + * \date 05/2018 + * \brief + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + */ + +/*****************************************************************************/ +/* */ +/* Routines for Arbitrary Precision Floating-point Arithmetic */ +/* and Fast Robust Geometric Predicates */ +/* (predicates.c) */ +/* */ +/* May 18, 1996 */ +/* */ +/* Placed in the public domain by */ +/* Jonathan Richard Shewchuk */ +/* School of Computer Science */ +/* Carnegie Mellon University */ +/* 5000 Forbes Avenue */ +/* Pittsburgh, Pennsylvania 15213-3891 */ +/* jrs@cs.cmu.edu */ +/* */ +/* This file contains C implementation of algorithms for exact addition */ +/* and multiplication of floating-point numbers, and predicates for */ +/* robustly performing the orientation and incircle tests used in */ +/* computational geometry. The algorithms and underlying theory are */ +/* described in Jonathan Richard Shewchuk. "Adaptive Precision Floating- */ +/* Point Arithmetic and Fast Robust Geometric Predicates." Technical */ +/* Report CMU-CS-96-140, School of Computer Science, Carnegie Mellon */ +/* University, Pittsburgh, Pennsylvania, May 1996. (Submitted to */ +/* Discrete & Computational Geometry.) */ +/* */ +/* This file, the paper listed above, and other information are available */ +/* from the Web page http://www.cs.cmu.edu/~quake/robust.html . 
*/ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* */ +/* Using this code: */ +/* */ +/* First, read the short or long version of the paper (from the Web page */ +/* above). */ +/* */ +/* Be sure to call exactinit() once, before calling any of the arithmetic */ +/* functions or geometric predicates. Also be sure to turn on the */ +/* optimizer when compiling this file. */ +/* */ +/* */ +/* Several geometric predicates are defined. Their parameters are all */ +/* points. Each point is an array of two or three floating-point */ +/* numbers. The geometric predicates, described in the papers, are */ +/* */ +/* orient2d(pa, pb, pc) */ +/* orient2dfast(pa, pb, pc) */ +/* orient3d(pa, pb, pc, pd) */ +/* orient3dfast(pa, pb, pc, pd) */ +/* incircle(pa, pb, pc, pd) */ +/* incirclefast(pa, pb, pc, pd) */ +/* insphere(pa, pb, pc, pd, pe) */ +/* inspherefast(pa, pb, pc, pd, pe) */ +/* */ +/* Those with suffix "fast" are approximate, non-robust versions. Those */ +/* without the suffix are adaptive precision, robust versions. There */ +/* are also versions with the suffices "exact" and "slow", which are */ +/* non-adaptive, exact arithmetic versions, which I use only for timings */ +/* in my arithmetic papers. */ +/* */ +/* */ +/* An expansion is represented by an array of floating-point numbers, */ +/* sorted from smallest to largest magnitude (possibly with interspersed */ +/* zeros). The length of each expansion is stored as a separate integer, */ +/* and each arithmetic function returns an integer which is the length */ +/* of the expansion it created. */ +/* */ +/* Several arithmetic functions are defined. 
Their parameters are */ +/* */ +/* e, f Input expansions */ +/* elen, flen Lengths of input expansions (must be >= 1) */ +/* h Output expansion */ +/* b Input scalar */ +/* */ +/* The arithmetic functions are */ +/* */ +/* grow_expansion(elen, e, b, h) */ +/* grow_expansion_zeroelim(elen, e, b, h) */ +/* expansion_sum(elen, e, flen, f, h) */ +/* expansion_sum_zeroelim1(elen, e, flen, f, h) */ +/* expansion_sum_zeroelim2(elen, e, flen, f, h) */ +/* fast_expansion_sum(elen, e, flen, f, h) */ +/* fast_expansion_sum_zeroelim(elen, e, flen, f, h) */ +/* linear_expansion_sum(elen, e, flen, f, h) */ +/* linear_expansion_sum_zeroelim(elen, e, flen, f, h) */ +/* scale_expansion(elen, e, b, h) */ +/* scale_expansion_zeroelim(elen, e, b, h) */ +/* compress(elen, e, h) */ +/* */ +/* All of these are described in the long version of the paper; some are */ +/* described in the short version. All return an integer that is the */ +/* length of h. Those with suffix _zeroelim perform zero elimination, */ +/* and are recommended over their counterparts. The procedure */ +/* fast_expansion_sum_zeroelim() (or linear_expansion_sum_zeroelim() on */ +/* processors that do not use the round-to-even tiebreaking rule) is */ +/* recommended over expansion_sum_zeroelim(). Each procedure has a */ +/* little note next to it (in the code below) that tells you whether or */ +/* not the output expansion may be the same array as one of the input */ +/* expansions. */ +/* */ +/* */ +/* If you look around below, you'll also find macros for a bunch of */ +/* simple unrolled arithmetic operations, and procedures for printing */ +/* expansions (commented out because they don't work with all C */ +/* compilers) and for generating random floating-point numbers whose */ +/* significand bits are all random. 
Most of the macros have undocumented */ +/* requirements that certain of their parameters should not be the same */ +/* variable; for safety, better to make sure all the parameters are */ +/* distinct variables. Feel free to send email to jrs@cs.cmu.edu if you */ +/* have questions. */ +/* */ +/*****************************************************************************/ + +#include +#include +#include +#include + +/* On some machines, the exact arithmetic routines might be defeated by the */ +/* use of internal extended precision floating-point registers. Sometimes */ +/* this problem can be fixed by defining certain values to be volatile, */ +/* thus forcing them to be stored to memory and rounded off. This isn't */ +/* a great solution, though, as it slows the arithmetic down. */ +/* */ +/* To try this out, write "#define INEXACT volatile" below. Normally, */ +/* however, INEXACT should be defined to be nothing. ("#define INEXACT".) */ + +#define INEXACT /* Nothing */ +/* #define INEXACT volatile */ + +#define REAL double /* float or double */ +#define REALPRINT doubleprint +#define REALRAND doublerand +#define NARROWRAND narrowdoublerand +#define UNIFORMRAND uniformdoublerand + +/* Which of the following two methods of finding the absolute values is */ +/* fastest is compiler-dependent. A few compilers can inline and optimize */ +/* the fabs() call; but most will incur the overhead of a function call, */ +/* which is disastrously slow. A faster way on IEEE machines might be to */ +/* mask the appropriate bit, but that's difficult to do in C. */ + +#define Absolute(a) ((a) >= 0.0 ? (a) : -(a)) +/* #define Absolute(a) fabs(a) */ + +/* Many of the operations are broken up into two pieces, a main part that */ +/* performs an approximate operation, and a "tail" that computes the */ +/* roundoff error of that operation. 
*/
+/* */
+/* The operations Fast_Two_Sum(), Fast_Two_Diff(), Two_Sum(), Two_Diff(), */
+/* Split(), and Two_Product() are all implemented as described in the */
+/* reference. Each of these macros requires certain variables to be */
+/* defined in the calling routine. The variables `bvirt', `c', `abig', */
+/* `_i', `_j', `_k', `_l', `_m', and `_n' are declared `INEXACT' because */
+/* they store the result of an operation that may incur roundoff error. */
+/* The input parameter `x' (or the highest numbered `x_' parameter) must */
+/* also be declared `INEXACT'. */
+/* Tail of Fast_Two_Sum(): y = b - (x - a); needs the temporary `bvirt' in the calling routine. */
+#define Fast_Two_Sum_Tail(a, b, x, y) \
+  bvirt = x - a; \
+  y = b - bvirt
+/* x = a + b rounded to REAL, y = its tail. */
+#define Fast_Two_Sum(a, b, x, y) \
+  x = (REAL)(a + b); \
+  Fast_Two_Sum_Tail(a, b, x, y)
+/* Tail of Fast_Two_Diff(): y = (a - x) - b; needs `bvirt'. */
+#define Fast_Two_Diff_Tail(a, b, x, y) \
+  bvirt = a - x; \
+  y = bvirt - b
+/* x = a - b rounded to REAL, y = its tail. */
+#define Fast_Two_Diff(a, b, x, y) \
+  x = (REAL)(a - b); \
+  Fast_Two_Diff_Tail(a, b, x, y)
+/* Tail of Two_Sum(); needs the temporaries `bvirt', `avirt', `bround' and `around'. */
+#define Two_Sum_Tail(a, b, x, y) \
+  bvirt = (REAL)(x - a); \
+  avirt = x - bvirt; \
+  bround = b - bvirt; \
+  around = a - avirt; \
+  y = around + bround
+/* x = a + b rounded to REAL, y = its tail. */
+#define Two_Sum(a, b, x, y) \
+  x = (REAL)(a + b); \
+  Two_Sum_Tail(a, b, x, y)
+/* Tail of Two_Diff(); needs the temporaries `bvirt', `avirt', `bround' and `around'. */
+#define Two_Diff_Tail(a, b, x, y) \
+  bvirt = (REAL)(a - x); \
+  avirt = x + bvirt; \
+  bround = bvirt - b; \
+  around = a - avirt; \
+  y = around + bround
+/* x = a - b rounded to REAL, y = its tail. */
+#define Two_Diff(a, b, x, y) \
+  x = (REAL)(a - b); \
+  Two_Diff_Tail(a, b, x, y)
+/* Computes ahi and alo from a using the global `splitter'; needs the temporaries `c' and `abig'. */
+#define Split(a, ahi, alo) \
+  c = (REAL)(splitter * a); \
+  abig = (REAL)(c - a); \
+  ahi = c - abig; \
+  alo = a - ahi
+/* Tail of Two_Product(); needs `ahi', `alo', `bhi', `blo', `err1', `err2', `err3' and the temporaries of Split(). */
+#define Two_Product_Tail(a, b, x, y) \
+  Split(a, ahi, alo); \
+  Split(b, bhi, blo); \
+  err1 = x - (ahi * bhi); \
+  err2 = err1 - (alo * bhi); \
+  err3 = err2 - (ahi * blo); \
+  y = (alo * blo) - err3
+/* x = a * b rounded to REAL, y = its tail. */
+#define Two_Product(a, b, x, y) \
+  x = (REAL)(a * b); \
+  Two_Product_Tail(a, b, x, y)
+
+/* Two_Product_Presplit() is Two_Product() where one of the inputs has */
+/* already been split. Avoids redundant splitting. */
+
+#define Two_Product_Presplit(a, b, bhi, blo, x, y) \
+  x = (REAL)(a * b); \
+  Split(a, ahi, alo); \
+  err1 = x - (ahi * bhi); \
+  err2 = err1 - (alo * bhi); \
+  err3 = err2 - (ahi * blo); \
+  y = (alo * blo) - err3
+
+/* Two_Product_2Presplit() is Two_Product() where both of the inputs have */
+/* already been split. Avoids redundant splitting. */
+
+#define Two_Product_2Presplit(a, ahi, alo, b, bhi, blo, x, y) \
+  x = (REAL)(a * b); \
+  err1 = x - (ahi * bhi); \
+  err2 = err1 - (alo * bhi); \
+  err3 = err2 - (ahi * blo); \
+  y = (alo * blo) - err3
+
+/* Square() can be done more quickly than Two_Product(). */
+
+#define Square_Tail(a, x, y) \
+  Split(a, ahi, alo); \
+  err1 = x - (ahi * ahi); \
+  err3 = err1 - ((ahi + ahi) * alo); \
+  y = (alo * alo) - err3
+/* x = a * a rounded to REAL, y = its tail; needs `ahi', `alo', `err1', `err3' and the temporaries of Split(). */
+#define Square(a, x, y) \
+  x = (REAL)(a * a); \
+  Square_Tail(a, x, y)
+
+/* Macros for summing expansions of various fixed lengths. These are all */
+/* unrolled versions of Expansion_Sum(). */
+
+#define Two_One_Sum(a1, a0, b, x2, x1, x0) \
+  Two_Sum(a0, b, _i, x0); \
+  Two_Sum(a1, _i, x2, x1)
+/* (a1, a0) - b -> (x2, x1, x0); needs `_i' in addition to the Two_Sum()/Two_Diff() temporaries. */
+#define Two_One_Diff(a1, a0, b, x2, x1, x0) \
+  Two_Diff(a0, b, _i, x0); \
+  Two_Sum(a1, _i, x2, x1)
+/* (a1, a0) + (b1, b0) -> (x3, ..., x0); needs `_j' and `_0' in addition. */
+#define Two_Two_Sum(a1, a0, b1, b0, x3, x2, x1, x0) \
+  Two_One_Sum(a1, a0, b0, _j, _0, x0); \
+  Two_One_Sum(_j, _0, b1, x3, x2, x1)
+/* (a1, a0) - (b1, b0) -> (x3, ..., x0); needs `_j' and `_0' in addition. */
+#define Two_Two_Diff(a1, a0, b1, b0, x3, x2, x1, x0) \
+  Two_One_Diff(a1, a0, b0, _j, _0, x0); \
+  Two_One_Diff(_j, _0, b1, x3, x2, x1)
+/* (a3, ..., a0) + b -> (x4, ..., x0); needs `_j' in addition. */
+#define Four_One_Sum(a3, a2, a1, a0, b, x4, x3, x2, x1, x0) \
+  Two_One_Sum(a1, a0, b, _j, x1, x0); \
+  Two_One_Sum(a3, a2, _j, x4, x3, x2)
+/* (a3, ..., a0) + (b1, b0) -> (x5, ..., x0); needs `_k', `_2', `_1' and `_0' in addition. */
+#define Four_Two_Sum(a3, a2, a1, a0, b1, b0, x5, x4, x3, x2, x1, x0) \
+  Four_One_Sum(a3, a2, a1, a0, b0, _k, _2, _1, _0, x0); \
+  Four_One_Sum(_k, _2, _1, _0, b1, x5, x4, x3, x2, x1)
+/* (a3, ..., a0) plus a four-component operand whose parameters are named b4, b3, b1, b0 here; needs `_l', `_2', `_1', `_0'. */
+#define Four_Four_Sum(a3, a2, a1, a0, b4, b3, b1, b0, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Four_Two_Sum(a3, a2, a1, a0, b1, b0, _l, _2, _1, _0, x1, x0); \
+  Four_Two_Sum(_l, _2, _1, _0, b4, b3, x7, x6, x5, x4, x3, x2)
+/* (a7, ..., a0) + b -> (x8, ..., x0); needs `_j' in addition. */
+#define Eight_One_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b, x8, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Four_One_Sum(a3, a2, a1, a0, b, _j, x3, x2, x1, x0); \
+  Four_One_Sum(a7, a6, a5, a4, _j, x8, x7, x6, x5, x4)
+/* (a7, ..., a0) + (b1, b0) -> (x9, ..., x0); needs `_k' and `_6' ... `_0' in addition. */
+#define Eight_Two_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b1, b0, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Eight_One_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b0, _k, _6, _5, _4, _3, _2, _1, _0, x0); \
+  Eight_One_Sum(_k, _6, _5, _4, _3, _2, _1, _0, b1, x9, x8, x7, x6, x5, x4, x3, x2, x1)
+/* (a7, ..., a0) plus a four-component operand named b4, b3, b1, b0 -> (x11, ..., x0); needs `_l' and `_6' ... `_0'. */
+#define Eight_Four_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b4, b3, b1, b0, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Eight_Two_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b1, b0, _l, _6, _5, _4, _3, _2, _1, _0, x1, x0); \
+  Eight_Two_Sum(_l, _6, _5, _4, _3, _2, _1, _0, b4, b3, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2)
+
+/* Macros for multiplying expansions of various fixed lengths. */
+
+#define Two_One_Product(a1, a0, b, x3, x2, x1, x0) \
+  Split(b, bhi, blo); \
+  Two_Product_Presplit(a0, b, bhi, blo, _i, x0); \
+  Two_Product_Presplit(a1, b, bhi, blo, _j, _0); \
+  Two_Sum(_i, _0, _k, x1); \
+  Fast_Two_Sum(_j, _k, x3, x2)
+/* (a3, ..., a0) * b -> (x7, ..., x0); b is split once into `bhi'/`blo'; needs `_i', `_j', `_k' and `_0' in addition. */
+#define Four_One_Product(a3, a2, a1, a0, b, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Split(b, bhi, blo); \
+  Two_Product_Presplit(a0, b, bhi, blo, _i, x0); \
+  Two_Product_Presplit(a1, b, bhi, blo, _j, _0); \
+  Two_Sum(_i, _0, _k, x1); \
+  Fast_Two_Sum(_j, _k, _i, x2); \
+  Two_Product_Presplit(a2, b, bhi, blo, _j, _0); \
+  Two_Sum(_i, _0, _k, x3); \
+  Fast_Two_Sum(_j, _k, _i, x4); \
+  Two_Product_Presplit(a3, b, bhi, blo, _j, _0); \
+  Two_Sum(_i, _0, _k, x5); \
+  Fast_Two_Sum(_j, _k, x7, x6)
+/* (a1, a0) * (b1, b0) -> (x7, ..., x0); needs `a0hi', `a0lo', `a1hi', `a1lo', `bhi', `blo', `_i' ... `_n' and `_0' ... `_2'. */
+#define Two_Two_Product(a1, a0, b1, b0, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Split(a0, a0hi, a0lo); \
+  Split(b0, bhi, blo); \
+  Two_Product_2Presplit(a0, a0hi, a0lo, b0, bhi, blo, _i, x0); \
+  Split(a1, a1hi, a1lo); \
+  Two_Product_2Presplit(a1, a1hi, a1lo, b0, bhi, blo, _j, _0); \
+  Two_Sum(_i, _0, _k, _1); \
+  Fast_Two_Sum(_j, _k, _l, _2); \
+  Split(b1, bhi, blo); \
+  Two_Product_2Presplit(a0, a0hi, a0lo, b1, bhi, blo, _i, _0); \
+  Two_Sum(_1, _0, _k, x1); \
+  Two_Sum(_2, _k, _j, _1); \
+  Two_Sum(_l, _j, _m, _2); \
+  Two_Product_2Presplit(a1, a1hi, a1lo, b1, bhi, blo, _j, _0); \
+  Two_Sum(_i, _0, _n, _0); \
+  Two_Sum(_1, _0, _i, x2); \
+  Two_Sum(_2, _i, _k, _1); \
+  Two_Sum(_m, _k, _l, _2); \
+  Two_Sum(_j, _n, _k, _0); \
+  Two_Sum(_1, _0, _j, x3); \
+  Two_Sum(_2, _j, _i, _1); \
+  Two_Sum(_l, _i, _m, _2); \
+  Two_Sum(_1, _k, _i, x4); \
+  Two_Sum(_2, _i, _k, x5); \
+  Two_Sum(_m, _k, x7, x6)
+
+/* An expansion of length two can be squared more quickly than finding the */
+/* product of two different expansions of length two, and the result is */
+/* guaranteed to have no more than six (rather than eight) components. */
+
+#define Two_Square(a1, a0, x5, x4, x3, x2, x1, x0) \
+  Square(a0, _j, x0); \
+  _0 = a0 + a0; \
+  Two_Product(a1, _0, _k, _1); \
+  Two_One_Sum(_k, _1, _j, _l, _2, x1); \
+  Square(a1, _j, _1); \
+  Two_Two_Sum(_j, _1, _l, _2, x5, x4, x3, x2)
+
+REAL splitter; /* = 2^ceiling(p / 2) + 1. Used to split floats in half. */
+REAL epsilon; /* = 2^(-p). Used to estimate roundoff errors. */
+
+/* A set of coefficients used to calculate maximum roundoff errors. */
+REAL resulterrbound;
+REAL ccwerrboundA, ccwerrboundB, ccwerrboundC;
+REAL o3derrboundA, o3derrboundB, o3derrboundC;
+REAL iccerrboundA, iccerrboundB, iccerrboundC;
+REAL isperrboundA, isperrboundB, isperrboundC;
+
+/*****************************************************************************/
+/* */
+/* doubleprint() Print the bit representation of a double. */
+/* */
+/* Useful for debugging exact arithmetic routines. 
*/ +/* */ +/*****************************************************************************/ + +/* +void doubleprint(number) +double number; +{ + unsigned long long no; + unsigned long long sign, expo; + int exponent; + int i, bottomi; + + no = *(unsigned long long *) &number; + sign = no & 0x8000000000000000ll; + expo = (no >> 52) & 0x7ffll; + exponent = (int) expo; + exponent = exponent - 1023; + if (sign) { + printf("-"); + } else { + printf(" "); + } + if (exponent == -1023) { + printf( + "0.0000000000000000000000000000000000000000000000000000_ ( )"); + } else { + printf("1."); + bottomi = -1; + for (i = 0; i < 52; i++) { + if (no & 0x0008000000000000ll) { + printf("1"); + bottomi = i; + } else { + printf("0"); + } + no <<= 1; + } + printf("_%d (%d)", exponent, exponent - 1 - bottomi); + } +} +*/ + +/*****************************************************************************/ +/* */ +/* floatprint() Print the bit representation of a float. */ +/* */ +/* Useful for debugging exact arithmetic routines. */ +/* */ +/*****************************************************************************/ + +/* +void floatprint(number) +float number; +{ + unsigned no; + unsigned sign, expo; + int exponent; + int i, bottomi; + + no = *(unsigned *) &number; + sign = no & 0x80000000; + expo = (no >> 23) & 0xff; + exponent = (int) expo; + exponent = exponent - 127; + if (sign) { + printf("-"); + } else { + printf(" "); + } + if (exponent == -127) { + printf("0.00000000000000000000000_ ( )"); + } else { + printf("1."); + bottomi = -1; + for (i = 0; i < 23; i++) { + if (no & 0x00400000) { + printf("1"); + bottomi = i; + } else { + printf("0"); + } + no <<= 1; + } + printf("_%3d (%3d)", exponent, exponent - 1 - bottomi); + } +} +*/ + +/*****************************************************************************/ +/* */ +/* expansion_print() Print the bit representation of an expansion. */ +/* */ +/* Useful for debugging exact arithmetic routines. 
*/
/*                                                                           */
/*****************************************************************************/

/*
void expansion_print(elen, e)
int elen;
REAL *e;
{
  int i;

  for (i = elen - 1; i >= 0; i--) {
    REALPRINT(e[i]);
    if (i > 0) {
      printf(" +\n");
    } else {
      printf("\n");
    }
  }
}
*/

/*****************************************************************************/
/*                                                                           */
/*  doublerand()   Return a double built from three random() draws.          */
/*                                                                           */
/*  The first two draws fill a 53-bit significand; the third selects a       */
/*  random power-of-two scale whose exponent lies in [0, 511].               */
/*                                                                           */
/*****************************************************************************/

double doublerand()
{
  double value, scale;
  long sig_hi, sig_lo, exp_bits;
  long bit;

  /* Draw order matters for reproducibility: high part, low part, exponent. */
  sig_hi   = random();
  sig_lo   = random();
  exp_bits = random();

  value = (double)(sig_hi - 1073741824) * 8388608.0 + (double)(sig_lo >> 8);

  /* Bits 9..17 of exp_bits decide which of 2^1, 2^2, 2^4, ..., 2^256 are   */
  /* multiplied in; `scale' is squared after every tested bit.              */
  bit   = 512;
  scale = 2;
  while(bit <= 131072)
    {
      if(exp_bits & bit)
        {
          value *= scale;
        }
      bit *= 2;
      scale = scale * scale;
    }
  return value;
}

/*****************************************************************************/
/*                                                                           */
/*  narrowdoublerand()   Same construction as doublerand(), but only three   */
/*                       selector bits are tested, so the random exponent    */
/*                       lies in [0, 7].                                     */
/*                                                                           */
/*****************************************************************************/

double narrowdoublerand()
{
  double value, scale;
  long sig_hi, sig_lo, exp_bits;
  long bit;

  sig_hi   = random();
  sig_lo   = random();
  exp_bits = random();

  value = (double)(sig_hi - 1073741824) * 8388608.0 + (double)(sig_lo >> 8);

  /* Bits 9..11 select among the factors 2^1, 2^2 and 2^4.                  */
  bit   = 512;
  scale = 2;
  while(bit <= 2048)
    {
      if(exp_bits & bit)
        {
          value *= scale;
        }
      bit *= 2;
      scale = scale * scale;
    }
  return value;
}

/*****************************************************************************/
/*                                                                           */
/*  uniformdoublerand()   Generate a double with random 53-bit significand.
*/ +/* */ +/*****************************************************************************/ + +double uniformdoublerand() +{ + double result; + long a, b; + + a = random(); + b = random(); + result = (double)(a - 1073741824) * 8388608.0 + (double)(b >> 8); + return result; +} + +/*****************************************************************************/ +/* */ +/* floatrand() Generate a float with random 24-bit significand and a */ +/* random exponent in [0, 63]. */ +/* */ +/*****************************************************************************/ + +float floatrand() +{ + float result; + float expo; + long a, c; + long i; + + a = random(); + c = random(); + result = (float)((a - 1073741824) >> 6); + for(i = 512, expo = 2; i <= 16384; i *= 2, expo = expo * expo) + { + if(c & i) + { + result *= expo; + } + } + return result; +} + +/*****************************************************************************/ +/* */ +/* narrowfloatrand() Generate a float with random 24-bit significand and */ +/* a random exponent in [0, 7]. */ +/* */ +/*****************************************************************************/ + +float narrowfloatrand() +{ + float result; + float expo; + long a, c; + long i; + + a = random(); + c = random(); + result = (float)((a - 1073741824) >> 6); + for(i = 512, expo = 2; i <= 2048; i *= 2, expo = expo * expo) + { + if(c & i) + { + result *= expo; + } + } + return result; +} + +/*****************************************************************************/ +/* */ +/* uniformfloatrand() Generate a float with random 24-bit significand. */ +/* */ +/*****************************************************************************/ + +float uniformfloatrand() +{ + float result; + long a; + + a = random(); + result = (float)((a - 1073741824) >> 6); + return result; +} + +/*****************************************************************************/ +/* */ +/* exactinit() Initialize the variables used for exact arithmetic. 
*/ +/* */ +/* `epsilon' is the largest power of two such that 1.0 + epsilon = 1.0 in */ +/* floating-point arithmetic. `epsilon' bounds the relative roundoff */ +/* error. It is used for floating-point error analysis. */ +/* */ +/* `splitter' is used to split floating-point numbers into two half- */ +/* length significands for exact multiplication. */ +/* */ +/* I imagine that a highly optimizing compiler might be too smart for its */ +/* own good, and somehow cause this routine to fail, if it pretends that */ +/* floating-point arithmetic is too much like real arithmetic. */ +/* */ +/* Don't change this routine unless you fully understand it. */ +/* */ +/*****************************************************************************/ + +void exactinit() +{ + REAL half; + REAL check, lastcheck; + int every_other; + + every_other = 1; + half = 0.5; + epsilon = 1.0; + splitter = 1.0; + check = 1.0; + /* Repeatedly divide `epsilon' by two until it is too small to add to */ + /* one without causing roundoff. (Also check if the sum is equal to */ + /* the previous sum, for machines that round up instead of using exact */ + /* rounding. Not that this library will work on such machines anyway. */ + do + { + lastcheck = check; + epsilon *= half; + if(every_other) + { + splitter *= 2.0; + } + every_other = !every_other; + check = 1.0 + epsilon; + } + while((check != 1.0) && (check != lastcheck)); + splitter += 1.0; + + /* Error bounds for orientation and incircle tests. 
*/ + resulterrbound = (3.0 + 8.0 * epsilon) * epsilon; + ccwerrboundA = (3.0 + 16.0 * epsilon) * epsilon; + ccwerrboundB = (2.0 + 12.0 * epsilon) * epsilon; + ccwerrboundC = (9.0 + 64.0 * epsilon) * epsilon * epsilon; + o3derrboundA = (7.0 + 56.0 * epsilon) * epsilon; + o3derrboundB = (3.0 + 28.0 * epsilon) * epsilon; + o3derrboundC = (26.0 + 288.0 * epsilon) * epsilon * epsilon; + iccerrboundA = (10.0 + 96.0 * epsilon) * epsilon; + iccerrboundB = (4.0 + 48.0 * epsilon) * epsilon; + iccerrboundC = (44.0 + 576.0 * epsilon) * epsilon * epsilon; + isperrboundA = (16.0 + 224.0 * epsilon) * epsilon; + isperrboundB = (5.0 + 72.0 * epsilon) * epsilon; + isperrboundC = (71.0 + 1408.0 * epsilon) * epsilon * epsilon; +} + +/*****************************************************************************/ +/* */ +/* grow_expansion() Add a scalar to an expansion. */ +/* */ +/* Sets h = e + b. See the long version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. If round-to-even is used (as */ +/* with IEEE 754), maintains the strongly nonoverlapping and nonadjacent */ +/* properties as well. (That is, if e has one of these properties, so */ +/* will h.) */ +/* */ +/*****************************************************************************/ + +int grow_expansion(elen, e, b, h) /* e and h can be the same. */ + int elen; +REAL *e; +REAL b; +REAL *h; +{ + REAL Q; + INEXACT REAL Qnew; + int eindex; + REAL enow; + INEXACT REAL bvirt; + REAL avirt, bround, around; + + Q = b; + for(eindex = 0; eindex < elen; eindex++) + { + enow = e[eindex]; + Two_Sum(Q, enow, Qnew, h[eindex]); + Q = Qnew; + } + h[eindex] = Q; + return eindex + 1; +} + +/*****************************************************************************/ +/* */ +/* grow_expansion_zeroelim() Add a scalar to an expansion, eliminating */ +/* zero components from the output expansion. */ +/* */ +/* Sets h = e + b. See the long version of my paper for details. 
*/ +/* */ +/* Maintains the nonoverlapping property. If round-to-even is used (as */ +/* with IEEE 754), maintains the strongly nonoverlapping and nonadjacent */ +/* properties as well. (That is, if e has one of these properties, so */ +/* will h.) */ +/* */ +/*****************************************************************************/ + +int grow_expansion_zeroelim(elen, e, b, h) /* e and h can be the same. */ + int elen; +REAL *e; +REAL b; +REAL *h; +{ + REAL Q, hh; + INEXACT REAL Qnew; + int eindex, hindex; + REAL enow; + INEXACT REAL bvirt; + REAL avirt, bround, around; + + hindex = 0; + Q = b; + for(eindex = 0; eindex < elen; eindex++) + { + enow = e[eindex]; + Two_Sum(Q, enow, Qnew, hh); + Q = Qnew; + if(hh != 0.0) + { + h[hindex++] = hh; + } + } + if((Q != 0.0) || (hindex == 0)) + { + h[hindex++] = Q; + } + return hindex; +} + +/*****************************************************************************/ +/* */ +/* expansion_sum() Sum two expansions. */ +/* */ +/* Sets h = e + f. See the long version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. If round-to-even is used (as */ +/* with IEEE 754), maintains the nonadjacent property as well. (That is, */ +/* if e has one of these properties, so will h.) Does NOT maintain the */ +/* strongly nonoverlapping property. */ +/* */ +/*****************************************************************************/ + +int expansion_sum(elen, e, flen, f, h) + /* e and h can be the same, but f and h cannot. 
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q; + INEXACT REAL Qnew; + int findex, hindex, hlast; + REAL hnow; + INEXACT REAL bvirt; + REAL avirt, bround, around; + + Q = f[0]; + for(hindex = 0; hindex < elen; hindex++) + { + hnow = e[hindex]; + Two_Sum(Q, hnow, Qnew, h[hindex]); + Q = Qnew; + } + h[hindex] = Q; + hlast = hindex; + for(findex = 1; findex < flen; findex++) + { + Q = f[findex]; + for(hindex = findex; hindex <= hlast; hindex++) + { + hnow = h[hindex]; + Two_Sum(Q, hnow, Qnew, h[hindex]); + Q = Qnew; + } + h[++hlast] = Q; + } + return hlast + 1; +} + +/*****************************************************************************/ +/* */ +/* expansion_sum_zeroelim1() Sum two expansions, eliminating zero */ +/* components from the output expansion. */ +/* */ +/* Sets h = e + f. See the long version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. If round-to-even is used (as */ +/* with IEEE 754), maintains the nonadjacent property as well. (That is, */ +/* if e has one of these properties, so will h.) Does NOT maintain the */ +/* strongly nonoverlapping property. */ +/* */ +/*****************************************************************************/ + +int expansion_sum_zeroelim1(elen, e, flen, f, h) + /* e and h can be the same, but f and h cannot. 
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q; + INEXACT REAL Qnew; + int index, findex, hindex, hlast; + REAL hnow; + INEXACT REAL bvirt; + REAL avirt, bround, around; + + Q = f[0]; + for(hindex = 0; hindex < elen; hindex++) + { + hnow = e[hindex]; + Two_Sum(Q, hnow, Qnew, h[hindex]); + Q = Qnew; + } + h[hindex] = Q; + hlast = hindex; + for(findex = 1; findex < flen; findex++) + { + Q = f[findex]; + for(hindex = findex; hindex <= hlast; hindex++) + { + hnow = h[hindex]; + Two_Sum(Q, hnow, Qnew, h[hindex]); + Q = Qnew; + } + h[++hlast] = Q; + } + hindex = -1; + for(index = 0; index <= hlast; index++) + { + hnow = h[index]; + if(hnow != 0.0) + { + h[++hindex] = hnow; + } + } + if(hindex == -1) + { + return 1; + } + else + { + return hindex + 1; + } +} + +/*****************************************************************************/ +/* */ +/* expansion_sum_zeroelim2() Sum two expansions, eliminating zero */ +/* components from the output expansion. */ +/* */ +/* Sets h = e + f. See the long version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. If round-to-even is used (as */ +/* with IEEE 754), maintains the nonadjacent property as well. (That is, */ +/* if e has one of these properties, so will h.) Does NOT maintain the */ +/* strongly nonoverlapping property. */ +/* */ +/*****************************************************************************/ + +int expansion_sum_zeroelim2(elen, e, flen, f, h) + /* e and h can be the same, but f and h cannot. 
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q, hh; + INEXACT REAL Qnew; + int eindex, findex, hindex, hlast; + REAL enow; + INEXACT REAL bvirt; + REAL avirt, bround, around; + + hindex = 0; + Q = f[0]; + for(eindex = 0; eindex < elen; eindex++) + { + enow = e[eindex]; + Two_Sum(Q, enow, Qnew, hh); + Q = Qnew; + if(hh != 0.0) + { + h[hindex++] = hh; + } + } + h[hindex] = Q; + hlast = hindex; + for(findex = 1; findex < flen; findex++) + { + hindex = 0; + Q = f[findex]; + for(eindex = 0; eindex <= hlast; eindex++) + { + enow = h[eindex]; + Two_Sum(Q, enow, Qnew, hh); + Q = Qnew; + if(hh != 0) + { + h[hindex++] = hh; + } + } + h[hindex] = Q; + hlast = hindex; + } + return hlast + 1; +} + +/*****************************************************************************/ +/* */ +/* fast_expansion_sum() Sum two expansions. */ +/* */ +/* Sets h = e + f. See the long version of my paper for details. */ +/* */ +/* If round-to-even is used (as with IEEE 754), maintains the strongly */ +/* nonoverlapping property. (That is, if e is strongly nonoverlapping, h */ +/* will be also.) Does NOT maintain the nonoverlapping or nonadjacent */ +/* properties. */ +/* */ +/*****************************************************************************/ + +int fast_expansion_sum(elen, e, flen, f, h) /* h cannot be e or f. 
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q; + INEXACT REAL Qnew; + INEXACT REAL bvirt; + REAL avirt, bround, around; + int eindex, findex, hindex; + REAL enow, fnow; + + enow = e[0]; + fnow = f[0]; + eindex = findex = 0; + if((fnow > enow) == (fnow > -enow)) + { + Q = enow; + enow = e[++eindex]; + } + else + { + Q = fnow; + fnow = f[++findex]; + } + hindex = 0; + if((eindex < elen) && (findex < flen)) + { + if((fnow > enow) == (fnow > -enow)) + { + Fast_Two_Sum(enow, Q, Qnew, h[0]); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, Q, Qnew, h[0]); + fnow = f[++findex]; + } + Q = Qnew; + hindex = 1; + while((eindex < elen) && (findex < flen)) + { + if((fnow > enow) == (fnow > -enow)) + { + Two_Sum(Q, enow, Qnew, h[hindex]); + enow = e[++eindex]; + } + else + { + Two_Sum(Q, fnow, Qnew, h[hindex]); + fnow = f[++findex]; + } + Q = Qnew; + hindex++; + } + } + while(eindex < elen) + { + Two_Sum(Q, enow, Qnew, h[hindex]); + enow = e[++eindex]; + Q = Qnew; + hindex++; + } + while(findex < flen) + { + Two_Sum(Q, fnow, Qnew, h[hindex]); + fnow = f[++findex]; + Q = Qnew; + hindex++; + } + h[hindex] = Q; + return hindex + 1; +} + +/*****************************************************************************/ +/* */ +/* fast_expansion_sum_zeroelim() Sum two expansions, eliminating zero */ +/* components from the output expansion. */ +/* */ +/* Sets h = e + f. See the long version of my paper for details. */ +/* */ +/* If round-to-even is used (as with IEEE 754), maintains the strongly */ +/* nonoverlapping property. (That is, if e is strongly nonoverlapping, h */ +/* will be also.) Does NOT maintain the nonoverlapping or nonadjacent */ +/* properties. */ +/* */ +/*****************************************************************************/ + +int fast_expansion_sum_zeroelim(elen, e, flen, f, h) /* h cannot be e or f. 
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q; + INEXACT REAL Qnew; + INEXACT REAL hh; + INEXACT REAL bvirt; + REAL avirt, bround, around; + int eindex, findex, hindex; + REAL enow, fnow; + + enow = e[0]; + fnow = f[0]; + eindex = findex = 0; + if((fnow > enow) == (fnow > -enow)) + { + Q = enow; + enow = e[++eindex]; + } + else + { + Q = fnow; + fnow = f[++findex]; + } + hindex = 0; + if((eindex < elen) && (findex < flen)) + { + if((fnow > enow) == (fnow > -enow)) + { + Fast_Two_Sum(enow, Q, Qnew, hh); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, Q, Qnew, hh); + fnow = f[++findex]; + } + Q = Qnew; + if(hh != 0.0) + { + h[hindex++] = hh; + } + while((eindex < elen) && (findex < flen)) + { + if((fnow > enow) == (fnow > -enow)) + { + Two_Sum(Q, enow, Qnew, hh); + enow = e[++eindex]; + } + else + { + Two_Sum(Q, fnow, Qnew, hh); + fnow = f[++findex]; + } + Q = Qnew; + if(hh != 0.0) + { + h[hindex++] = hh; + } + } + } + while(eindex < elen) + { + Two_Sum(Q, enow, Qnew, hh); + enow = e[++eindex]; + Q = Qnew; + if(hh != 0.0) + { + h[hindex++] = hh; + } + } + while(findex < flen) + { + Two_Sum(Q, fnow, Qnew, hh); + fnow = f[++findex]; + Q = Qnew; + if(hh != 0.0) + { + h[hindex++] = hh; + } + } + if((Q != 0.0) || (hindex == 0)) + { + h[hindex++] = Q; + } + return hindex; +} + +/*****************************************************************************/ +/* */ +/* linear_expansion_sum() Sum two expansions. */ +/* */ +/* Sets h = e + f. See either version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. (That is, if e is */ +/* nonoverlapping, h will be also.) */ +/* */ +/*****************************************************************************/ + +int linear_expansion_sum(elen, e, flen, f, h) /* h cannot be e or f. 
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q, q; + INEXACT REAL Qnew; + INEXACT REAL R; + INEXACT REAL bvirt; + REAL avirt, bround, around; + int eindex, findex, hindex; + REAL enow, fnow; + REAL g0; + + enow = e[0]; + fnow = f[0]; + eindex = findex = 0; + if((fnow > enow) == (fnow > -enow)) + { + g0 = enow; + enow = e[++eindex]; + } + else + { + g0 = fnow; + fnow = f[++findex]; + } + if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow)))) + { + Fast_Two_Sum(enow, g0, Qnew, q); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, g0, Qnew, q); + fnow = f[++findex]; + } + Q = Qnew; + for(hindex = 0; hindex < elen + flen - 2; hindex++) + { + if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow)))) + { + Fast_Two_Sum(enow, q, R, h[hindex]); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, q, R, h[hindex]); + fnow = f[++findex]; + } + Two_Sum(Q, R, Qnew, q); + Q = Qnew; + } + h[hindex] = q; + h[hindex + 1] = Q; + return hindex + 2; +} + +/*****************************************************************************/ +/* */ +/* linear_expansion_sum_zeroelim() Sum two expansions, eliminating zero */ +/* components from the output expansion. */ +/* */ +/* Sets h = e + f. See either version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. (That is, if e is */ +/* nonoverlapping, h will be also.) */ +/* */ +/*****************************************************************************/ + +int linear_expansion_sum_zeroelim(elen, e, flen, f, h) /* h cannot be e or f. 
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q, q, hh; + INEXACT REAL Qnew; + INEXACT REAL R; + INEXACT REAL bvirt; + REAL avirt, bround, around; + int eindex, findex, hindex; + int count; + REAL enow, fnow; + REAL g0; + + enow = e[0]; + fnow = f[0]; + eindex = findex = 0; + hindex = 0; + if((fnow > enow) == (fnow > -enow)) + { + g0 = enow; + enow = e[++eindex]; + } + else + { + g0 = fnow; + fnow = f[++findex]; + } + if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow)))) + { + Fast_Two_Sum(enow, g0, Qnew, q); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, g0, Qnew, q); + fnow = f[++findex]; + } + Q = Qnew; + for(count = 2; count < elen + flen; count++) + { + if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow)))) + { + Fast_Two_Sum(enow, q, R, hh); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, q, R, hh); + fnow = f[++findex]; + } + Two_Sum(Q, R, Qnew, q); + Q = Qnew; + if(hh != 0) + { + h[hindex++] = hh; + } + } + if(q != 0) + { + h[hindex++] = q; + } + if((Q != 0.0) || (hindex == 0)) + { + h[hindex++] = Q; + } + return hindex; +} + +/*****************************************************************************/ +/* */ +/* scale_expansion() Multiply an expansion by a scalar. */ +/* */ +/* Sets h = be. See either version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. If round-to-even is used (as */ +/* with IEEE 754), maintains the strongly nonoverlapping and nonadjacent */ +/* properties as well. (That is, if e has one of these properties, so */ +/* will h.) */ +/* */ +/*****************************************************************************/ + +int scale_expansion(elen, e, b, h) /* e and h cannot be the same. 
*/ + int elen; +REAL *e; +REAL b; +REAL *h; +{ + INEXACT REAL Q; + INEXACT REAL sum; + INEXACT REAL product1; + REAL product0; + int eindex, hindex; + REAL enow; + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + + Split(b, bhi, blo); + Two_Product_Presplit(e[0], b, bhi, blo, Q, h[0]); + hindex = 1; + for(eindex = 1; eindex < elen; eindex++) + { + enow = e[eindex]; + Two_Product_Presplit(enow, b, bhi, blo, product1, product0); + Two_Sum(Q, product0, sum, h[hindex]); + hindex++; + Two_Sum(product1, sum, Q, h[hindex]); + hindex++; + } + h[hindex] = Q; + return elen + elen; +} + +/*****************************************************************************/ +/* */ +/* scale_expansion_zeroelim() Multiply an expansion by a scalar, */ +/* eliminating zero components from the */ +/* output expansion. */ +/* */ +/* Sets h = be. See either version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. If round-to-even is used (as */ +/* with IEEE 754), maintains the strongly nonoverlapping and nonadjacent */ +/* properties as well. (That is, if e has one of these properties, so */ +/* will h.) */ +/* */ +/*****************************************************************************/ + +int scale_expansion_zeroelim(elen, e, b, h) /* e and h cannot be the same. 
*/ + int elen; +REAL *e; +REAL b; +REAL *h; +{ + INEXACT REAL Q, sum; + REAL hh; + INEXACT REAL product1; + REAL product0; + int eindex, hindex; + REAL enow; + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + + Split(b, bhi, blo); + Two_Product_Presplit(e[0], b, bhi, blo, Q, hh); + hindex = 0; + if(hh != 0) + { + h[hindex++] = hh; + } + for(eindex = 1; eindex < elen; eindex++) + { + enow = e[eindex]; + Two_Product_Presplit(enow, b, bhi, blo, product1, product0); + Two_Sum(Q, product0, sum, hh); + if(hh != 0) + { + h[hindex++] = hh; + } + Fast_Two_Sum(product1, sum, Q, hh); + if(hh != 0) + { + h[hindex++] = hh; + } + } + if((Q != 0.0) || (hindex == 0)) + { + h[hindex++] = Q; + } + return hindex; +} + +/*****************************************************************************/ +/* */ +/* compress() Compress an expansion. */ +/* */ +/* See the long version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. If round-to-even is used (as */ +/* with IEEE 754), then any nonoverlapping expansion is converted to a */ +/* nonadjacent expansion. */ +/* */ +/*****************************************************************************/ + +int compress(elen, e, h) /* e and h may be the same. 
*/ + int elen; +REAL *e; +REAL *h; +{ + REAL Q, q; + INEXACT REAL Qnew; + int eindex, hindex; + INEXACT REAL bvirt; + REAL enow, hnow; + int top, bottom; + + bottom = elen - 1; + Q = e[bottom]; + for(eindex = elen - 2; eindex >= 0; eindex--) + { + enow = e[eindex]; + Fast_Two_Sum(Q, enow, Qnew, q); + if(q != 0) + { + h[bottom--] = Qnew; + Q = q; + } + else + { + Q = Qnew; + } + } + top = 0; + for(hindex = bottom + 1; hindex < elen; hindex++) + { + hnow = h[hindex]; + Fast_Two_Sum(hnow, Q, Qnew, q); + if(q != 0) + { + h[top++] = q; + } + Q = Qnew; + } + h[top] = Q; + return top + 1; +} + +/*****************************************************************************/ +/* */ +/* estimate() Produce a one-word estimate of an expansion's value. */ +/* */ +/* See either version of my paper for details. */ +/* */ +/*****************************************************************************/ + +REAL estimate(elen, e) int elen; +REAL *e; +{ + REAL Q; + int eindex; + + Q = e[0]; + for(eindex = 1; eindex < elen; eindex++) + { + Q += e[eindex]; + } + return Q; +} + +/*****************************************************************************/ +/* */ +/* orient2dfast() Approximate 2D orientation test. Nonrobust. */ +/* orient2dexact() Exact 2D orientation test. Robust. */ +/* orient2dslow() Another exact 2D orientation test. Robust. */ +/* orient2d() Adaptive exact 2D orientation test. Robust. */ +/* */ +/* Return a positive value if the points pa, pb, and pc occur */ +/* in counterclockwise order; a negative value if they occur */ +/* in clockwise order; and zero if they are collinear. The */ +/* result is also a rough approximation of twice the signed */ +/* area of the triangle defined by the three points. */ +/* */ +/* Only the first and last routine should be used; the middle two are for */ +/* timings. */ +/* */ +/* The last three use exact arithmetic to ensure a correct answer. The */ +/* result returned is the determinant of a matrix. 
In orient2d() only, */ +/* this determinant is computed adaptively, in the sense that exact */ +/* arithmetic is used only to the degree it is needed to ensure that the */ +/* returned value has the correct sign. Hence, orient2d() is usually quite */ +/* fast, but will run more slowly when the input points are collinear or */ +/* nearly so. */ +/* */ +/*****************************************************************************/ + +REAL orient2dfast(pa, pb, pc) REAL *pa; +REAL *pb; +REAL *pc; +{ + REAL acx, bcx, acy, bcy; + + acx = pa[0] - pc[0]; + bcx = pb[0] - pc[0]; + acy = pa[1] - pc[1]; + bcy = pb[1] - pc[1]; + return acx * bcy - acy * bcx; +} + +REAL orient2dexact(pa, pb, pc) REAL *pa; +REAL *pb; +REAL *pc; +{ + INEXACT REAL axby1, axcy1, bxcy1, bxay1, cxay1, cxby1; + REAL axby0, axcy0, bxcy0, bxay0, cxay0, cxby0; + REAL aterms[4], bterms[4], cterms[4]; + INEXACT REAL aterms3, bterms3, cterms3; + REAL v[8], w[12]; + int vlength, wlength; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + + Two_Product(pa[0], pb[1], axby1, axby0); + Two_Product(pa[0], pc[1], axcy1, axcy0); + Two_Two_Diff(axby1, axby0, axcy1, axcy0, aterms3, aterms[2], aterms[1], aterms[0]); + aterms[3] = aterms3; + + Two_Product(pb[0], pc[1], bxcy1, bxcy0); + Two_Product(pb[0], pa[1], bxay1, bxay0); + Two_Two_Diff(bxcy1, bxcy0, bxay1, bxay0, bterms3, bterms[2], bterms[1], bterms[0]); + bterms[3] = bterms3; + + Two_Product(pc[0], pa[1], cxay1, cxay0); + Two_Product(pc[0], pb[1], cxby1, cxby0); + Two_Two_Diff(cxay1, cxay0, cxby1, cxby0, cterms3, cterms[2], cterms[1], cterms[0]); + cterms[3] = cterms3; + + vlength = fast_expansion_sum_zeroelim(4, aterms, 4, bterms, v); + wlength = fast_expansion_sum_zeroelim(vlength, v, 4, cterms, w); + + return w[wlength - 1]; +} + +REAL orient2dslow(pa, pb, pc) REAL *pa; +REAL *pb; +REAL *pc; +{ + INEXACT REAL acx, acy, bcx, bcy; + 
REAL acxtail, acytail; + REAL bcxtail, bcytail; + REAL negate, negatetail; + REAL axby[8], bxay[8]; + INEXACT REAL axby7, bxay7; + REAL deter[16]; + int deterlen; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL a0hi, a0lo, a1hi, a1lo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j, _k, _l, _m, _n; + REAL _0, _1, _2; + + Two_Diff(pa[0], pc[0], acx, acxtail); + Two_Diff(pa[1], pc[1], acy, acytail); + Two_Diff(pb[0], pc[0], bcx, bcxtail); + Two_Diff(pb[1], pc[1], bcy, bcytail); + + Two_Two_Product(acx, acxtail, bcy, bcytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]); + axby[7] = axby7; + negate = -acy; + negatetail = -acytail; + Two_Two_Product(bcx, bcxtail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]); + bxay[7] = bxay7; + + deterlen = fast_expansion_sum_zeroelim(8, axby, 8, bxay, deter); + + return deter[deterlen - 1]; +} + +REAL orient2dadapt(pa, pb, pc, detsum) REAL *pa; +REAL *pb; +REAL *pc; +REAL detsum; +{ + INEXACT REAL acx, acy, bcx, bcy; + REAL acxtail, acytail, bcxtail, bcytail; + INEXACT REAL detleft, detright; + REAL detlefttail, detrighttail; + REAL det, errbound; + REAL B[4], C1[8], C2[12], D[16]; + INEXACT REAL B3; + int C1length, C2length, Dlength; + REAL u[4]; + INEXACT REAL u3; + INEXACT REAL s1, t1; + REAL s0, t0; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + + acx = (REAL)(pa[0] - pc[0]); + bcx = (REAL)(pb[0] - pc[0]); + acy = (REAL)(pa[1] - pc[1]); + bcy = (REAL)(pb[1] - pc[1]); + + Two_Product(acx, bcy, detleft, detlefttail); + Two_Product(acy, bcx, detright, detrighttail); + + Two_Two_Diff(detleft, detlefttail, detright, detrighttail, B3, B[2], B[1], B[0]); + B[3] = B3; + + det = estimate(4, B); + errbound = ccwerrboundB * detsum; + if((det >= errbound) || (-det >= errbound)) + { 
+ return det; + } + + Two_Diff_Tail(pa[0], pc[0], acx, acxtail); + Two_Diff_Tail(pb[0], pc[0], bcx, bcxtail); + Two_Diff_Tail(pa[1], pc[1], acy, acytail); + Two_Diff_Tail(pb[1], pc[1], bcy, bcytail); + + if((acxtail == 0.0) && (acytail == 0.0) && (bcxtail == 0.0) && (bcytail == 0.0)) + { + return det; + } + + errbound = ccwerrboundC * detsum + resulterrbound * Absolute(det); + det += (acx * bcytail + bcy * acxtail) - (acy * bcxtail + bcx * acytail); + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + + Two_Product(acxtail, bcy, s1, s0); + Two_Product(acytail, bcx, t1, t0); + Two_Two_Diff(s1, s0, t1, t0, u3, u[2], u[1], u[0]); + u[3] = u3; + C1length = fast_expansion_sum_zeroelim(4, B, 4, u, C1); + + Two_Product(acx, bcytail, s1, s0); + Two_Product(acy, bcxtail, t1, t0); + Two_Two_Diff(s1, s0, t1, t0, u3, u[2], u[1], u[0]); + u[3] = u3; + C2length = fast_expansion_sum_zeroelim(C1length, C1, 4, u, C2); + + Two_Product(acxtail, bcytail, s1, s0); + Two_Product(acytail, bcxtail, t1, t0); + Two_Two_Diff(s1, s0, t1, t0, u3, u[2], u[1], u[0]); + u[3] = u3; + Dlength = fast_expansion_sum_zeroelim(C2length, C2, 4, u, D); + + return (D[Dlength - 1]); +} + +REAL orient2d(pa, pb, pc) REAL *pa; +REAL *pb; +REAL *pc; +{ + REAL detleft, detright, det; + REAL detsum, errbound; + + detleft = (pa[0] - pc[0]) * (pb[1] - pc[1]); + detright = (pa[1] - pc[1]) * (pb[0] - pc[0]); + det = detleft - detright; + + if(detleft > 0.0) + { + if(detright <= 0.0) + { + return det; + } + else + { + detsum = detleft + detright; + } + } + else if(detleft < 0.0) + { + if(detright >= 0.0) + { + return det; + } + else + { + detsum = -detleft - detright; + } + } + else + { + return det; + } + + errbound = ccwerrboundA * detsum; + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + + return orient2dadapt(pa, pb, pc, detsum); +} + +/*****************************************************************************/ +/* */ +/* orient3dfast() Approximate 3D orientation test. 
Nonrobust. */ +/* orient3dexact() Exact 3D orientation test. Robust. */ +/* orient3dslow() Another exact 3D orientation test. Robust. */ +/* orient3d() Adaptive exact 3D orientation test. Robust. */ +/* */ +/* Return a positive value if the point pd lies below the */ +/* plane passing through pa, pb, and pc; "below" is defined so */ +/* that pa, pb, and pc appear in counterclockwise order when */ +/* viewed from above the plane. Returns a negative value if */ +/* pd lies above the plane. Returns zero if the points are */ +/* coplanar. The result is also a rough approximation of six */ +/* times the signed volume of the tetrahedron defined by the */ +/* four points. */ +/* */ +/* Only the first and last routine should be used; the middle two are for */ +/* timings. */ +/* */ +/* The last three use exact arithmetic to ensure a correct answer. The */ +/* result returned is the determinant of a matrix. In orient3d() only, */ +/* this determinant is computed adaptively, in the sense that exact */ +/* arithmetic is used only to the degree it is needed to ensure that the */ +/* returned value has the correct sign. Hence, orient3d() is usually quite */ +/* fast, but will run more slowly when the input points are coplanar or */ +/* nearly so. 
*/ +/* */ +/*****************************************************************************/ + /* orient3dfast(): plain floating-point 3x3 determinant of the rows (pa-pd, pb-pd, pc-pd); each point is read at indices 0..2 and no error bound is checked. */ +REAL orient3dfast(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + REAL adx, bdx, cdx; + REAL ady, bdy, cdy; + REAL adz, bdz, cdz; + + adx = pa[0] - pd[0]; + bdx = pb[0] - pd[0]; + cdx = pc[0] - pd[0]; + ady = pa[1] - pd[1]; + bdy = pb[1] - pd[1]; + cdy = pc[1] - pd[1]; + adz = pa[2] - pd[2]; + bdz = pb[2] - pd[2]; + cdz = pc[2] - pd[2]; + /* Cofactor expansion along the x column. */ + return adx * (bdy * cdz - bdz * cdy) + bdx * (cdy * adz - cdz * ady) + cdx * (ady * bdz - adz * bdy); +} + /* orient3dexact(): works on the raw coordinates (no subtraction of pd). It first forms the six pairwise x/y cross differences ab, bc, cd, da, ac, bd as 4-component arrays, later combined and scaled by the z coordinates. */ +REAL orient3dexact(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + INEXACT REAL axby1, bxcy1, cxdy1, dxay1, axcy1, bxdy1; + INEXACT REAL bxay1, cxby1, dxcy1, axdy1, cxay1, dxby1; + REAL axby0, bxcy0, cxdy0, dxay0, axcy0, bxdy0; + REAL bxay0, cxby0, dxcy0, axdy0, cxay0, dxby0; + REAL ab[4], bc[4], cd[4], da[4], ac[4], bd[4]; + REAL temp8[8]; + int templen; + REAL abc[12], bcd[12], cda[12], dab[12]; + int abclen, bcdlen, cdalen, dablen; + REAL adet[24], bdet[24], cdet[24], ddet[24]; + int alen, blen, clen, dlen; + REAL abdet[48], cddet[48]; + int ablen, cdlen; + REAL deter[96]; + int deterlen; + int i; + /* Scratch names presumably used by the Two_* macros (defined elsewhere). */ + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + /* ab = pa[0]*pb[1] - pb[0]*pa[1], and likewise for the other pairs. */ + Two_Product(pa[0], pb[1], axby1, axby0); + Two_Product(pb[0], pa[1], bxay1, bxay0); + Two_Two_Diff(axby1, axby0, bxay1, bxay0, ab[3], ab[2], ab[1], ab[0]); + + Two_Product(pb[0], pc[1], bxcy1, bxcy0); + Two_Product(pc[0], pb[1], cxby1, cxby0); + Two_Two_Diff(bxcy1, bxcy0, cxby1, cxby0, bc[3], bc[2], bc[1], bc[0]); + + Two_Product(pc[0], pd[1], cxdy1, cxdy0); + Two_Product(pd[0], pc[1], dxcy1, dxcy0); + Two_Two_Diff(cxdy1, cxdy0, dxcy1, dxcy0, cd[3], cd[2], cd[1], cd[0]); + + Two_Product(pd[0], pa[1], dxay1, dxay0); + Two_Product(pa[0], pd[1], axdy1, axdy0); + Two_Two_Diff(dxay1, dxay0, axdy1, axdy0, da[3], da[2], da[1],
da[0]); + + Two_Product(pa[0], pc[1], axcy1, axcy0); + Two_Product(pc[0], pa[1], cxay1, cxay0); + Two_Two_Diff(axcy1, axcy0, cxay1, cxay0, ac[3], ac[2], ac[1], ac[0]); + + Two_Product(pb[0], pd[1], bxdy1, bxdy0); + Two_Product(pd[0], pb[1], dxby1, dxby0); + Two_Two_Diff(bxdy1, bxdy0, dxby1, dxby0, bd[3], bd[2], bd[1], bd[0]); + /* Three-point sums: cda = cd + da + ac and dab = da + ab + bd use ac/bd as computed. */ + templen = fast_expansion_sum_zeroelim(4, cd, 4, da, temp8); + cdalen = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, cda); + templen = fast_expansion_sum_zeroelim(4, da, 4, ab, temp8); + dablen = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, dab); + /* ac and bd are negated in place, so abc = ab + bc - ac and bcd = bc + cd - bd. Order matters: the two sums above must run first. */ + for(i = 0; i < 4; i++) + { + bd[i] = -bd[i]; + ac[i] = -ac[i]; + } + templen = fast_expansion_sum_zeroelim(4, ab, 4, bc, temp8); + abclen = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, abc); + templen = fast_expansion_sum_zeroelim(4, bc, 4, cd, temp8); + bcdlen = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, bcd); + /* Scale each three-point sum by the z coordinate of the remaining point, with alternating sign. */ + alen = scale_expansion_zeroelim(bcdlen, bcd, pa[2], adet); + blen = scale_expansion_zeroelim(cdalen, cda, -pb[2], bdet); + clen = scale_expansion_zeroelim(dablen, dab, pc[2], cdet); + dlen = scale_expansion_zeroelim(abclen, abc, -pd[2], ddet); + /* Sum the four pieces; the last component of deter is returned. */ + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + cdlen = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, deter); + + return deter[deterlen - 1]; +} + /* orient3dslow(): takes the nine coordinate differences relative to pd as (value, tail) pairs via Two_Diff, forms the six x/y products as 8-component arrays, and scales their pairwise sums by the z differences and z tails. */ +REAL orient3dslow(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + INEXACT REAL adx, ady, adz, bdx, bdy, bdz, cdx, cdy, cdz; + REAL adxtail, adytail, adztail; + REAL bdxtail, bdytail, bdztail; + REAL cdxtail, cdytail, cdztail; + REAL negate, negatetail; + INEXACT REAL axby7, bxcy7, axcy7, bxay7, cxby7, cxay7; + REAL axby[8], bxcy[8], axcy[8], bxay[8], cxby[8], cxay[8]; + REAL temp16[16], temp32[32], temp32t[32]; + int temp16len, temp32len, temp32tlen; + REAL adet[64], bdet[64], cdet[64]; + int alen, blen, clen; + REAL abdet[128]; + int ablen; + REAL
deter[192]; + int deterlen; + /* Scratch names presumably used by the Two_* macros (defined elsewhere). */ + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL a0hi, a0lo, a1hi, a1lo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j, _k, _l, _m, _n; + REAL _0, _1, _2; + /* Differences relative to pd, each with its tail. */ + Two_Diff(pa[0], pd[0], adx, adxtail); + Two_Diff(pa[1], pd[1], ady, adytail); + Two_Diff(pa[2], pd[2], adz, adztail); + Two_Diff(pb[0], pd[0], bdx, bdxtail); + Two_Diff(pb[1], pd[1], bdy, bdytail); + Two_Diff(pb[2], pd[2], bdz, bdztail); + Two_Diff(pc[0], pd[0], cdx, cdxtail); + Two_Diff(pc[1], pd[1], cdy, cdytail); + Two_Diff(pc[2], pd[2], cdz, cdztail); + /* Six x/y products; the subtracted member of each pair is formed by negating the y operand (negate, negatetail) so the pairs can simply be added later. */ + Two_Two_Product(adx, adxtail, bdy, bdytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]); + axby[7] = axby7; + negate = -ady; + negatetail = -adytail; + Two_Two_Product(bdx, bdxtail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]); + bxay[7] = bxay7; + Two_Two_Product(bdx, bdxtail, cdy, cdytail, bxcy7, bxcy[6], bxcy[5], bxcy[4], bxcy[3], bxcy[2], bxcy[1], bxcy[0]); + bxcy[7] = bxcy7; + negate = -bdy; + negatetail = -bdytail; + Two_Two_Product(cdx, cdxtail, negate, negatetail, cxby7, cxby[6], cxby[5], cxby[4], cxby[3], cxby[2], cxby[1], cxby[0]); + cxby[7] = cxby7; + Two_Two_Product(cdx, cdxtail, ady, adytail, cxay7, cxay[6], cxay[5], cxay[4], cxay[3], cxay[2], cxay[1], cxay[0]); + cxay[7] = cxay7; + negate = -cdy; + negatetail = -cdytail; + Two_Two_Product(adx, adxtail, negate, negatetail, axcy7, axcy[6], axcy[5], axcy[4], axcy[3], axcy[2], axcy[1], axcy[0]); + axcy[7] = axcy7; + /* adet = (bxcy + cxby) scaled by adz and by adztail, summed. */ + temp16len = fast_expansion_sum_zeroelim(8, bxcy, 8, cxby, temp16); + temp32len = scale_expansion_zeroelim(temp16len, temp16, adz, temp32); + temp32tlen = scale_expansion_zeroelim(temp16len, temp16, adztail, temp32t); + alen = fast_expansion_sum_zeroelim(temp32len, temp32, temp32tlen, temp32t, adet); + /* bdet: same pattern with cxay + axcy and bdz / bdztail. */ + temp16len = fast_expansion_sum_zeroelim(8, cxay, 8, axcy, temp16); + temp32len = scale_expansion_zeroelim(temp16len,
temp16, bdz, temp32); + temp32tlen = scale_expansion_zeroelim(temp16len, temp16, bdztail, temp32t); + blen = fast_expansion_sum_zeroelim(temp32len, temp32, temp32tlen, temp32t, bdet); + /* cdet: same pattern with axby + bxay and cdz / cdztail. */ + temp16len = fast_expansion_sum_zeroelim(8, axby, 8, bxay, temp16); + temp32len = scale_expansion_zeroelim(temp16len, temp16, cdz, temp32); + temp32tlen = scale_expansion_zeroelim(temp16len, temp16, cdztail, temp32t); + clen = fast_expansion_sum_zeroelim(temp32len, temp32, temp32tlen, temp32t, cdet); + /* Sum the three pieces; the last component of deter is returned. */ + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, deter); + + return deter[deterlen - 1]; +} + /* orient3dadapt(): staged fallback called by orient3d() with the same four points (each read at indices 0..2) and permanent, the magnitude sum that scales every error bound. It returns as soon as a stage clears its bound, otherwise it keeps folding tail terms into a running sum. */ +REAL orient3dadapt(pa, pb, pc, pd, permanent) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL permanent; +{ + INEXACT REAL adx, bdx, cdx, ady, bdy, cdy, adz, bdz, cdz; + REAL det, errbound; + + INEXACT REAL bdxcdy1, cdxbdy1, cdxady1, adxcdy1, adxbdy1, bdxady1; + REAL bdxcdy0, cdxbdy0, cdxady0, adxcdy0, adxbdy0, bdxady0; + REAL bc[4], ca[4], ab[4]; + INEXACT REAL bc3, ca3, ab3; + REAL adet[8], bdet[8], cdet[8]; + int alen, blen, clen; + REAL abdet[16]; + int ablen; + REAL *finnow, *finother, *finswap; + REAL fin1[192], fin2[192]; + int finlength; + + REAL adxtail, bdxtail, cdxtail; + REAL adytail, bdytail, cdytail; + REAL adztail, bdztail, cdztail; + INEXACT REAL at_blarge, at_clarge; + INEXACT REAL bt_clarge, bt_alarge; + INEXACT REAL ct_alarge, ct_blarge; + REAL at_b[4], at_c[4], bt_c[4], bt_a[4], ct_a[4], ct_b[4]; + int at_blen, at_clen, bt_clen, bt_alen, ct_alen, ct_blen; + INEXACT REAL bdxt_cdy1, cdxt_bdy1, cdxt_ady1; + INEXACT REAL adxt_cdy1, adxt_bdy1, bdxt_ady1; + REAL bdxt_cdy0, cdxt_bdy0, cdxt_ady0; + REAL adxt_cdy0, adxt_bdy0, bdxt_ady0; + INEXACT REAL bdyt_cdx1, cdyt_bdx1, cdyt_adx1; + INEXACT REAL adyt_cdx1, adyt_bdx1, bdyt_adx1; + REAL bdyt_cdx0, cdyt_bdx0, cdyt_adx0; + REAL adyt_cdx0, adyt_bdx0, bdyt_adx0; + REAL bct[8], cat[8], abt[8]; + int bctlen, catlen, abtlen; + INEXACT REAL
bdxt_cdyt1, cdxt_bdyt1, cdxt_adyt1; + INEXACT REAL adxt_cdyt1, adxt_bdyt1, bdxt_adyt1; + REAL bdxt_cdyt0, cdxt_bdyt0, cdxt_adyt0; + REAL adxt_cdyt0, adxt_bdyt0, bdxt_adyt0; + REAL u[4], v[12], w[16]; + INEXACT REAL u3; + int vlength, wlength; + REAL negate; + /* Scratch names presumably used by the Two_* macros (defined elsewhere). */ + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j, _k; + REAL _0; + /* Rounded differences relative to pd. */ + adx = (REAL)(pa[0] - pd[0]); + bdx = (REAL)(pb[0] - pd[0]); + cdx = (REAL)(pc[0] - pd[0]); + ady = (REAL)(pa[1] - pd[1]); + bdy = (REAL)(pb[1] - pd[1]); + cdy = (REAL)(pc[1] - pd[1]); + adz = (REAL)(pa[2] - pd[2]); + bdz = (REAL)(pb[2] - pd[2]); + cdz = (REAL)(pc[2] - pd[2]); + /* bc, ca, ab: 4-component x/y minors, each scaled by the third point's z difference into adet, bdet, cdet. */ + Two_Product(bdx, cdy, bdxcdy1, bdxcdy0); + Two_Product(cdx, bdy, cdxbdy1, cdxbdy0); + Two_Two_Diff(bdxcdy1, bdxcdy0, cdxbdy1, cdxbdy0, bc3, bc[2], bc[1], bc[0]); + bc[3] = bc3; + alen = scale_expansion_zeroelim(4, bc, adz, adet); + + Two_Product(cdx, ady, cdxady1, cdxady0); + Two_Product(adx, cdy, adxcdy1, adxcdy0); + Two_Two_Diff(cdxady1, cdxady0, adxcdy1, adxcdy0, ca3, ca[2], ca[1], ca[0]); + ca[3] = ca3; + blen = scale_expansion_zeroelim(4, ca, bdz, bdet); + + Two_Product(adx, bdy, adxbdy1, adxbdy0); + Two_Product(bdx, ady, bdxady1, bdxady0); + Two_Two_Diff(adxbdy1, adxbdy0, bdxady1, bdxady0, ab3, ab[2], ab[1], ab[0]); + ab[3] = ab3; + clen = scale_expansion_zeroelim(4, ab, cdz, cdet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + finlength = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, fin1); + /* First test: estimate of fin1 against o3derrboundB * permanent. */ + det = estimate(finlength, fin1); + errbound = o3derrboundB * permanent; + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + /* Fetch the tails of the nine differences. */ + Two_Diff_Tail(pa[0], pd[0], adx, adxtail); + Two_Diff_Tail(pb[0], pd[0], bdx, bdxtail); + Two_Diff_Tail(pc[0], pd[0], cdx, cdxtail); + Two_Diff_Tail(pa[1], pd[1], ady, adytail); + Two_Diff_Tail(pb[1], pd[1], bdy, bdytail); + Two_Diff_Tail(pc[1], pd[1], cdy, cdytail); +
Two_Diff_Tail(pa[2], pd[2], adz, adztail); + Two_Diff_Tail(pb[2], pd[2], bdz, bdztail); + Two_Diff_Tail(pc[2], pd[2], cdz, cdztail); + /* All nine tails zero: nothing to correct, keep the current det. */ + if((adxtail == 0.0) && (bdxtail == 0.0) && (cdxtail == 0.0) && (adytail == 0.0) && (bdytail == 0.0) && (cdytail == 0.0) && + (adztail == 0.0) && (bdztail == 0.0) && (cdztail == 0.0)) + { + return det; + } + /* Second test: add the first-order tail terms to det and compare with a bound built from o3derrboundC and resulterrbound. */ + errbound = o3derrboundC * permanent + resulterrbound * Absolute(det); + det += (adz * ((bdx * cdytail + cdy * bdxtail) - (bdy * cdxtail + cdx * bdytail)) + adztail * (bdx * cdy - bdy * cdx)) + + (bdz * ((cdx * adytail + ady * cdxtail) - (cdy * adxtail + adx * cdytail)) + bdztail * (cdx * ady - cdy * adx)) + + (cdz * ((adx * bdytail + bdy * adxtail) - (ady * bdxtail + bdx * adytail)) + cdztail * (adx * bdy - ady * bdx)); + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + /* fin1 and fin2 alternate from here on: every sum reads finnow, writes finother, then the two pointers are swapped. */ + finnow = fin1; + finother = fin2; + /* at_b and at_c are built from a's x/y tails times b's and c's x/y differences; zero tails take the shorter branches (length 1 or 2 instead of 4). */ + if(adxtail == 0.0) + { + if(adytail == 0.0) + { + at_b[0] = 0.0; + at_blen = 1; + at_c[0] = 0.0; + at_clen = 1; + } + else + { + negate = -adytail; + Two_Product(negate, bdx, at_blarge, at_b[0]); + at_b[1] = at_blarge; + at_blen = 2; + Two_Product(adytail, cdx, at_clarge, at_c[0]); + at_c[1] = at_clarge; + at_clen = 2; + } + } + else + { + if(adytail == 0.0) + { + Two_Product(adxtail, bdy, at_blarge, at_b[0]); + at_b[1] = at_blarge; + at_blen = 2; + negate = -adxtail; + Two_Product(negate, cdy, at_clarge, at_c[0]); + at_c[1] = at_clarge; + at_clen = 2; + } + else + { + Two_Product(adxtail, bdy, adxt_bdy1, adxt_bdy0); + Two_Product(adytail, bdx, adyt_bdx1, adyt_bdx0); + Two_Two_Diff(adxt_bdy1, adxt_bdy0, adyt_bdx1, adyt_bdx0, at_blarge, at_b[2], at_b[1], at_b[0]); + at_b[3] = at_blarge; + at_blen = 4; + Two_Product(adytail, cdx, adyt_cdx1, adyt_cdx0); + Two_Product(adxtail, cdy, adxt_cdy1, adxt_cdy0); + Two_Two_Diff(adyt_cdx1, adyt_cdx0, adxt_cdy1, adxt_cdy0, at_clarge, at_c[2], at_c[1], at_c[0]); + at_c[3] = at_clarge; + at_clen = 4; + } + } /* Same construction for b's tails: bt_c and bt_a. */ + if(bdxtail == 0.0) + { + if(bdytail == 0.0) + { + bt_c[0] =
0.0; + bt_clen = 1; + bt_a[0] = 0.0; + bt_alen = 1; + } + else + { + negate = -bdytail; + Two_Product(negate, cdx, bt_clarge, bt_c[0]); + bt_c[1] = bt_clarge; + bt_clen = 2; + Two_Product(bdytail, adx, bt_alarge, bt_a[0]); + bt_a[1] = bt_alarge; + bt_alen = 2; + } + } + else + { + if(bdytail == 0.0) + { + Two_Product(bdxtail, cdy, bt_clarge, bt_c[0]); + bt_c[1] = bt_clarge; + bt_clen = 2; + negate = -bdxtail; + Two_Product(negate, ady, bt_alarge, bt_a[0]); + bt_a[1] = bt_alarge; + bt_alen = 2; + } + else + { + Two_Product(bdxtail, cdy, bdxt_cdy1, bdxt_cdy0); + Two_Product(bdytail, cdx, bdyt_cdx1, bdyt_cdx0); + Two_Two_Diff(bdxt_cdy1, bdxt_cdy0, bdyt_cdx1, bdyt_cdx0, bt_clarge, bt_c[2], bt_c[1], bt_c[0]); + bt_c[3] = bt_clarge; + bt_clen = 4; + Two_Product(bdytail, adx, bdyt_adx1, bdyt_adx0); + Two_Product(bdxtail, ady, bdxt_ady1, bdxt_ady0); + Two_Two_Diff(bdyt_adx1, bdyt_adx0, bdxt_ady1, bdxt_ady0, bt_alarge, bt_a[2], bt_a[1], bt_a[0]); + bt_a[3] = bt_alarge; + bt_alen = 4; + } + } /* Same construction for c's tails: ct_a and ct_b. */ + if(cdxtail == 0.0) + { + if(cdytail == 0.0) + { + ct_a[0] = 0.0; + ct_alen = 1; + ct_b[0] = 0.0; + ct_blen = 1; + } + else + { + negate = -cdytail; + Two_Product(negate, adx, ct_alarge, ct_a[0]); + ct_a[1] = ct_alarge; + ct_alen = 2; + Two_Product(cdytail, bdx, ct_blarge, ct_b[0]); + ct_b[1] = ct_blarge; + ct_blen = 2; + } + } + else + { + if(cdytail == 0.0) + { + Two_Product(cdxtail, ady, ct_alarge, ct_a[0]); + ct_a[1] = ct_alarge; + ct_alen = 2; + negate = -cdxtail; + Two_Product(negate, bdy, ct_blarge, ct_b[0]); + ct_b[1] = ct_blarge; + ct_blen = 2; + } + else + { + Two_Product(cdxtail, ady, cdxt_ady1, cdxt_ady0); + Two_Product(cdytail, adx, cdyt_adx1, cdyt_adx0); + Two_Two_Diff(cdxt_ady1, cdxt_ady0, cdyt_adx1, cdyt_adx0, ct_alarge, ct_a[2], ct_a[1], ct_a[0]); + ct_a[3] = ct_alarge; + ct_alen = 4; + Two_Product(cdytail, bdx, cdyt_bdx1, cdyt_bdx0); + Two_Product(cdxtail, bdy, cdxt_bdy1, cdxt_bdy0); + Two_Two_Diff(cdyt_bdx1, cdyt_bdx0, cdxt_bdy1, cdxt_bdy0, ct_blarge, ct_b[2],
ct_b[1], ct_b[0]); + ct_b[3] = ct_blarge; + ct_blen = 4; + } + } + /* Pair up the tail pieces (bct, cat, abt), scale each by the third point's z difference and add it to the running sum. */ + bctlen = fast_expansion_sum_zeroelim(bt_clen, bt_c, ct_blen, ct_b, bct); + wlength = scale_expansion_zeroelim(bctlen, bct, adz, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + catlen = fast_expansion_sum_zeroelim(ct_alen, ct_a, at_clen, at_c, cat); + wlength = scale_expansion_zeroelim(catlen, cat, bdz, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + abtlen = fast_expansion_sum_zeroelim(at_blen, at_b, bt_alen, bt_a, abt); + wlength = scale_expansion_zeroelim(abtlen, abt, cdz, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + /* Nonzero z tails times the leading minors bc, ca, ab. */ + if(adztail != 0.0) + { + vlength = scale_expansion_zeroelim(4, bc, adztail, v); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, vlength, v, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdztail != 0.0) + { + vlength = scale_expansion_zeroelim(4, ca, bdztail, v); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, vlength, v, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdztail != 0.0) + { + vlength = scale_expansion_zeroelim(4, ab, cdztail, v); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, vlength, v, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + /* Products of an x tail with a y tail, times the matching z difference and, when nonzero, its z tail. Skipped whenever either tail is zero. */ + if(adxtail != 0.0) + { + if(bdytail != 0.0) + { + Two_Product(adxtail, bdytail, adxt_bdyt1, adxt_bdyt0); + Two_One_Product(adxt_bdyt1, adxt_bdyt0, cdz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(cdztail != 0.0) + { +
Two_One_Product(adxt_bdyt1, adxt_bdyt0, cdztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + if(cdytail != 0.0) + { + negate = -adxtail; + Two_Product(negate, cdytail, adxt_cdyt1, adxt_cdyt0); + Two_One_Product(adxt_cdyt1, adxt_cdyt0, bdz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(bdztail != 0.0) + { + Two_One_Product(adxt_cdyt1, adxt_cdyt0, bdztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + } /* Same pattern starting from b's x tail. */ + if(bdxtail != 0.0) + { + if(cdytail != 0.0) + { + Two_Product(bdxtail, cdytail, bdxt_cdyt1, bdxt_cdyt0); + Two_One_Product(bdxt_cdyt1, bdxt_cdyt0, adz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(adztail != 0.0) + { + Two_One_Product(bdxt_cdyt1, bdxt_cdyt0, adztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + if(adytail != 0.0) + { + negate = -bdxtail; + Two_Product(negate, adytail, bdxt_adyt1, bdxt_adyt0); + Two_One_Product(bdxt_adyt1, bdxt_adyt0, cdz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(cdztail != 0.0) + { + Two_One_Product(bdxt_adyt1, bdxt_adyt0, cdztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + }
+ } + } /* Same pattern starting from c's x tail. */ + if(cdxtail != 0.0) + { + if(adytail != 0.0) + { + Two_Product(cdxtail, adytail, cdxt_adyt1, cdxt_adyt0); + Two_One_Product(cdxt_adyt1, cdxt_adyt0, bdz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(bdztail != 0.0) + { + Two_One_Product(cdxt_adyt1, cdxt_adyt0, bdztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + if(bdytail != 0.0) + { + negate = -cdxtail; + Two_Product(negate, bdytail, cdxt_bdyt1, cdxt_bdyt0); + Two_One_Product(cdxt_bdyt1, cdxt_bdyt0, adz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(adztail != 0.0) + { + Two_One_Product(cdxt_bdyt1, cdxt_bdyt0, adztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + } + /* Nonzero z tails times the paired tail pieces bct, cat, abt. */ + if(adztail != 0.0) + { + wlength = scale_expansion_zeroelim(bctlen, bct, adztail, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdztail != 0.0) + { + wlength = scale_expansion_zeroelim(catlen, cat, bdztail, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdztail != 0.0) + { + wlength = scale_expansion_zeroelim(abtlen, abt, cdztail, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + /* The last component of the current buffer is the returned value. */ + return finnow[finlength - 1]; +} + /* orient3d(): plain floating-point 3x3 determinant of the differences to pd, returned directly when its magnitude exceeds o3derrboundA * permanent, else delegated to orient3dadapt(). */ +REAL orient3d(pa, pb, pc, pd) REAL *pa; +REAL
*pb; +REAL *pc; +REAL *pd; +{ + REAL adx, bdx, cdx, ady, bdy, cdy, adz, bdz, cdz; + REAL bdxcdy, cdxbdy, cdxady, adxcdy, adxbdy, bdxady; + REAL det; + REAL permanent, errbound; + /* Coordinate differences relative to pd. */ + adx = pa[0] - pd[0]; + bdx = pb[0] - pd[0]; + cdx = pc[0] - pd[0]; + ady = pa[1] - pd[1]; + bdy = pb[1] - pd[1]; + cdy = pc[1] - pd[1]; + adz = pa[2] - pd[2]; + bdz = pb[2] - pd[2]; + cdz = pc[2] - pd[2]; + /* The six x/y products are kept separately because both det and permanent reuse them. */ + bdxcdy = bdx * cdy; + cdxbdy = cdx * bdy; + + cdxady = cdx * ady; + adxcdy = adx * cdy; + + adxbdy = adx * bdy; + bdxady = bdx * ady; + + det = adz * (bdxcdy - cdxbdy) + bdz * (cdxady - adxcdy) + cdz * (adxbdy - bdxady); + /* permanent: the same expression with every term replaced by its absolute value and the subtractions by additions. */ + permanent = (Absolute(bdxcdy) + Absolute(cdxbdy)) * Absolute(adz) + (Absolute(cdxady) + Absolute(adxcdy)) * Absolute(bdz) + + (Absolute(adxbdy) + Absolute(bdxady)) * Absolute(cdz); + errbound = o3derrboundA * permanent; /* Strict comparison here: a det equal to the bound (including det == 0 with permanent == 0) goes on to orient3dadapt(). */ + if((det > errbound) || (-det > errbound)) + { + return det; + } + + return orient3dadapt(pa, pb, pc, pd, permanent); +} + +/*****************************************************************************/ +/* */ +/* incirclefast() Approximate 2D incircle test. Nonrobust. */ +/* incircleexact() Exact 2D incircle test. Robust. */ +/* incircleslow() Another exact 2D incircle test. Robust. */ +/* incircle() Adaptive exact 2D incircle test. Robust. */ +/* */ +/* Return a positive value if the point pd lies inside the */ +/* circle passing through pa, pb, and pc; a negative value if */ +/* it lies outside; and zero if the four points are cocircular.*/ +/* The points pa, pb, and pc must be in counterclockwise */ +/* order, or the sign of the result will be reversed. */ +/* */ +/* Only the first and last routine should be used; the middle two are for */ +/* timings. */ +/* */ +/* The last three use exact arithmetic to ensure a correct answer. The */ +/* result returned is the determinant of a matrix.
In incircle() only, */ +/* this determinant is computed adaptively, in the sense that exact */ +/* arithmetic is used only to the degree it is needed to ensure that the */ +/* returned value has the correct sign. Hence, incircle() is usually quite */ +/* fast, but will run more slowly when the input points are cocircular or */ +/* nearly so. */ +/* */ +/*****************************************************************************/ + /* incirclefast(): plain floating-point evaluation with no error bound check. Points are read at indices 0 and 1; each squared distance to pd (alift, blift, clift) multiplies the 2x2 determinant of the other two difference vectors. */ +REAL incirclefast(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + REAL adx, ady, bdx, bdy, cdx, cdy; + REAL abdet, bcdet, cadet; + REAL alift, blift, clift; + + adx = pa[0] - pd[0]; + ady = pa[1] - pd[1]; + bdx = pb[0] - pd[0]; + bdy = pb[1] - pd[1]; + cdx = pc[0] - pd[0]; + cdy = pc[1] - pd[1]; + + abdet = adx * bdy - bdx * ady; + bcdet = bdx * cdy - cdx * bdy; + cadet = cdx * ady - adx * cdy; + alift = adx * adx + ady * ady; + blift = bdx * bdx + bdy * bdy; + clift = cdx * cdx + cdy * cdy; + + return alift * bcdet + blift * cadet + clift * abdet; +} + /* incircleexact(): works on the raw coordinates (no subtraction of pd). It first forms the six pairwise x/y cross differences as 4-component arrays, then scales three-point sums of them by each remaining point's squared coordinates. */ +REAL incircleexact(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + INEXACT REAL axby1, bxcy1, cxdy1, dxay1, axcy1, bxdy1; + INEXACT REAL bxay1, cxby1, dxcy1, axdy1, cxay1, dxby1; + REAL axby0, bxcy0, cxdy0, dxay0, axcy0, bxdy0; + REAL bxay0, cxby0, dxcy0, axdy0, cxay0, dxby0; + REAL ab[4], bc[4], cd[4], da[4], ac[4], bd[4]; + REAL temp8[8]; + int templen; + REAL abc[12], bcd[12], cda[12], dab[12]; + int abclen, bcdlen, cdalen, dablen; + REAL det24x[24], det24y[24], det48x[48], det48y[48]; + int xlen, ylen; + REAL adet[96], bdet[96], cdet[96], ddet[96]; + int alen, blen, clen, dlen; + REAL abdet[192], cddet[192]; + int ablen, cdlen; + REAL deter[384]; + int deterlen; + int i; + /* Scratch names presumably used by the Two_* macros (defined elsewhere). */ + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + + Two_Product(pa[0], pb[1], axby1, axby0); + Two_Product(pb[0], pa[1], bxay1, bxay0); +
Two_Two_Diff(axby1, axby0, bxay1, bxay0, ab[3], ab[2], ab[1], ab[0]); + + Two_Product(pb[0], pc[1], bxcy1, bxcy0); + Two_Product(pc[0], pb[1], cxby1, cxby0); + Two_Two_Diff(bxcy1, bxcy0, cxby1, cxby0, bc[3], bc[2], bc[1], bc[0]); + + Two_Product(pc[0], pd[1], cxdy1, cxdy0); + Two_Product(pd[0], pc[1], dxcy1, dxcy0); + Two_Two_Diff(cxdy1, cxdy0, dxcy1, dxcy0, cd[3], cd[2], cd[1], cd[0]); + + Two_Product(pd[0], pa[1], dxay1, dxay0); + Two_Product(pa[0], pd[1], axdy1, axdy0); + Two_Two_Diff(dxay1, dxay0, axdy1, axdy0, da[3], da[2], da[1], da[0]); + + Two_Product(pa[0], pc[1], axcy1, axcy0); + Two_Product(pc[0], pa[1], cxay1, cxay0); + Two_Two_Diff(axcy1, axcy0, cxay1, cxay0, ac[3], ac[2], ac[1], ac[0]); + + Two_Product(pb[0], pd[1], bxdy1, bxdy0); + Two_Product(pd[0], pb[1], dxby1, dxby0); + Two_Two_Diff(bxdy1, bxdy0, dxby1, dxby0, bd[3], bd[2], bd[1], bd[0]); + /* Three-point sums: cda = cd + da + ac and dab = da + ab + bd use ac/bd as computed. */ + templen = fast_expansion_sum_zeroelim(4, cd, 4, da, temp8); + cdalen = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, cda); + templen = fast_expansion_sum_zeroelim(4, da, 4, ab, temp8); + dablen = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, dab); + /* ac and bd are negated in place, so abc = ab + bc - ac and bcd = bc + cd - bd. Order matters: the two sums above must run first. */ + for(i = 0; i < 4; i++) + { + bd[i] = -bd[i]; + ac[i] = -ac[i]; + } + templen = fast_expansion_sum_zeroelim(4, ab, 4, bc, temp8); + abclen = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, abc); + templen = fast_expansion_sum_zeroelim(4, bc, 4, cd, temp8); + bcdlen = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, bcd); + /* adet = bcd * (pa[0]^2 + pa[1]^2): two successive scalings per coordinate, then the x and y parts are summed. */ + xlen = scale_expansion_zeroelim(bcdlen, bcd, pa[0], det24x); + xlen = scale_expansion_zeroelim(xlen, det24x, pa[0], det48x); + ylen = scale_expansion_zeroelim(bcdlen, bcd, pa[1], det24y); + ylen = scale_expansion_zeroelim(ylen, det24y, pa[1], det48y); + alen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, adet); + /* bdet: same with cda and pb, sign flipped by negating the second factor. */ + xlen = scale_expansion_zeroelim(cdalen, cda, pb[0], det24x); + xlen = scale_expansion_zeroelim(xlen, det24x, -pb[0], det48x); + ylen = scale_expansion_zeroelim(cdalen, cda, pb[1], det24y); + ylen =
scale_expansion_zeroelim(ylen, det24y, -pb[1], det48y); + blen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, bdet); + /* cdet: dab times pc's squared coordinates. */ + xlen = scale_expansion_zeroelim(dablen, dab, pc[0], det24x); + xlen = scale_expansion_zeroelim(xlen, det24x, pc[0], det48x); + ylen = scale_expansion_zeroelim(dablen, dab, pc[1], det24y); + ylen = scale_expansion_zeroelim(ylen, det24y, pc[1], det48y); + clen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, cdet); + /* ddet: abc times pd's squared coordinates, sign flipped. */ + xlen = scale_expansion_zeroelim(abclen, abc, pd[0], det24x); + xlen = scale_expansion_zeroelim(xlen, det24x, -pd[0], det48x); + ylen = scale_expansion_zeroelim(abclen, abc, pd[1], det24y); + ylen = scale_expansion_zeroelim(ylen, det24y, -pd[1], det48y); + dlen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, ddet); + /* Sum the four pieces; the last component of deter is returned. */ + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + cdlen = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, deter); + + return deter[deterlen - 1]; +} + /* incircleslow(): takes the six coordinate differences relative to pd as (value, tail) pairs via Two_Diff, forms the six x/y products as 8-component arrays, and scales their pairwise sums by the squared x and y differences of the remaining point, tails included. */ +REAL incircleslow(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + INEXACT REAL adx, bdx, cdx, ady, bdy, cdy; + REAL adxtail, bdxtail, cdxtail; + REAL adytail, bdytail, cdytail; + REAL negate, negatetail; + INEXACT REAL axby7, bxcy7, axcy7, bxay7, cxby7, cxay7; + REAL axby[8], bxcy[8], axcy[8], bxay[8], cxby[8], cxay[8]; + REAL temp16[16]; + int temp16len; + REAL detx[32], detxx[64], detxt[32], detxxt[64], detxtxt[64]; + int xlen, xxlen, xtlen, xxtlen, xtxtlen; + REAL x1[128], x2[192]; + int x1len, x2len; + REAL dety[32], detyy[64], detyt[32], detyyt[64], detytyt[64]; + int ylen, yylen, ytlen, yytlen, ytytlen; + REAL y1[128], y2[192]; + int y1len, y2len; + REAL adet[384], bdet[384], cdet[384], abdet[768], deter[1152]; + int alen, blen, clen, ablen, deterlen; + int i; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL a0hi, a0lo, a1hi, a1lo, bhi, blo; + REAL err1, err2, err3; +
INEXACT REAL _i, _j, _k, _l, _m, _n; + REAL _0, _1, _2; + + Two_Diff(pa[0], pd[0], adx, adxtail); + Two_Diff(pa[1], pd[1], ady, adytail); + Two_Diff(pb[0], pd[0], bdx, bdxtail); + Two_Diff(pb[1], pd[1], bdy, bdytail); + Two_Diff(pc[0], pd[0], cdx, cdxtail); + Two_Diff(pc[1], pd[1], cdy, cdytail); + + Two_Two_Product(adx, adxtail, bdy, bdytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]); + axby[7] = axby7; + negate = -ady; + negatetail = -adytail; + Two_Two_Product(bdx, bdxtail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]); + bxay[7] = bxay7; + Two_Two_Product(bdx, bdxtail, cdy, cdytail, bxcy7, bxcy[6], bxcy[5], bxcy[4], bxcy[3], bxcy[2], bxcy[1], bxcy[0]); + bxcy[7] = bxcy7; + negate = -bdy; + negatetail = -bdytail; + Two_Two_Product(cdx, cdxtail, negate, negatetail, cxby7, cxby[6], cxby[5], cxby[4], cxby[3], cxby[2], cxby[1], cxby[0]); + cxby[7] = cxby7; + Two_Two_Product(cdx, cdxtail, ady, adytail, cxay7, cxay[6], cxay[5], cxay[4], cxay[3], cxay[2], cxay[1], cxay[0]); + cxay[7] = cxay7; + negate = -cdy; + negatetail = -cdytail; + Two_Two_Product(adx, adxtail, negate, negatetail, axcy7, axcy[6], axcy[5], axcy[4], axcy[3], axcy[2], axcy[1], axcy[0]); + axcy[7] = axcy7; + + temp16len = fast_expansion_sum_zeroelim(8, bxcy, 8, cxby, temp16); + + xlen = scale_expansion_zeroelim(temp16len, temp16, adx, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, adx, detxx); + xtlen = scale_expansion_zeroelim(temp16len, temp16, adxtail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, adx, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, adxtail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + + ylen = scale_expansion_zeroelim(temp16len, temp16, ady, dety); + yylen = scale_expansion_zeroelim(ylen, dety, ady, detyy); + 
ytlen = scale_expansion_zeroelim(temp16len, temp16, adytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, ady, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, adytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + + alen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, adet); + + temp16len = fast_expansion_sum_zeroelim(8, cxay, 8, axcy, temp16); + + xlen = scale_expansion_zeroelim(temp16len, temp16, bdx, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, bdx, detxx); + xtlen = scale_expansion_zeroelim(temp16len, temp16, bdxtail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, bdx, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, bdxtail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + + ylen = scale_expansion_zeroelim(temp16len, temp16, bdy, dety); + yylen = scale_expansion_zeroelim(ylen, dety, bdy, detyy); + ytlen = scale_expansion_zeroelim(temp16len, temp16, bdytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, bdy, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, bdytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + + blen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, bdet); + + temp16len = fast_expansion_sum_zeroelim(8, axby, 8, bxay, temp16); + + xlen = scale_expansion_zeroelim(temp16len, temp16, cdx, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, cdx, detxx); + xtlen = scale_expansion_zeroelim(temp16len, temp16, cdxtail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, 
detxt, cdx, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, cdxtail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + + ylen = scale_expansion_zeroelim(temp16len, temp16, cdy, dety); + yylen = scale_expansion_zeroelim(ylen, dety, cdy, detyy); + ytlen = scale_expansion_zeroelim(temp16len, temp16, cdytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, cdy, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, cdytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + + clen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, cdet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, deter); + + return deter[deterlen - 1]; +} + +/* incircleadapt(): staged fallback that incircle() calls when its quick floating-point test cannot decide. pa, pb, pc, pd are 2D points (only indices 0 and 1 are read); permanent is the magnitude term supplied by the caller and scales the error bounds iccerrboundB and iccerrboundC. The function returns as soon as one stage yields a value that clears its error bound (or all difference tails are zero); otherwise it returns the last component of the fully accumulated expansion. */ REAL incircleadapt(pa, pb, pc, pd, permanent) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL permanent; +{ + INEXACT REAL adx, bdx, cdx, ady, bdy, cdy; + REAL det, errbound; + + INEXACT REAL bdxcdy1, cdxbdy1, cdxady1, adxcdy1, adxbdy1, bdxady1; + REAL bdxcdy0, cdxbdy0, cdxady0, adxcdy0, adxbdy0, bdxady0; + REAL bc[4], ca[4], ab[4]; + INEXACT REAL bc3, ca3, ab3; + REAL axbc[8], axxbc[16], aybc[8], ayybc[16], adet[32]; + int axbclen, axxbclen, aybclen, ayybclen, alen; + REAL bxca[8], bxxca[16], byca[8], byyca[16], bdet[32]; + int bxcalen, bxxcalen, bycalen, byycalen, blen; + REAL cxab[8], cxxab[16], cyab[8], cyyab[16], cdet[32]; + int cxablen, cxxablen, cyablen, cyyablen, clen; + REAL abdet[64]; + int ablen; + REAL fin1[1152], fin2[1152]; + REAL *finnow, *finother, *finswap; + int finlength; + + REAL adxtail, bdxtail, cdxtail, adytail, bdytail, cdytail; + INEXACT REAL adxadx1, adyady1, bdxbdx1, bdybdy1,
cdxcdx1, cdycdy1; + REAL adxadx0, adyady0, bdxbdx0, bdybdy0, cdxcdx0, cdycdy0; + REAL aa[4], bb[4], cc[4]; + INEXACT REAL aa3, bb3, cc3; + INEXACT REAL ti1, tj1; + REAL ti0, tj0; + REAL u[4], v[4]; + INEXACT REAL u3, v3; + REAL temp8[8], temp16a[16], temp16b[16], temp16c[16]; + REAL temp32a[32], temp32b[32], temp48[48], temp64[64]; + int temp8len, temp16alen, temp16blen, temp16clen; + int temp32alen, temp32blen, temp48len, temp64len; + REAL axtbb[8], axtcc[8], aytbb[8], aytcc[8]; + int axtbblen, axtcclen, aytbblen, aytcclen; + REAL bxtaa[8], bxtcc[8], bytaa[8], bytcc[8]; + int bxtaalen, bxtcclen, bytaalen, bytcclen; + REAL cxtaa[8], cxtbb[8], cytaa[8], cytbb[8]; + int cxtaalen, cxtbblen, cytaalen, cytbblen; + REAL axtbc[8], aytbc[8], bxtca[8], bytca[8], cxtab[8], cytab[8]; + int axtbclen = 0, aytbclen = 0, bxtcalen = 0, bytcalen = 0, cxtablen = 0, cytablen = 0; + REAL axtbct[16], aytbct[16], bxtcat[16], bytcat[16], cxtabt[16], cytabt[16]; + int axtbctlen, aytbctlen, bxtcatlen, bytcatlen, cxtabtlen, cytabtlen; + REAL axtbctt[8], aytbctt[8], bxtcatt[8]; + REAL bytcatt[8], cxtabtt[8], cytabtt[8]; + int axtbcttlen, aytbcttlen, bxtcattlen, bytcattlen, cxtabttlen, cytabttlen; + REAL abt[8], bct[8], cat[8]; + int abtlen, bctlen, catlen; + REAL abtt[4], bctt[4], catt[4]; + int abttlen, bcttlen, cattlen; + INEXACT REAL abtt3, bctt3, catt3; + REAL negate; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + + /* Rounded coordinate differences of pa, pb, pc relative to pd. */ adx = (REAL)(pa[0] - pd[0]); + bdx = (REAL)(pb[0] - pd[0]); + cdx = (REAL)(pc[0] - pd[0]); + ady = (REAL)(pa[1] - pd[1]); + bdy = (REAL)(pb[1] - pd[1]); + cdy = (REAL)(pc[1] - pd[1]); + + /* adet = (adx^2 + ady^2) * (bdx*cdy - cdx*bdy), assembled as an expansion from the rounded differences; bdet and cdet below follow the same pattern with the roles of a, b, c rotated. */ Two_Product(bdx, cdy, bdxcdy1, bdxcdy0); + Two_Product(cdx, bdy, cdxbdy1, cdxbdy0); + Two_Two_Diff(bdxcdy1, bdxcdy0, cdxbdy1, cdxbdy0, bc3, bc[2], bc[1], bc[0]); + bc[3] = bc3; + axbclen = scale_expansion_zeroelim(4, bc, adx, axbc); + axxbclen =
scale_expansion_zeroelim(axbclen, axbc, adx, axxbc); + aybclen = scale_expansion_zeroelim(4, bc, ady, aybc); + ayybclen = scale_expansion_zeroelim(aybclen, aybc, ady, ayybc); + alen = fast_expansion_sum_zeroelim(axxbclen, axxbc, ayybclen, ayybc, adet); + + Two_Product(cdx, ady, cdxady1, cdxady0); + Two_Product(adx, cdy, adxcdy1, adxcdy0); + Two_Two_Diff(cdxady1, cdxady0, adxcdy1, adxcdy0, ca3, ca[2], ca[1], ca[0]); + ca[3] = ca3; + bxcalen = scale_expansion_zeroelim(4, ca, bdx, bxca); + bxxcalen = scale_expansion_zeroelim(bxcalen, bxca, bdx, bxxca); + bycalen = scale_expansion_zeroelim(4, ca, bdy, byca); + byycalen = scale_expansion_zeroelim(bycalen, byca, bdy, byyca); + blen = fast_expansion_sum_zeroelim(bxxcalen, bxxca, byycalen, byyca, bdet); + + Two_Product(adx, bdy, adxbdy1, adxbdy0); + Two_Product(bdx, ady, bdxady1, bdxady0); + Two_Two_Diff(adxbdy1, adxbdy0, bdxady1, bdxady0, ab3, ab[2], ab[1], ab[0]); + ab[3] = ab3; + cxablen = scale_expansion_zeroelim(4, ab, cdx, cxab); + cxxablen = scale_expansion_zeroelim(cxablen, cxab, cdx, cxxab); + cyablen = scale_expansion_zeroelim(4, ab, cdy, cyab); + cyyablen = scale_expansion_zeroelim(cyablen, cyab, cdy, cyyab); + clen = fast_expansion_sum_zeroelim(cxxablen, cxxab, cyyablen, cyyab, cdet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + finlength = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, fin1); + + /* First test: take the estimate of the expansion in fin1 and accept it if its magnitude reaches iccerrboundB * permanent. */ det = estimate(finlength, fin1); + errbound = iccerrboundB * permanent; + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + + /* Compute the tail of each coordinate difference (presumably its roundoff error; see the Two_Diff_Tail macro). If all six tails are zero, det is returned unchanged. */ Two_Diff_Tail(pa[0], pd[0], adx, adxtail); + Two_Diff_Tail(pa[1], pd[1], ady, adytail); + Two_Diff_Tail(pb[0], pd[0], bdx, bdxtail); + Two_Diff_Tail(pb[1], pd[1], bdy, bdytail); + Two_Diff_Tail(pc[0], pd[0], cdx, cdxtail); + Two_Diff_Tail(pc[1], pd[1], cdy, cdytail); + if((adxtail == 0.0) && (bdxtail == 0.0) && (cdxtail == 0.0) && (adytail == 0.0) && (bdytail == 0.0) && (cdytail == 0.0)) + { + return det; + } + + /* Second test: add a correction that is linear in the tails to det, then retest against a bound that also grows with |det|. */ errbound = iccerrboundC *
permanent + resulterrbound * Absolute(det); + det += ((adx * adx + ady * ady) * ((bdx * cdytail + cdy * bdxtail) - (bdy * cdxtail + cdx * bdytail)) + + 2.0 * (adx * adxtail + ady * adytail) * (bdx * cdy - bdy * cdx)) + + ((bdx * bdx + bdy * bdy) * ((cdx * adytail + ady * cdxtail) - (cdy * adxtail + adx * cdytail)) + + 2.0 * (bdx * bdxtail + bdy * bdytail) * (cdx * ady - cdy * adx)) + + ((cdx * cdx + cdy * cdy) * ((adx * bdytail + bdy * adxtail) - (ady * bdxtail + bdx * adytail)) + + 2.0 * (cdx * cdxtail + cdy * cdytail) * (adx * bdy - ady * bdx)); + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + + /* fin1 and fin2 alternate as input and output: every accumulation below reads finnow, writes finother, then swaps the two pointers. */ finnow = fin1; + finother = fin2; + + /* aa, bb, cc hold the squared lengths (adx^2 + ady^2, etc.) as 4-component expansions; each is computed only when one of the tails that later multiplies it is nonzero. */ if((bdxtail != 0.0) || (bdytail != 0.0) || (cdxtail != 0.0) || (cdytail != 0.0)) + { + Square(adx, adxadx1, adxadx0); + Square(ady, adyady1, adyady0); + Two_Two_Sum(adxadx1, adxadx0, adyady1, adyady0, aa3, aa[2], aa[1], aa[0]); + aa[3] = aa3; + } + if((cdxtail != 0.0) || (cdytail != 0.0) || (adxtail != 0.0) || (adytail != 0.0)) + { + Square(bdx, bdxbdx1, bdxbdx0); + Square(bdy, bdybdy1, bdybdy0); + Two_Two_Sum(bdxbdx1, bdxbdx0, bdybdy1, bdybdy0, bb3, bb[2], bb[1], bb[0]); + bb[3] = bb3; + } + if((adxtail != 0.0) || (adytail != 0.0) || (bdxtail != 0.0) || (bdytail != 0.0)) + { + Square(cdx, cdxcdx1, cdxcdx0); + Square(cdy, cdycdy1, cdycdy0); + Two_Two_Sum(cdxcdx1, cdxcdx0, cdycdy1, cdycdy0, cc3, cc[2], cc[1], cc[0]); + cc[3] = cc3; + } + + /* Terms linear in a single tail: each nonzero tail contributes one temp48 sum to the running total. The axtbc-style products are kept because the later tail-squared terms reuse them. */ if(adxtail != 0.0) + { + axtbclen = scale_expansion_zeroelim(4, bc, adxtail, axtbc); + temp16alen = scale_expansion_zeroelim(axtbclen, axtbc, 2.0 * adx, temp16a); + + axtcclen = scale_expansion_zeroelim(4, cc, adxtail, axtcc); + temp16blen = scale_expansion_zeroelim(axtcclen, axtcc, bdy, temp16b); + + axtbblen = scale_expansion_zeroelim(4, bb, adxtail, axtbb); + temp16clen = scale_expansion_zeroelim(axtbblen, axtbb, -cdy, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen,
temp16c, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(adytail != 0.0) + { + aytbclen = scale_expansion_zeroelim(4, bc, adytail, aytbc); + temp16alen = scale_expansion_zeroelim(aytbclen, aytbc, 2.0 * ady, temp16a); + + aytbblen = scale_expansion_zeroelim(4, bb, adytail, aytbb); + temp16blen = scale_expansion_zeroelim(aytbblen, aytbb, cdx, temp16b); + + aytcclen = scale_expansion_zeroelim(4, cc, adytail, aytcc); + temp16clen = scale_expansion_zeroelim(aytcclen, aytcc, -bdx, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdxtail != 0.0) + { + bxtcalen = scale_expansion_zeroelim(4, ca, bdxtail, bxtca); + temp16alen = scale_expansion_zeroelim(bxtcalen, bxtca, 2.0 * bdx, temp16a); + + bxtaalen = scale_expansion_zeroelim(4, aa, bdxtail, bxtaa); + temp16blen = scale_expansion_zeroelim(bxtaalen, bxtaa, cdy, temp16b); + + bxtcclen = scale_expansion_zeroelim(4, cc, bdxtail, bxtcc); + temp16clen = scale_expansion_zeroelim(bxtcclen, bxtcc, -ady, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdytail != 0.0) + { + bytcalen = scale_expansion_zeroelim(4, ca, bdytail, bytca); + temp16alen = scale_expansion_zeroelim(bytcalen, bytca, 2.0 * bdy, temp16a); + + bytcclen = scale_expansion_zeroelim(4, cc, bdytail, bytcc); +
temp16blen = scale_expansion_zeroelim(bytcclen, bytcc, adx, temp16b); + + bytaalen = scale_expansion_zeroelim(4, aa, bdytail, bytaa); + temp16clen = scale_expansion_zeroelim(bytaalen, bytaa, -cdx, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdxtail != 0.0) + { + cxtablen = scale_expansion_zeroelim(4, ab, cdxtail, cxtab); + temp16alen = scale_expansion_zeroelim(cxtablen, cxtab, 2.0 * cdx, temp16a); + + cxtbblen = scale_expansion_zeroelim(4, bb, cdxtail, cxtbb); + temp16blen = scale_expansion_zeroelim(cxtbblen, cxtbb, ady, temp16b); + + cxtaalen = scale_expansion_zeroelim(4, aa, cdxtail, cxtaa); + temp16clen = scale_expansion_zeroelim(cxtaalen, cxtaa, -bdy, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdytail != 0.0) + { + cytablen = scale_expansion_zeroelim(4, ab, cdytail, cytab); + temp16alen = scale_expansion_zeroelim(cytablen, cytab, 2.0 * cdy, temp16a); + + cytaalen = scale_expansion_zeroelim(4, aa, cdytail, cytaa); + temp16blen = scale_expansion_zeroelim(cytaalen, cytaa, bdx, temp16b); + + cytbblen = scale_expansion_zeroelim(4, bb, cdytail, cytbb); + temp16clen = scale_expansion_zeroelim(cytbblen, cytbb, -adx, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48); + finlength =
fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + + /* Remaining terms for pa's tails. bct and bctt hold the tail parts of the b-c cross product (first-order and tail-times-tail); when pb and pc have no tails they are set to a single zero component. */ if((adxtail != 0.0) || (adytail != 0.0)) + { + if((bdxtail != 0.0) || (bdytail != 0.0) || (cdxtail != 0.0) || (cdytail != 0.0)) + { + Two_Product(bdxtail, cdy, ti1, ti0); + Two_Product(bdx, cdytail, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, u3, u[2], u[1], u[0]); + u[3] = u3; + negate = -bdy; + Two_Product(cdxtail, negate, ti1, ti0); + negate = -bdytail; + Two_Product(cdx, negate, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, v3, v[2], v[1], v[0]); + v[3] = v3; + bctlen = fast_expansion_sum_zeroelim(4, u, 4, v, bct); + + Two_Product(bdxtail, cdytail, ti1, ti0); + Two_Product(cdxtail, bdytail, tj1, tj0); + Two_Two_Diff(ti1, ti0, tj1, tj0, bctt3, bctt[2], bctt[1], bctt[0]); + bctt[3] = bctt3; + bcttlen = 4; + } + else + { + bct[0] = 0.0; + bctlen = 1; + bctt[0] = 0.0; + bcttlen = 1; + } + + if(adxtail != 0.0) + { + temp16alen = scale_expansion_zeroelim(axtbclen, axtbc, adxtail, temp16a); + axtbctlen = scale_expansion_zeroelim(bctlen, bct, adxtail, axtbct); + temp32alen = scale_expansion_zeroelim(axtbctlen, axtbct, 2.0 * adx, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(bdytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, cc, adxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, bdytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, bb, -adxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, cdytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow,
temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + + temp32alen = scale_expansion_zeroelim(axtbctlen, axtbct, adxtail, temp32a); + axtbcttlen = scale_expansion_zeroelim(bcttlen, bctt, adxtail, axtbctt); + temp16alen = scale_expansion_zeroelim(axtbcttlen, axtbctt, 2.0 * adx, temp16a); + temp16blen = scale_expansion_zeroelim(axtbcttlen, axtbctt, adxtail, temp16b); + temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(adytail != 0.0) + { + temp16alen = scale_expansion_zeroelim(aytbclen, aytbc, adytail, temp16a); + aytbctlen = scale_expansion_zeroelim(bctlen, bct, adytail, aytbct); + temp32alen = scale_expansion_zeroelim(aytbctlen, aytbct, 2.0 * ady, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + temp32alen = scale_expansion_zeroelim(aytbctlen, aytbct, adytail, temp32a); + aytbcttlen = scale_expansion_zeroelim(bcttlen, bctt, adytail, aytbctt); + temp16alen = scale_expansion_zeroelim(aytbcttlen, aytbctt, 2.0 * ady, temp16a); + temp16blen = scale_expansion_zeroelim(aytbcttlen, aytbctt, adytail, temp16b); + temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + /* Same pattern for pb's tails, using cat and catt (tail parts of the c-a cross product). */ if((bdxtail != 0.0) || (bdytail != 0.0)) + { + if((cdxtail != 0.0) ||
(cdytail != 0.0) || (adxtail != 0.0) || (adytail != 0.0)) + { + Two_Product(cdxtail, ady, ti1, ti0); + Two_Product(cdx, adytail, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, u3, u[2], u[1], u[0]); + u[3] = u3; + negate = -cdy; + Two_Product(adxtail, negate, ti1, ti0); + negate = -cdytail; + Two_Product(adx, negate, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, v3, v[2], v[1], v[0]); + v[3] = v3; + catlen = fast_expansion_sum_zeroelim(4, u, 4, v, cat); + + Two_Product(cdxtail, adytail, ti1, ti0); + Two_Product(adxtail, cdytail, tj1, tj0); + Two_Two_Diff(ti1, ti0, tj1, tj0, catt3, catt[2], catt[1], catt[0]); + catt[3] = catt3; + cattlen = 4; + } + else + { + cat[0] = 0.0; + catlen = 1; + catt[0] = 0.0; + cattlen = 1; + } + + if(bdxtail != 0.0) + { + temp16alen = scale_expansion_zeroelim(bxtcalen, bxtca, bdxtail, temp16a); + bxtcatlen = scale_expansion_zeroelim(catlen, cat, bdxtail, bxtcat); + temp32alen = scale_expansion_zeroelim(bxtcatlen, bxtcat, 2.0 * bdx, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(cdytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, aa, bdxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, cdytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(adytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, cc, -bdxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, adytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + + temp32alen = scale_expansion_zeroelim(bxtcatlen, bxtcat, bdxtail, temp32a); + bxtcattlen =
scale_expansion_zeroelim(cattlen, catt, bdxtail, bxtcatt); + temp16alen = scale_expansion_zeroelim(bxtcattlen, bxtcatt, 2.0 * bdx, temp16a); + temp16blen = scale_expansion_zeroelim(bxtcattlen, bxtcatt, bdxtail, temp16b); + temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdytail != 0.0) + { + temp16alen = scale_expansion_zeroelim(bytcalen, bytca, bdytail, temp16a); + bytcatlen = scale_expansion_zeroelim(catlen, cat, bdytail, bytcat); + temp32alen = scale_expansion_zeroelim(bytcatlen, bytcat, 2.0 * bdy, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + temp32alen = scale_expansion_zeroelim(bytcatlen, bytcat, bdytail, temp32a); + bytcattlen = scale_expansion_zeroelim(cattlen, catt, bdytail, bytcatt); + temp16alen = scale_expansion_zeroelim(bytcattlen, bytcatt, 2.0 * bdy, temp16a); + temp16blen = scale_expansion_zeroelim(bytcattlen, bytcatt, bdytail, temp16b); + temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + /* Same pattern for pc's tails, using abt and abtt (tail parts of the a-b cross product). */ if((cdxtail != 0.0) || (cdytail != 0.0)) + { + if((adxtail != 0.0) || (adytail != 0.0) || (bdxtail != 0.0) || (bdytail != 0.0)) + { + Two_Product(adxtail, bdy, ti1, ti0); + Two_Product(adx, bdytail, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, u3, u[2], u[1], u[0]);
+ u[3] = u3; + negate = -ady; + Two_Product(bdxtail, negate, ti1, ti0); + negate = -adytail; + Two_Product(bdx, negate, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, v3, v[2], v[1], v[0]); + v[3] = v3; + abtlen = fast_expansion_sum_zeroelim(4, u, 4, v, abt); + + Two_Product(adxtail, bdytail, ti1, ti0); + Two_Product(bdxtail, adytail, tj1, tj0); + Two_Two_Diff(ti1, ti0, tj1, tj0, abtt3, abtt[2], abtt[1], abtt[0]); + abtt[3] = abtt3; + abttlen = 4; + } + else + { + abt[0] = 0.0; + abtlen = 1; + abtt[0] = 0.0; + abttlen = 1; + } + + if(cdxtail != 0.0) + { + temp16alen = scale_expansion_zeroelim(cxtablen, cxtab, cdxtail, temp16a); + cxtabtlen = scale_expansion_zeroelim(abtlen, abt, cdxtail, cxtabt); + temp32alen = scale_expansion_zeroelim(cxtabtlen, cxtabt, 2.0 * cdx, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(adytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, bb, cdxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, adytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, aa, -cdxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, bdytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + + temp32alen = scale_expansion_zeroelim(cxtabtlen, cxtabt, cdxtail, temp32a); + cxtabttlen = scale_expansion_zeroelim(abttlen, abtt, cdxtail, cxtabtt); + temp16alen = scale_expansion_zeroelim(cxtabttlen, cxtabtt, 2.0 * cdx, temp16a); + temp16blen = scale_expansion_zeroelim(cxtabttlen, cxtabtt, cdxtail, temp16b); +
temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdytail != 0.0) + { + temp16alen = scale_expansion_zeroelim(cytablen, cytab, cdytail, temp16a); + cytabtlen = scale_expansion_zeroelim(abtlen, abt, cdytail, cytabt); + temp32alen = scale_expansion_zeroelim(cytabtlen, cytabt, 2.0 * cdy, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + temp32alen = scale_expansion_zeroelim(cytabtlen, cytabt, cdytail, temp32a); + cytabttlen = scale_expansion_zeroelim(abttlen, abtt, cdytail, cytabtt); + temp16alen = scale_expansion_zeroelim(cytabttlen, cytabtt, 2.0 * cdy, temp16a); + temp16blen = scale_expansion_zeroelim(cytabttlen, cytabtt, cdytail, temp16b); + temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + + return finnow[finlength - 1]; +} + +/* incircle(): 2D incircle determinant with a floating-point filter. pa, pb, pc, pd are 2D points (only indices 0 and 1 are read). The determinant is first evaluated in ordinary floating-point arithmetic; if its magnitude exceeds iccerrboundA * permanent it is returned as is, otherwise the decision is handed to incircleadapt(). */ REAL incircle(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + REAL adx, bdx, cdx, ady, bdy, cdy; + REAL bdxcdy, cdxbdy, cdxady, adxcdy, adxbdy, bdxady; + REAL alift, blift, clift; + REAL det; + REAL permanent, errbound; + + /* Coordinate differences of pa, pb, pc relative to pd. */ adx = pa[0] - pd[0]; + bdx = pb[0] - pd[0]; + cdx = pc[0] - pd[0]; + ady = pa[1] - pd[1]; + bdy = pb[1] - pd[1]; + cdy = pc[1] - pd[1]; + + /* The products are kept in named variables because both det and permanent reuse them; alift, blift, clift are the squared lengths of the difference vectors. */ bdxcdy = bdx * cdy; + cdxbdy = cdx * bdy; + alift = adx * adx + ady *
ady; + + cdxady = cdx * ady; + adxcdy = adx * cdy; + blift = bdx * bdx + bdy * bdy; + + adxbdy = adx * bdy; + bdxady = bdx * ady; + clift = cdx * cdx + cdy * cdy; + + /* Determinant evaluated in ordinary floating-point arithmetic. */ det = alift * (bdxcdy - cdxbdy) + blift * (cdxady - adxcdy) + clift * (adxbdy - bdxady); + + /* permanent: the same expression with every product replaced by its absolute value; it scales the error bound here and is passed on to incircleadapt(). */ permanent = (Absolute(bdxcdy) + Absolute(cdxbdy)) * alift + (Absolute(cdxady) + Absolute(adxcdy)) * blift + + (Absolute(adxbdy) + Absolute(bdxady)) * clift; + errbound = iccerrboundA * permanent; + if((det > errbound) || (-det > errbound)) + { + return det; + } + + /* |det| did not exceed the error bound: defer to the adaptive routine. */ return incircleadapt(pa, pb, pc, pd, permanent); +} + +/*****************************************************************************/ +/* */ +/* inspherefast() Approximate 3D insphere test. Nonrobust. */ +/* insphereexact() Exact 3D insphere test. Robust. */ +/* insphereslow() Another exact 3D insphere test. Robust. */ +/* insphere() Adaptive exact 3D insphere test. Robust. */ +/* */ +/* Return a positive value if the point pe lies inside the */ +/* sphere passing through pa, pb, pc, and pd; a negative value */ +/* if it lies outside; and zero if the five points are */ +/* cospherical. The points pa, pb, pc, and pd must be ordered */ +/* so that they have a positive orientation (as defined by */ +/* orient3d()), or the sign of the result will be reversed. */ +/* */ +/* Only the first and last routine should be used; the middle two are for */ +/* timings. */ +/* */ +/* The last three use exact arithmetic to ensure a correct answer. The */ +/* result returned is the determinant of a matrix. In insphere() only, */ +/* this determinant is computed adaptively, in the sense that exact */ +/* arithmetic is used only to the degree it is needed to ensure that the */ +/* returned value has the correct sign. Hence, insphere() is usually quite */ +/* fast, but will run more slowly when the input points are cospherical or */ +/* nearly so.
*/ +/* */ +/*****************************************************************************/ + +/* inspherefast(): evaluates the insphere determinant in ordinary floating-point arithmetic with no error-bound check. pa, pb, pc, pd, pe are 3D points (indices 0..2 are read). */ REAL inspherefast(pa, pb, pc, pd, pe) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL *pe; +{ + REAL aex, bex, cex, dex; + REAL aey, bey, cey, dey; + REAL aez, bez, cez, dez; + REAL alift, blift, clift, dlift; + REAL ab, bc, cd, da, ac, bd; + REAL abc, bcd, cda, dab; + + /* Coordinates of pa..pd relative to pe. */ aex = pa[0] - pe[0]; + bex = pb[0] - pe[0]; + cex = pc[0] - pe[0]; + dex = pd[0] - pe[0]; + aey = pa[1] - pe[1]; + bey = pb[1] - pe[1]; + cey = pc[1] - pe[1]; + dey = pd[1] - pe[1]; + aez = pa[2] - pe[2]; + bez = pb[2] - pe[2]; + cez = pc[2] - pe[2]; + dez = pd[2] - pe[2]; + + /* 2x2 determinants of the x,y components for each pair of points. */ ab = aex * bey - bex * aey; + bc = bex * cey - cex * bey; + cd = cex * dey - dex * cey; + da = dex * aey - aex * dey; + + ac = aex * cey - cex * aey; + bd = bex * dey - dex * bey; + + /* 3x3 determinants built from the 2x2 ones and the z components. */ abc = aez * bc - bez * ac + cez * ab; + bcd = bez * cd - cez * bd + dez * bc; + cda = cez * da + dez * ac + aez * cd; + dab = dez * ab + aez * bd + bez * da; + + /* Squared distance of each point from pe. */ alift = aex * aex + aey * aey + aez * aez; + blift = bex * bex + bey * bey + bez * bez; + clift = cex * cex + cey * cey + cez * cez; + dlift = dex * dex + dey * dey + dez * dez; + + return (dlift * abc - clift * dab) + (blift * cda - alift * bcd); +} + +REAL insphereexact(pa, pb, pc, pd, pe) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL *pe; +{ + INEXACT REAL axby1, bxcy1, cxdy1, dxey1, exay1; + INEXACT REAL bxay1, cxby1, dxcy1, exdy1, axey1; + INEXACT REAL axcy1, bxdy1, cxey1, dxay1, exby1; + INEXACT REAL cxay1, dxby1, excy1, axdy1, bxey1; + REAL axby0, bxcy0, cxdy0, dxey0, exay0; + REAL bxay0, cxby0, dxcy0, exdy0, axey0; + REAL axcy0, bxdy0, cxey0, dxay0, exby0; + REAL cxay0, dxby0, excy0, axdy0, bxey0; + REAL ab[4], bc[4], cd[4], de[4], ea[4]; + REAL ac[4], bd[4], ce[4], da[4], eb[4]; + REAL temp8a[8], temp8b[8], temp16[16]; + int temp8alen, temp8blen, temp16len; + REAL abc[24], bcd[24], cde[24], dea[24], eab[24]; + REAL abd[24], bce[24], cda[24], deb[24], eac[24]; + int abclen, bcdlen,
cdelen, dealen, eablen; + int abdlen, bcelen, cdalen, deblen, eaclen; + REAL temp48a[48], temp48b[48]; + int temp48alen, temp48blen; + REAL abcd[96], bcde[96], cdea[96], deab[96], eabc[96]; + int abcdlen, bcdelen, cdealen, deablen, eabclen; + REAL temp192[192]; + REAL det384x[384], det384y[384], det384z[384]; + int xlen, ylen, zlen; + REAL detxy[768]; + int xylen; + REAL adet[1152], bdet[1152], cdet[1152], ddet[1152], edet[1152]; + int alen, blen, clen, dlen, elen; + REAL abdet[2304], cddet[2304], cdedet[3456]; + int ablen, cdlen; + REAL deter[5760]; + int deterlen; + int i; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + + Two_Product(pa[0], pb[1], axby1, axby0); + Two_Product(pb[0], pa[1], bxay1, bxay0); + Two_Two_Diff(axby1, axby0, bxay1, bxay0, ab[3], ab[2], ab[1], ab[0]); + + Two_Product(pb[0], pc[1], bxcy1, bxcy0); + Two_Product(pc[0], pb[1], cxby1, cxby0); + Two_Two_Diff(bxcy1, bxcy0, cxby1, cxby0, bc[3], bc[2], bc[1], bc[0]); + + Two_Product(pc[0], pd[1], cxdy1, cxdy0); + Two_Product(pd[0], pc[1], dxcy1, dxcy0); + Two_Two_Diff(cxdy1, cxdy0, dxcy1, dxcy0, cd[3], cd[2], cd[1], cd[0]); + + Two_Product(pd[0], pe[1], dxey1, dxey0); + Two_Product(pe[0], pd[1], exdy1, exdy0); + Two_Two_Diff(dxey1, dxey0, exdy1, exdy0, de[3], de[2], de[1], de[0]); + + Two_Product(pe[0], pa[1], exay1, exay0); + Two_Product(pa[0], pe[1], axey1, axey0); + Two_Two_Diff(exay1, exay0, axey1, axey0, ea[3], ea[2], ea[1], ea[0]); + + Two_Product(pa[0], pc[1], axcy1, axcy0); + Two_Product(pc[0], pa[1], cxay1, cxay0); + Two_Two_Diff(axcy1, axcy0, cxay1, cxay0, ac[3], ac[2], ac[1], ac[0]); + + Two_Product(pb[0], pd[1], bxdy1, bxdy0); + Two_Product(pd[0], pb[1], dxby1, dxby0); + Two_Two_Diff(bxdy1, bxdy0, dxby1, dxby0, bd[3], bd[2], bd[1], bd[0]); + + Two_Product(pc[0], pe[1], cxey1, cxey0); + Two_Product(pe[0], pc[1], excy1, excy0); + Two_Two_Diff(cxey1, cxey0, 
excy1, excy0, ce[3], ce[2], ce[1], ce[0]); + + Two_Product(pd[0], pa[1], dxay1, dxay0); + Two_Product(pa[0], pd[1], axdy1, axdy0); + Two_Two_Diff(dxay1, dxay0, axdy1, axdy0, da[3], da[2], da[1], da[0]); + + Two_Product(pe[0], pb[1], exby1, exby0); + Two_Product(pb[0], pe[1], bxey1, bxey0); + Two_Two_Diff(exby1, exby0, bxey1, bxey0, eb[3], eb[2], eb[1], eb[0]); + + temp8alen = scale_expansion_zeroelim(4, bc, pa[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, ac, -pb[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, ab, pc[2], temp8a); + abclen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, abc); + + temp8alen = scale_expansion_zeroelim(4, cd, pb[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, bd, -pc[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, bc, pd[2], temp8a); + bcdlen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, bcd); + + temp8alen = scale_expansion_zeroelim(4, de, pc[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, ce, -pd[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, cd, pe[2], temp8a); + cdelen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, cde); + + temp8alen = scale_expansion_zeroelim(4, ea, pd[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, da, -pe[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, de, pa[2], temp8a); + dealen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, dea); + + temp8alen = scale_expansion_zeroelim(4, ab, pe[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, eb, -pa[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, 
temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, ea, pb[2], temp8a); + eablen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, eab); + + temp8alen = scale_expansion_zeroelim(4, bd, pa[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, da, pb[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, ab, pd[2], temp8a); + abdlen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, abd); + + temp8alen = scale_expansion_zeroelim(4, ce, pb[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, eb, pc[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, bc, pe[2], temp8a); + bcelen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, bce); + + temp8alen = scale_expansion_zeroelim(4, da, pc[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, ac, pd[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, cd, pa[2], temp8a); + cdalen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, cda); + + temp8alen = scale_expansion_zeroelim(4, eb, pd[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, bd, pe[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, de, pb[2], temp8a); + deblen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, deb); + + temp8alen = scale_expansion_zeroelim(4, ac, pe[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, ce, pa[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, ea, pc[2], temp8a); + eaclen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, eac); + + temp48alen = 
fast_expansion_sum_zeroelim(cdelen, cde, bcelen, bce, temp48a); + temp48blen = fast_expansion_sum_zeroelim(deblen, deb, bcdlen, bcd, temp48b); + for(i = 0; i < temp48blen; i++) + { + temp48b[i] = -temp48b[i]; + } + bcdelen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, bcde); + xlen = scale_expansion_zeroelim(bcdelen, bcde, pa[0], temp192); + xlen = scale_expansion_zeroelim(xlen, temp192, pa[0], det384x); + ylen = scale_expansion_zeroelim(bcdelen, bcde, pa[1], temp192); + ylen = scale_expansion_zeroelim(ylen, temp192, pa[1], det384y); + zlen = scale_expansion_zeroelim(bcdelen, bcde, pa[2], temp192); + zlen = scale_expansion_zeroelim(zlen, temp192, pa[2], det384z); + xylen = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy); + alen = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, adet); + + temp48alen = fast_expansion_sum_zeroelim(dealen, dea, cdalen, cda, temp48a); + temp48blen = fast_expansion_sum_zeroelim(eaclen, eac, cdelen, cde, temp48b); + for(i = 0; i < temp48blen; i++) + { + temp48b[i] = -temp48b[i]; + } + cdealen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, cdea); + xlen = scale_expansion_zeroelim(cdealen, cdea, pb[0], temp192); + xlen = scale_expansion_zeroelim(xlen, temp192, pb[0], det384x); + ylen = scale_expansion_zeroelim(cdealen, cdea, pb[1], temp192); + ylen = scale_expansion_zeroelim(ylen, temp192, pb[1], det384y); + zlen = scale_expansion_zeroelim(cdealen, cdea, pb[2], temp192); + zlen = scale_expansion_zeroelim(zlen, temp192, pb[2], det384z); + xylen = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy); + blen = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, bdet); + + temp48alen = fast_expansion_sum_zeroelim(eablen, eab, deblen, deb, temp48a); + temp48blen = fast_expansion_sum_zeroelim(abdlen, abd, dealen, dea, temp48b); + for(i = 0; i < temp48blen; i++) + { + temp48b[i] = -temp48b[i]; + } + deablen = fast_expansion_sum_zeroelim(temp48alen, 
temp48a, temp48blen, temp48b, deab); + xlen = scale_expansion_zeroelim(deablen, deab, pc[0], temp192); + xlen = scale_expansion_zeroelim(xlen, temp192, pc[0], det384x); + ylen = scale_expansion_zeroelim(deablen, deab, pc[1], temp192); + ylen = scale_expansion_zeroelim(ylen, temp192, pc[1], det384y); + zlen = scale_expansion_zeroelim(deablen, deab, pc[2], temp192); + zlen = scale_expansion_zeroelim(zlen, temp192, pc[2], det384z); + xylen = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy); + clen = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, cdet); + + temp48alen = fast_expansion_sum_zeroelim(abclen, abc, eaclen, eac, temp48a); + temp48blen = fast_expansion_sum_zeroelim(bcelen, bce, eablen, eab, temp48b); + for(i = 0; i < temp48blen; i++) + { + temp48b[i] = -temp48b[i]; + } + eabclen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, eabc); + xlen = scale_expansion_zeroelim(eabclen, eabc, pd[0], temp192); + xlen = scale_expansion_zeroelim(xlen, temp192, pd[0], det384x); + ylen = scale_expansion_zeroelim(eabclen, eabc, pd[1], temp192); + ylen = scale_expansion_zeroelim(ylen, temp192, pd[1], det384y); + zlen = scale_expansion_zeroelim(eabclen, eabc, pd[2], temp192); + zlen = scale_expansion_zeroelim(zlen, temp192, pd[2], det384z); + xylen = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy); + dlen = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, ddet); + + temp48alen = fast_expansion_sum_zeroelim(bcdlen, bcd, abdlen, abd, temp48a); + temp48blen = fast_expansion_sum_zeroelim(cdalen, cda, abclen, abc, temp48b); + for(i = 0; i < temp48blen; i++) + { + temp48b[i] = -temp48b[i]; + } + abcdlen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, abcd); + xlen = scale_expansion_zeroelim(abcdlen, abcd, pe[0], temp192); + xlen = scale_expansion_zeroelim(xlen, temp192, pe[0], det384x); + ylen = scale_expansion_zeroelim(abcdlen, abcd, pe[1], temp192); + ylen = 
scale_expansion_zeroelim(ylen, temp192, pe[1], det384y);
+  zlen = scale_expansion_zeroelim(abcdlen, abcd, pe[2], temp192);
+  zlen = scale_expansion_zeroelim(zlen, temp192, pe[2], det384z);
+  xylen = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy);
+  elen = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, edet);
+
+  ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet);
+  cdlen = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet);
+  cdelen = fast_expansion_sum_zeroelim(cdlen, cddet, elen, edet, cdedet);
+  deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdelen, cdedet, deter);
+
+  return deter[deterlen - 1];
+}
+/*! \brief Evaluates the insphere determinant of pa, pb, pc, pd relative to pe
+ *         entirely with expansion arithmetic, without any floating-point
+ *         filter.
+ *
+ *  Every coordinate difference p? - pe is split by Two_Diff() into a leading
+ *  value and a tail (aex / aextail, ...). The six 2x2 cross terms ab, bc, cd,
+ *  da, ac and bd are built from Two_Two_Product() results. For each of the
+ *  points a..d the matching 3x3 cofactor is assembled in temp192 and then
+ *  multiplied by the squared differences, expanded per axis as
+ *  head * head + 2 * head * tail + tail * tail. The four partial results
+ *  adet, bdet, cdet and ddet are finally summed into 'deter'.
+ *
+ *  The terms are the same ones insphere() combines in plain floating point
+ *  (dlift * abc - clift * dab + blift * cda - alift * bcd). Unlike
+ *  insphereadapt() there is no error-bound test and no early return: the
+ *  complete computation always runs.
+ *
+ *  \param[in] pa First point; elements [0], [1] and [2] are read.
+ *  \param[in] pb Second point, same layout.
+ *  \param[in] pc Third point, same layout.
+ *  \param[in] pd Fourth point, same layout.
+ *  \param[in] pe Point all differences are taken against, same layout.
+ *
+ *  \return deter[deterlen - 1], the last component of the final expansion.
+ *          NOTE(review): presumably the last component of a zero-eliminated
+ *          expansion is its largest one, so that its sign is the sign of the
+ *          determinant -- confirm against fast_expansion_sum_zeroelim().
+ */
+REAL insphereslow(pa, pb, pc, pd, pe) REAL *pa;
+REAL *pb;
+REAL *pc;
+REAL *pd;
+REAL *pe;
+{
+  INEXACT REAL aex, bex, cex, dex, aey, bey, cey, dey, aez, bez, cez, dez; /* leading parts of p? - pe */
+  REAL aextail, bextail, cextail, dextail;                                 /* matching tails from Two_Diff() */
+  REAL aeytail, beytail, ceytail, deytail;
+  REAL aeztail, beztail, ceztail, deztail;
+  REAL negate, negatetail; /* negated head / tail handed to Two_Two_Product() for the subtracted cross term */
+  INEXACT REAL axby7, bxcy7, cxdy7, dxay7, axcy7, bxdy7;
+  INEXACT REAL bxay7, cxby7, dxcy7, axdy7, cxay7, dxby7;
+  REAL axby[8], bxcy[8], cxdy[8], dxay[8], axcy[8], bxdy[8];
+  REAL bxay[8], cxby[8], dxcy[8], axdy[8], cxay[8], dxby[8];
+  REAL ab[16], bc[16], cd[16], da[16], ac[16], bd[16]; /* 2x2 cross terms, each the sum of two 8-term products */
+  int ablen, bclen, cdlen, dalen, aclen, bdlen;
+  REAL temp32a[32], temp32b[32], temp64a[64], temp64b[64], temp64c[64];
+  int temp32alen, temp32blen, temp64alen, temp64blen, temp64clen;
+  REAL temp128[128], temp192[192]; /* temp192 receives the 3x3 cofactor of the current point */
+  int temp128len, temp192len;
+  REAL detx[384], detxx[768], detxt[384], detxxt[768], detxtxt[768];
+  int xlen, xxlen, xtlen, xxtlen, xtxtlen;
+  REAL x1[1536], x2[2304];
+  int x1len, x2len;
+  REAL dety[384], detyy[768], detyt[384], detyyt[768], detytyt[768];
+  int ylen, yylen, ytlen, yytlen, ytytlen;
+  REAL y1[1536], y2[2304];
+  int y1len, y2len;
+  REAL detz[384], detzz[768], detzt[384], detzzt[768], detztzt[768];
+  int zlen, zzlen, ztlen, zztlen, ztztlen;
+  REAL z1[1536], z2[2304];
+
int z1len, z2len; + REAL detxy[4608]; + int xylen; + REAL adet[6912], bdet[6912], cdet[6912], ddet[6912]; + int alen, blen, clen, dlen; + REAL abdet[13824], cddet[13824], deter[27648]; + int deterlen; + int i; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL a0hi, a0lo, a1hi, a1lo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j, _k, _l, _m, _n; + REAL _0, _1, _2; + + Two_Diff(pa[0], pe[0], aex, aextail); + Two_Diff(pa[1], pe[1], aey, aeytail); + Two_Diff(pa[2], pe[2], aez, aeztail); + Two_Diff(pb[0], pe[0], bex, bextail); + Two_Diff(pb[1], pe[1], bey, beytail); + Two_Diff(pb[2], pe[2], bez, beztail); + Two_Diff(pc[0], pe[0], cex, cextail); + Two_Diff(pc[1], pe[1], cey, ceytail); + Two_Diff(pc[2], pe[2], cez, ceztail); + Two_Diff(pd[0], pe[0], dex, dextail); + Two_Diff(pd[1], pe[1], dey, deytail); + Two_Diff(pd[2], pe[2], dez, deztail); + + Two_Two_Product(aex, aextail, bey, beytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]); + axby[7] = axby7; + negate = -aey; + negatetail = -aeytail; + Two_Two_Product(bex, bextail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]); + bxay[7] = bxay7; + ablen = fast_expansion_sum_zeroelim(8, axby, 8, bxay, ab); + Two_Two_Product(bex, bextail, cey, ceytail, bxcy7, bxcy[6], bxcy[5], bxcy[4], bxcy[3], bxcy[2], bxcy[1], bxcy[0]); + bxcy[7] = bxcy7; + negate = -bey; + negatetail = -beytail; + Two_Two_Product(cex, cextail, negate, negatetail, cxby7, cxby[6], cxby[5], cxby[4], cxby[3], cxby[2], cxby[1], cxby[0]); + cxby[7] = cxby7; + bclen = fast_expansion_sum_zeroelim(8, bxcy, 8, cxby, bc); + Two_Two_Product(cex, cextail, dey, deytail, cxdy7, cxdy[6], cxdy[5], cxdy[4], cxdy[3], cxdy[2], cxdy[1], cxdy[0]); + cxdy[7] = cxdy7; + negate = -cey; + negatetail = -ceytail; + Two_Two_Product(dex, dextail, negate, negatetail, dxcy7, dxcy[6], dxcy[5], dxcy[4], dxcy[3], dxcy[2], dxcy[1], dxcy[0]); + dxcy[7] = dxcy7; + 
cdlen = fast_expansion_sum_zeroelim(8, cxdy, 8, dxcy, cd); + Two_Two_Product(dex, dextail, aey, aeytail, dxay7, dxay[6], dxay[5], dxay[4], dxay[3], dxay[2], dxay[1], dxay[0]); + dxay[7] = dxay7; + negate = -dey; + negatetail = -deytail; + Two_Two_Product(aex, aextail, negate, negatetail, axdy7, axdy[6], axdy[5], axdy[4], axdy[3], axdy[2], axdy[1], axdy[0]); + axdy[7] = axdy7; + dalen = fast_expansion_sum_zeroelim(8, dxay, 8, axdy, da); + Two_Two_Product(aex, aextail, cey, ceytail, axcy7, axcy[6], axcy[5], axcy[4], axcy[3], axcy[2], axcy[1], axcy[0]); + axcy[7] = axcy7; + negate = -aey; + negatetail = -aeytail; + Two_Two_Product(cex, cextail, negate, negatetail, cxay7, cxay[6], cxay[5], cxay[4], cxay[3], cxay[2], cxay[1], cxay[0]); + cxay[7] = cxay7; + aclen = fast_expansion_sum_zeroelim(8, axcy, 8, cxay, ac); + Two_Two_Product(bex, bextail, dey, deytail, bxdy7, bxdy[6], bxdy[5], bxdy[4], bxdy[3], bxdy[2], bxdy[1], bxdy[0]); + bxdy[7] = bxdy7; + negate = -bey; + negatetail = -beytail; + Two_Two_Product(dex, dextail, negate, negatetail, dxby7, dxby[6], dxby[5], dxby[4], dxby[3], dxby[2], dxby[1], dxby[0]); + dxby[7] = dxby7; + bdlen = fast_expansion_sum_zeroelim(8, bxdy, 8, dxby, bd); + + temp32alen = scale_expansion_zeroelim(cdlen, cd, -bez, temp32a); + temp32blen = scale_expansion_zeroelim(cdlen, cd, -beztail, temp32b); + temp64alen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a); + temp32alen = scale_expansion_zeroelim(bdlen, bd, cez, temp32a); + temp32blen = scale_expansion_zeroelim(bdlen, bd, ceztail, temp32b); + temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b); + temp32alen = scale_expansion_zeroelim(bclen, bc, -dez, temp32a); + temp32blen = scale_expansion_zeroelim(bclen, bc, -deztail, temp32b); + temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c); + temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128); + 
temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, temp128len, temp128, temp192); + xlen = scale_expansion_zeroelim(temp192len, temp192, aex, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, aex, detxx); + xtlen = scale_expansion_zeroelim(temp192len, temp192, aextail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, aex, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, aextail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + ylen = scale_expansion_zeroelim(temp192len, temp192, aey, dety); + yylen = scale_expansion_zeroelim(ylen, dety, aey, detyy); + ytlen = scale_expansion_zeroelim(temp192len, temp192, aeytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, aey, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, aeytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + zlen = scale_expansion_zeroelim(temp192len, temp192, aez, detz); + zzlen = scale_expansion_zeroelim(zlen, detz, aez, detzz); + ztlen = scale_expansion_zeroelim(temp192len, temp192, aeztail, detzt); + zztlen = scale_expansion_zeroelim(ztlen, detzt, aez, detzzt); + for(i = 0; i < zztlen; i++) + { + detzzt[i] *= 2.0; + } + ztztlen = scale_expansion_zeroelim(ztlen, detzt, aeztail, detztzt); + z1len = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1); + z2len = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2); + xylen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy); + alen = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, adet); + + temp32alen = scale_expansion_zeroelim(dalen, da, cez, temp32a); + temp32blen = scale_expansion_zeroelim(dalen, da, ceztail, temp32b); + temp64alen = 
fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a); + temp32alen = scale_expansion_zeroelim(aclen, ac, dez, temp32a); + temp32blen = scale_expansion_zeroelim(aclen, ac, deztail, temp32b); + temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b); + temp32alen = scale_expansion_zeroelim(cdlen, cd, aez, temp32a); + temp32blen = scale_expansion_zeroelim(cdlen, cd, aeztail, temp32b); + temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c); + temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128); + temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, temp128len, temp128, temp192); + xlen = scale_expansion_zeroelim(temp192len, temp192, bex, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, bex, detxx); + xtlen = scale_expansion_zeroelim(temp192len, temp192, bextail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, bex, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, bextail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + ylen = scale_expansion_zeroelim(temp192len, temp192, bey, dety); + yylen = scale_expansion_zeroelim(ylen, dety, bey, detyy); + ytlen = scale_expansion_zeroelim(temp192len, temp192, beytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, bey, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, beytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + zlen = scale_expansion_zeroelim(temp192len, temp192, bez, detz); + zzlen = scale_expansion_zeroelim(zlen, detz, bez, detzz); + ztlen = scale_expansion_zeroelim(temp192len, temp192, beztail, 
detzt); + zztlen = scale_expansion_zeroelim(ztlen, detzt, bez, detzzt); + for(i = 0; i < zztlen; i++) + { + detzzt[i] *= 2.0; + } + ztztlen = scale_expansion_zeroelim(ztlen, detzt, beztail, detztzt); + z1len = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1); + z2len = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2); + xylen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy); + blen = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, bdet); + + temp32alen = scale_expansion_zeroelim(ablen, ab, -dez, temp32a); + temp32blen = scale_expansion_zeroelim(ablen, ab, -deztail, temp32b); + temp64alen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a); + temp32alen = scale_expansion_zeroelim(bdlen, bd, -aez, temp32a); + temp32blen = scale_expansion_zeroelim(bdlen, bd, -aeztail, temp32b); + temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b); + temp32alen = scale_expansion_zeroelim(dalen, da, -bez, temp32a); + temp32blen = scale_expansion_zeroelim(dalen, da, -beztail, temp32b); + temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c); + temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128); + temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, temp128len, temp128, temp192); + xlen = scale_expansion_zeroelim(temp192len, temp192, cex, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, cex, detxx); + xtlen = scale_expansion_zeroelim(temp192len, temp192, cextail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, cex, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, cextail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + ylen = scale_expansion_zeroelim(temp192len, temp192, cey, dety); + yylen = 
scale_expansion_zeroelim(ylen, dety, cey, detyy); + ytlen = scale_expansion_zeroelim(temp192len, temp192, ceytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, cey, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, ceytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + zlen = scale_expansion_zeroelim(temp192len, temp192, cez, detz); + zzlen = scale_expansion_zeroelim(zlen, detz, cez, detzz); + ztlen = scale_expansion_zeroelim(temp192len, temp192, ceztail, detzt); + zztlen = scale_expansion_zeroelim(ztlen, detzt, cez, detzzt); + for(i = 0; i < zztlen; i++) + { + detzzt[i] *= 2.0; + } + ztztlen = scale_expansion_zeroelim(ztlen, detzt, ceztail, detztzt); + z1len = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1); + z2len = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2); + xylen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy); + clen = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, cdet); + + temp32alen = scale_expansion_zeroelim(bclen, bc, aez, temp32a); + temp32blen = scale_expansion_zeroelim(bclen, bc, aeztail, temp32b); + temp64alen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a); + temp32alen = scale_expansion_zeroelim(aclen, ac, -bez, temp32a); + temp32blen = scale_expansion_zeroelim(aclen, ac, -beztail, temp32b); + temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b); + temp32alen = scale_expansion_zeroelim(ablen, ab, cez, temp32a); + temp32blen = scale_expansion_zeroelim(ablen, ab, ceztail, temp32b); + temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c); + temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128); + temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, 
temp128len, temp128, temp192); + xlen = scale_expansion_zeroelim(temp192len, temp192, dex, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, dex, detxx); + xtlen = scale_expansion_zeroelim(temp192len, temp192, dextail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, dex, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, dextail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + ylen = scale_expansion_zeroelim(temp192len, temp192, dey, dety); + yylen = scale_expansion_zeroelim(ylen, dety, dey, detyy); + ytlen = scale_expansion_zeroelim(temp192len, temp192, deytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, dey, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, deytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + zlen = scale_expansion_zeroelim(temp192len, temp192, dez, detz); + zzlen = scale_expansion_zeroelim(zlen, detz, dez, detzz); + ztlen = scale_expansion_zeroelim(temp192len, temp192, deztail, detzt); + zztlen = scale_expansion_zeroelim(ztlen, detzt, dez, detzzt); + for(i = 0; i < zztlen; i++) + { + detzzt[i] *= 2.0; + } + ztztlen = scale_expansion_zeroelim(ztlen, detzt, deztail, detztzt); + z1len = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1); + z2len = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2); + xylen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy); + dlen = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, ddet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + cdlen = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, deter); + 
+ return deter[deterlen - 1];
+}
+/*! \brief Adaptive-precision evaluation of the insphere determinant; called
+ *         by insphere() when its floating-point filter cannot decide.
+ *
+ *  The value is refined in stages and returned as soon as its magnitude
+ *  reaches the error bound of the current stage:
+ *   -# The differences p? - pe are taken in plain floating point, the 2x2
+ *      minors ab, bc, cd, da, ac and bd are formed as four-component
+ *      expansions (Two_Product() / Two_Two_Diff()) and the determinant is
+ *      accumulated in fin1. det = estimate(finlength, fin1) is returned if
+ *      |det| >= isperrboundB * permanent.
+ *   -# The tails of the twelve differences are computed with
+ *      Two_Diff_Tail(). If every tail is exactly 0.0, det is returned
+ *      unchanged.
+ *   -# The bound isperrboundC * permanent + resulterrbound * |det| is
+ *      evaluated, a correction built from the tails is added to det, and det
+ *      is returned if its magnitude reaches that bound.
+ *   -# Otherwise the result of insphereexact() is returned.
+ *
+ *  \param[in] pa First point; elements [0], [1] and [2] are read.
+ *  \param[in] pb Second point, same layout.
+ *  \param[in] pc Third point, same layout.
+ *  \param[in] pd Fourth point, same layout.
+ *  \param[in] pe Point all differences are taken against, same layout.
+ *  \param[in] permanent Magnitude that scales the error bounds; insphere()
+ *             passes the determinant expression evaluated with the absolute
+ *             values of all products.
+ *
+ *  \return The first staged value that passes its error bound, or the value
+ *          returned by insphereexact().
+ */
+REAL insphereadapt(pa, pb, pc, pd, pe, permanent) REAL *pa;
+REAL *pb;
+REAL *pc;
+REAL *pd;
+REAL *pe;
+REAL permanent;
+{
+  INEXACT REAL aex, bex, cex, dex, aey, bey, cey, dey, aez, bez, cez, dez;
+  REAL det, errbound;
+
+  INEXACT REAL aexbey1, bexaey1, bexcey1, cexbey1;
+  INEXACT REAL cexdey1, dexcey1, dexaey1, aexdey1;
+  INEXACT REAL aexcey1, cexaey1, bexdey1, dexbey1;
+  REAL aexbey0, bexaey0, bexcey0, cexbey0;
+  REAL cexdey0, dexcey0, dexaey0, aexdey0;
+  REAL aexcey0, cexaey0, bexdey0, dexbey0;
+  REAL ab[4], bc[4], cd[4], da[4], ac[4], bd[4];
+  INEXACT REAL ab3, bc3, cd3, da3, ac3, bd3; /* leading components, reused by the tail correction */
+  REAL abeps, bceps, cdeps, daeps, aceps, bdeps;
+  REAL temp8a[8], temp8b[8], temp8c[8], temp16[16], temp24[24], temp48[48];
+  int temp8alen, temp8blen, temp8clen, temp16len, temp24len, temp48len;
+  REAL xdet[96], ydet[96], zdet[96], xydet[192];
+  int xlen, ylen, zlen, xylen;
+  REAL adet[288], bdet[288], cdet[288], ddet[288];
+  int alen, blen, clen, dlen;
+  REAL abdet[576], cddet[576];
+  int ablen, cdlen;
+  REAL fin1[1152]; /* expansion holding the first-stage determinant */
+  int finlength;
+
+  REAL aextail, bextail, cextail, dextail;
+  REAL aeytail, beytail, ceytail, deytail;
+  REAL aeztail, beztail, ceztail, deztail;
+  /* Scratch variables; none is referenced by name in this function -- presumably used inside the Two_* macros (confirm). */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j;
+  REAL _0;
+  /* First stage: coordinate differences in plain floating point; their tails are only computed later. */
+  aex = (REAL)(pa[0] - pe[0]);
+  bex = (REAL)(pb[0] - pe[0]);
+  cex = (REAL)(pc[0] - pe[0]);
+  dex = (REAL)(pd[0] - pe[0]);
+  aey = (REAL)(pa[1] - pe[1]);
+  bey = (REAL)(pb[1] - pe[1]);
+  cey = (REAL)(pc[1] - pe[1]);
+  dey = (REAL)(pd[1] - pe[1]);
+  aez = (REAL)(pa[2] - pe[2]);
+  bez = (REAL)(pb[2] - pe[2]);
+  cez = (REAL)(pc[2] - pe[2]);
+  dez = (REAL)(pd[2] - pe[2]);
+  /* 2x2 minors ab, bc, cd, da, ac, bd, each stored as a four-component expansion. */
+  Two_Product(aex, bey, aexbey1, aexbey0);
+  Two_Product(bex, aey, bexaey1, bexaey0);
+  Two_Two_Diff(aexbey1, aexbey0, bexaey1, bexaey0, ab3, ab[2], ab[1], ab[0]);
+  ab[3] = ab3;
+
+  Two_Product(bex, cey, bexcey1, bexcey0);
+
Two_Product(cex, bey, cexbey1, cexbey0); + Two_Two_Diff(bexcey1, bexcey0, cexbey1, cexbey0, bc3, bc[2], bc[1], bc[0]); + bc[3] = bc3; + + Two_Product(cex, dey, cexdey1, cexdey0); + Two_Product(dex, cey, dexcey1, dexcey0); + Two_Two_Diff(cexdey1, cexdey0, dexcey1, dexcey0, cd3, cd[2], cd[1], cd[0]); + cd[3] = cd3; + + Two_Product(dex, aey, dexaey1, dexaey0); + Two_Product(aex, dey, aexdey1, aexdey0); + Two_Two_Diff(dexaey1, dexaey0, aexdey1, aexdey0, da3, da[2], da[1], da[0]); + da[3] = da3; + + Two_Product(aex, cey, aexcey1, aexcey0); + Two_Product(cex, aey, cexaey1, cexaey0); + Two_Two_Diff(aexcey1, aexcey0, cexaey1, cexaey0, ac3, ac[2], ac[1], ac[0]); + ac[3] = ac3; + + Two_Product(bex, dey, bexdey1, bexdey0); + Two_Product(dex, bey, dexbey1, dexbey0); + Two_Two_Diff(bexdey1, bexdey0, dexbey1, dexbey0, bd3, bd[2], bd[1], bd[0]); + bd[3] = bd3; + + temp8alen = scale_expansion_zeroelim(4, cd, bez, temp8a); + temp8blen = scale_expansion_zeroelim(4, bd, -cez, temp8b); + temp8clen = scale_expansion_zeroelim(4, bc, dez, temp8c); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24); + temp48len = scale_expansion_zeroelim(temp24len, temp24, aex, temp48); + xlen = scale_expansion_zeroelim(temp48len, temp48, -aex, xdet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, aey, temp48); + ylen = scale_expansion_zeroelim(temp48len, temp48, -aey, ydet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, aez, temp48); + zlen = scale_expansion_zeroelim(temp48len, temp48, -aez, zdet); + xylen = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet); + alen = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, adet); + + temp8alen = scale_expansion_zeroelim(4, da, cez, temp8a); + temp8blen = scale_expansion_zeroelim(4, ac, dez, temp8b); + temp8clen = scale_expansion_zeroelim(4, cd, aez, temp8c); + temp16len = 
fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24); + temp48len = scale_expansion_zeroelim(temp24len, temp24, bex, temp48); + xlen = scale_expansion_zeroelim(temp48len, temp48, bex, xdet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, bey, temp48); + ylen = scale_expansion_zeroelim(temp48len, temp48, bey, ydet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, bez, temp48); + zlen = scale_expansion_zeroelim(temp48len, temp48, bez, zdet); + xylen = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet); + blen = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, bdet); + + temp8alen = scale_expansion_zeroelim(4, ab, dez, temp8a); + temp8blen = scale_expansion_zeroelim(4, bd, aez, temp8b); + temp8clen = scale_expansion_zeroelim(4, da, bez, temp8c); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24); + temp48len = scale_expansion_zeroelim(temp24len, temp24, cex, temp48); + xlen = scale_expansion_zeroelim(temp48len, temp48, -cex, xdet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, cey, temp48); + ylen = scale_expansion_zeroelim(temp48len, temp48, -cey, ydet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, cez, temp48); + zlen = scale_expansion_zeroelim(temp48len, temp48, -cez, zdet); + xylen = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet); + clen = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, cdet); + + temp8alen = scale_expansion_zeroelim(4, bc, aez, temp8a); + temp8blen = scale_expansion_zeroelim(4, ac, -bez, temp8b); + temp8clen = scale_expansion_zeroelim(4, ab, cez, temp8c); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24); + 
temp48len = scale_expansion_zeroelim(temp24len, temp24, dex, temp48); + xlen = scale_expansion_zeroelim(temp48len, temp48, dex, xdet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, dey, temp48); + ylen = scale_expansion_zeroelim(temp48len, temp48, dey, ydet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, dez, temp48); + zlen = scale_expansion_zeroelim(temp48len, temp48, dez, zdet); + xylen = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet); + dlen = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, ddet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + cdlen = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet); + finlength = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, fin1); + + det = estimate(finlength, fin1); + errbound = isperrboundB * permanent; + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + + Two_Diff_Tail(pa[0], pe[0], aex, aextail); + Two_Diff_Tail(pa[1], pe[1], aey, aeytail); + Two_Diff_Tail(pa[2], pe[2], aez, aeztail); + Two_Diff_Tail(pb[0], pe[0], bex, bextail); + Two_Diff_Tail(pb[1], pe[1], bey, beytail); + Two_Diff_Tail(pb[2], pe[2], bez, beztail); + Two_Diff_Tail(pc[0], pe[0], cex, cextail); + Two_Diff_Tail(pc[1], pe[1], cey, ceytail); + Two_Diff_Tail(pc[2], pe[2], cez, ceztail); + Two_Diff_Tail(pd[0], pe[0], dex, dextail); + Two_Diff_Tail(pd[1], pe[1], dey, deytail); + Two_Diff_Tail(pd[2], pe[2], dez, deztail); + if((aextail == 0.0) && (aeytail == 0.0) && (aeztail == 0.0) && (bextail == 0.0) && (beytail == 0.0) && (beztail == 0.0) && + (cextail == 0.0) && (ceytail == 0.0) && (ceztail == 0.0) && (dextail == 0.0) && (deytail == 0.0) && (deztail == 0.0)) + { + return det; + } + + errbound = isperrboundC * permanent + resulterrbound * Absolute(det); + abeps = (aex * beytail + bey * aextail) - (aey * bextail + bex * aeytail); + bceps = (bex * ceytail + cey * bextail) - (bey * cextail + cex * beytail); + cdeps = (cex * deytail + dey * cextail) - (cey * 
dextail + dex * ceytail);
+  daeps = (dex * aeytail + aey * dextail) - (dey * aextail + aex * deytail);
+  aceps = (aex * ceytail + cey * aextail) - (aey * cextail + cex * aeytail);
+  bdeps = (bex * deytail + dey * bextail) - (bey * dextail + dex * beytail);
+  det += (((bex * bex + bey * bey + bez * bez) *
+               ((cez * daeps + dez * aceps + aez * cdeps) + (ceztail * da3 + deztail * ac3 + aeztail * cd3)) +
+           (dex * dex + dey * dey + dez * dez) *
+               ((aez * bceps - bez * aceps + cez * abeps) + (aeztail * bc3 - beztail * ac3 + ceztail * ab3))) -
+          ((aex * aex + aey * aey + aez * aez) *
+               ((bez * cdeps - cez * bdeps + dez * bceps) + (beztail * cd3 - ceztail * bd3 + deztail * bc3)) +
+           (cex * cex + cey * cey + cez * cez) *
+               ((dez * abeps + aez * bdeps + bez * daeps) + (deztail * ab3 + aeztail * bd3 + beztail * da3)))) +
+         2.0 * (((bex * bextail + bey * beytail + bez * beztail) * (cez * da3 + dez * ac3 + aez * cd3) +
+                 (dex * dextail + dey * deytail + dez * deztail) * (aez * bc3 - bez * ac3 + cez * ab3)) -
+                ((aex * aextail + aey * aeytail + aez * aeztail) * (bez * cd3 - cez * bd3 + dez * bc3) +
+                 (cex * cextail + cey * ceytail + cez * ceztail) * (dez * ab3 + aez * bd3 + bez * da3)));
+  if((det >= errbound) || (-det >= errbound))
+    {
+      return det;
+    }
+  /* The corrected value is still below its error bound: hand over to insphereexact(). */
+  return insphereexact(pa, pb, pc, pd, pe);
+}
+/*! \brief Insphere determinant of pa, pb, pc, pd relative to pe, guarded by a
+ *         floating-point filter.
+ *
+ *  The determinant is first evaluated in ordinary floating point from the
+ *  differences p? - pe. Alongside it 'permanent' is computed: the same
+ *  expression with every product replaced by its absolute value. If
+ *  |det| > isperrboundA * permanent the floating-point value is returned
+ *  directly; otherwise the evaluation is handed to insphereadapt() together
+ *  with 'permanent'.
+ *
+ *  \param[in] pa First point; elements [0], [1] and [2] are read.
+ *  \param[in] pb Second point, same layout.
+ *  \param[in] pc Third point, same layout.
+ *  \param[in] pd Fourth point, same layout.
+ *  \param[in] pe Point all differences are taken against, same layout.
+ *
+ *  \return det, or the value returned by insphereadapt().
+ *          NOTE(review): which sign corresponds to "pe inside the sphere"
+ *          presumably depends on the orientation of pa..pd and is not visible
+ *          here -- confirm with the callers.
+ */
+REAL insphere(pa, pb, pc, pd, pe) REAL *pa;
+REAL *pb;
+REAL *pc;
+REAL *pd;
+REAL *pe;
+{
+  REAL aex, bex, cex, dex;
+  REAL aey, bey, cey, dey;
+  REAL aez, bez, cez, dez;
+  REAL aexbey, bexaey, bexcey, cexbey, cexdey, dexcey, dexaey, aexdey;
+  REAL aexcey, cexaey, bexdey, dexbey;
+  REAL alift, blift, clift, dlift;
+  REAL ab, bc, cd, da, ac, bd;
+  REAL abc, bcd, cda, dab;
+  REAL aezplus, bezplus, cezplus, dezplus;
+  REAL aexbeyplus, bexaeyplus, bexceyplus, cexbeyplus;
+  REAL cexdeyplus, dexceyplus, dexaeyplus, aexdeyplus;
+  REAL aexceyplus, cexaeyplus, bexdeyplus, dexbeyplus;
+  REAL det;
+  REAL permanent, errbound;
+  /* Coordinates of a, b, c, d relative to e. */
+  aex = pa[0] - pe[0];
+  bex = pb[0] - pe[0];
+  cex = pc[0] - pe[0];
+  dex = pd[0] - pe[0];
+  aey = pa[1] - pe[1];
+  bey = pb[1] - pe[1];
+  cey = pc[1] - pe[1];
+  dey = pd[1] - pe[1];
+  aez = pa[2] - pe[2];
+  bez = pb[2] - pe[2];
+  cez = pc[2] - pe[2];
+  dez = pd[2] - pe[2];
+  /* 2x2 minors of the x and y columns; the individual products are kept for 'permanent'. */
+  aexbey = aex * bey;
+  bexaey = bex * aey;
+  ab = aexbey - bexaey;
+  bexcey = bex * cey;
+  cexbey = cex * bey;
+  bc = bexcey - cexbey;
+  cexdey = cex * dey;
+  dexcey = dex * cey;
+  cd = cexdey - dexcey;
+  dexaey = dex * aey;
+  aexdey = aex * dey;
+  da = dexaey - aexdey;
+
+  aexcey = aex * cey;
+  cexaey = cex * aey;
+  ac = aexcey - cexaey;
+  bexdey = bex * dey;
+  dexbey = dex * bey;
+  bd = bexdey - dexbey;
+  /* 3x3 minors: the 2x2 minors combined with the z differences. */
+  abc = aez * bc - bez * ac + cez * ab;
+  bcd = bez * cd - cez * bd + dez * bc;
+  cda = cez * da + dez * ac + aez * cd;
+  dab = dez * ab + aez * bd + bez * da;
+  /* Squared lengths of the difference vectors. */
+  alift = aex * aex + aey * aey + aez * aez;
+  blift = bex * bex + bey * bey + bez * bez;
+  clift = cex * cex + cey * cey + cez * cez;
+  dlift = dex * dex + dey * dey + dez * dez;
+
+  det = (dlift * abc - clift * dab) + (blift * cda - alift * bcd);
+  /* Absolute values of every factor entering det, used to scale the error bound. */
+  aezplus = Absolute(aez);
+  bezplus = Absolute(bez);
+  cezplus = Absolute(cez);
+  dezplus = Absolute(dez);
+  aexbeyplus = Absolute(aexbey);
+  bexaeyplus = Absolute(bexaey);
+  bexceyplus = Absolute(bexcey);
+  cexbeyplus = Absolute(cexbey);
+  cexdeyplus = Absolute(cexdey);
+  dexceyplus = Absolute(dexcey);
+  dexaeyplus = Absolute(dexaey);
+  aexdeyplus = Absolute(aexdey);
+  aexceyplus = Absolute(aexcey);
+  cexaeyplus = Absolute(cexaey);
+  bexdeyplus = Absolute(bexdey);
+  dexbeyplus = Absolute(dexbey);
+  permanent =
+      ((cexdeyplus + dexceyplus) * bezplus + (dexbeyplus + bexdeyplus) * cezplus + (bexceyplus + cexbeyplus) * dezplus) * alift +
+      ((dexaeyplus + aexdeyplus) * cezplus + (aexceyplus + cexaeyplus) * dezplus + (cexdeyplus + dexceyplus) * aezplus) * blift +
+      ((aexbeyplus + bexaeyplus) * dezplus + (bexdeyplus + dexbeyplus) * aezplus + (dexaeyplus + aexdeyplus) * bezplus) * clift +
+      ((bexceyplus + cexbeyplus) * aezplus + (cexaeyplus + aexceyplus) * bezplus + (aexbeyplus + bexaeyplus) * cezplus) * dlift;
+  errbound = isperrboundA * permanent;
+  if((det > errbound) || (-det > errbound))
+    {
+      return det; /* magnitude exceeds the bound: the floating-point value is accepted as is */
+    }
+  /* Too close to zero for the plain evaluation: continue with the adaptive stages. */
+  return insphereadapt(pa, pb, pc, pd, pe, permanent);
+}
diff --git a/src/amuse/community/arepo/src/utils/system.c b/src/amuse/community/arepo/src/utils/system.c
new file mode 100644
index 0000000000..027974da55
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/system.c
@@ -0,0 +1,1300 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/system.c
+ * \date        05/2018
+ * \brief       Small functions for interaction with operating system and
+ *              libraries and other auxiliary functions.
+ * \details contains functions: + * void subdivide_evenly(int N, int pieces, int index, int *first, int *count) + * void permutate_chunks_in_list(int ncount, int *list) + * int get_thread_num(void) + * int system_compare_hostname(const void *a, const void *b) + * int system_compare_first_task(const void *a, const void *b) + * int system_compare_task(const void *a, const void *b) + * void determine_compute_nodes(void) + * void allreduce_sparse_double_sum(double *loc, double *glob, int N) + * void allreduce_sparse_imin(int *loc, int *glob, int N) + * double mysort(void *base, size_t nel, size_t width, int (*compar) (const void *, const void *)) + * double dabs(double a) + * double dmax(double a, double b) + * size_t smax(size_t a, size_t b) + * double dmin(double a, double b) + * double max_array(double *a, int num_elements) + * int imax(int a, int b) + * int imin(int a, int b) + * int myflush(FILE * fstream) + * int flush_everything(void) + * void enable_core_dumps_and_fpu_exceptions(void) + * void my_gsl_error_handler(const char *reason, const char *file, int line, int gsl_errno) + * double get_random_number(void) + * double get_random_number_aux(void) + * double second(void) + * double measure_time(void) + * double timediff(double t0, double t1) + * void minimum_large_ints(int n, long long *src, long long *res) + * void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm) + * void sumup_large_ints(int n, int *src, long long *res) + * void sumup_longs(int n, long long *src, long long *res) + * size_t sizemax(size_t a, size_t b) + * void report_VmRSS(void) + * long long report_comittable_memory(long long *MemTotal, long long *Committed_AS, long long *SwapTotal, long long + * *SwapFree) void check_maxmemsize_setting(void) void mpi_report_committable_memory(void) int my_ffsll(peanokey i) int my_fls(int x) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer 
Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Divides N elements evenly on pieces chunks, writes in first and + * count arrays. + * + * \param[in] N Number of elements. + * \param[in] pieces Number of chunks. + * \param[in] index Index of piece that is needed as output. + * \param[out] first Index of first element of piece number 'index'. + * \param[out] count Number of elements of piece number 'index'. + * + * \return void + */ +void subdivide_evenly(int N, int pieces, int index, int *first, int *count) +{ + int avg = (N - 1) / pieces + 1; + int exc = pieces * avg - N; + int indexlastsection = pieces - exc; + + if(index < indexlastsection) + { + *first = index * avg; + *count = avg; + } + else + { + *first = index * avg - (index - indexlastsection); + *count = avg - 1; + } +} + +/*! \brief Permutes chunks in a list. + * + * \param[in] ncount Number of elements in list. + * \param[in, out] list List to be permuted. 
+ * + * \return void + */ +void permutate_chunks_in_list(int ncount, int *list) +{ +#define WALK_N_PIECES 32 /*!< Number of sets, the chunks are divided into */ +#define WALK_N_SIZE 500 /*!< Number of particles per chunk */ + + int nchunk; /*!< Number of chunk sets used */ + int nchunksize; /*!< Size of each chunk */ + int currentchunk; /*!< Chunk set currently processed */ + int nextparticle; + + if(ncount > WALK_N_PIECES * WALK_N_SIZE) + { + nchunk = WALK_N_PIECES; + nchunksize = WALK_N_SIZE; + } + else + { + nchunk = 1; + nchunksize = ncount; + } + + currentchunk = 0; + + int *chunked_TargetList = (int *)mymalloc("chunked_TargetList", ncount * sizeof(int)); + int n, i; + for(n = 0, nextparticle = 0; n < ncount; n++) + { + i = nextparticle; + + chunked_TargetList[n] = list[i]; + if(i < ncount) + { + nextparticle++; + + if((nextparticle % nchunksize) == 0) + nextparticle += (nchunk - 1) * nchunksize; + + if(nextparticle >= ncount) + { + currentchunk++; + if(currentchunk < nchunk) + nextparticle = currentchunk * nchunksize; + } + } + } + + for(n = 0; n < ncount; n++) + list[n] = chunked_TargetList[n]; + + myfree(chunked_TargetList); +} + +/*! \brief Returns thread number. + * + * Unused. + * + * \return 0 + */ +int get_thread_num(void) { return 0; } + +/*! \brief Structure for a data of compute node. + */ +static struct node_data +{ + int task, this_node, first_task_in_this_node; + int first_index, rank_in_node, tasks_in_node; + char name[MPI_MAX_PROCESSOR_NAME]; +} loc_node, *list_of_nodes; + +/*! \brief Compares first nodename and then task of node_data objects. + * + * Sorting kernel. + * + * \param[in] a First element to compare. + * \param[in] b Second element to compare. + * + * \return (-1,0,1); -1 if a < b + */ +int system_compare_hostname(const void *a, const void *b) +{ + int cmp = strcmp(((struct node_data *)a)->name, ((struct node_data *)b)->name); + + if(cmp == 0) + { + if(((struct node_data *)a)->task < ((struct node_data *)b)->task) + cmp = -1; + else + cmp = +1; + } + + return cmp; +} + +/*! 
\brief Compares node_data objects; first first_task_this_node and then + * task. + * + * Sorting kernel. + * + * \param[in] a First element to compare. + * \param[in] b Second element to compare. + * + * \return (-1,0,1); -1 if a < b + */ +int system_compare_first_task(const void *a, const void *b) +{ + if(((struct node_data *)a)->first_task_in_this_node < ((struct node_data *)b)->first_task_in_this_node) + return -1; + + if(((struct node_data *)a)->first_task_in_this_node > ((struct node_data *)b)->first_task_in_this_node) + return +1; + + if(((struct node_data *)a)->task < ((struct node_data *)b)->task) + return -1; + + if(((struct node_data *)a)->task > ((struct node_data *)b)->task) + return +1; + + return 0; +} + +/*! \brief Compares task of node_data objects + * + * Sorting kernel. + * + * \param[in] a First element to compare. + * \param[in] b Second element to compare. + * + * \return (-1,0,1); -1 if a->task < b->task + */ +int system_compare_task(const void *a, const void *b) +{ + if(((struct node_data *)a)->task < ((struct node_data *)b)->task) + return -1; + + if(((struct node_data *)a)->task > ((struct node_data *)b)->task) + return +1; + + return 0; +} + +/*! \brief Determines the compute nodes the simulation is running on. + * + * Reports this to file uses-machines.txt. 
+ * + * \return void + */ +void determine_compute_nodes(void) +{ + int len, nodes, i, no, rank, first_index; + + MPI_Get_processor_name(loc_node.name, &len); + loc_node.task = ThisTask; + + list_of_nodes = malloc(sizeof(struct node_data) * + NTask); /* Note: Internal memory allocation routines are not yet available when this function is called */ + + MPI_Allgather(&loc_node, sizeof(struct node_data), MPI_BYTE, list_of_nodes, sizeof(struct node_data), MPI_BYTE, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + FILE *fd; + if(!(fd = fopen("uses-machines.txt", "w"))) + terminate("can't write file with used machines"); + for(i = 0; i < NTask; i++) + fprintf(fd, "%5d %s\n", list_of_nodes[i].task, list_of_nodes[i].name); + fclose(fd); + } + + qsort(list_of_nodes, NTask, sizeof(struct node_data), system_compare_hostname); + + list_of_nodes[0].first_task_in_this_node = list_of_nodes[0].task; + + for(i = 1, nodes = 1; i < NTask; i++) + { + if(strcmp(list_of_nodes[i].name, list_of_nodes[i - 1].name) != 0) + { + list_of_nodes[i].first_task_in_this_node = list_of_nodes[i].task; + nodes++; + } + else + list_of_nodes[i].first_task_in_this_node = list_of_nodes[i - 1].first_task_in_this_node; + } + + qsort(list_of_nodes, NTask, sizeof(struct node_data), system_compare_first_task); + + for(i = 0; i < NTask; i++) + list_of_nodes[i].tasks_in_node = 0; + + for(i = 0, no = 0, rank = 0, first_index = 0; i < NTask; i++) + { + if(i ? 
list_of_nodes[i].first_task_in_this_node != list_of_nodes[i - 1].first_task_in_this_node : 0) + { + no++; + rank = 0; + first_index = i; + } + + list_of_nodes[i].first_index = first_index; + list_of_nodes[i].this_node = no; + list_of_nodes[i].rank_in_node = rank++; + list_of_nodes[first_index].tasks_in_node++; + } + + int max_count = 0; + int min_count = (1 << 30); + + for(i = 0; i < NTask; i++) + { + list_of_nodes[i].tasks_in_node = list_of_nodes[list_of_nodes[i].first_index].tasks_in_node; + + if(list_of_nodes[i].tasks_in_node > max_count) + max_count = list_of_nodes[i].tasks_in_node; + if(list_of_nodes[i].tasks_in_node < min_count) + min_count = list_of_nodes[i].tasks_in_node; + } + + qsort(list_of_nodes, NTask, sizeof(struct node_data), system_compare_task); + + TasksInThisNode = list_of_nodes[ThisTask].tasks_in_node; + RankInThisNode = list_of_nodes[ThisTask].rank_in_node; + + ThisNode = list_of_nodes[ThisTask].this_node; + + NumNodes = nodes; + MinTasksPerNode = min_count; + MaxTasksPerNode = max_count; + + free(list_of_nodes); +} + +/*! \brief Home-made Allreduce function for double variables with sum reduction + * operation, optimized for sparse vectors. + * + * Tries to avoid communicating and adding up a lot of zeros, which can be + * faster than a brute-force MPI_Allreduce. + * + * \param[in] loc Local array. + * \param[out] glob Global (result) array. + * \param[in] N number of elements in array. 
+ * + * \return void + */ +void allreduce_sparse_double_sum(double *loc, double *glob, int N) +{ + int i, j, n, loc_first_n, nimport, nexport, task, ngrp; + + int *send_count = mymalloc("send_count", sizeof(int) * NTask); + int *recv_count = mymalloc("recv_count", sizeof(int) * NTask); + int *send_offset = mymalloc("send_offset", sizeof(int) * NTask); + int *recv_offset = mymalloc("recv_offset", sizeof(int) * NTask); + int *blocksize = mymalloc("blocksize", sizeof(int) * NTask); + + int blk = N / NTask; + int rmd = N - blk * NTask; /* remainder */ + int pivot_n = rmd * (blk + 1); + + for(task = 0, loc_first_n = 0; task < NTask; task++) + { + if(task < rmd) + blocksize[task] = blk + 1; + else + blocksize[task] = blk; + + if(task < ThisTask) + loc_first_n += blocksize[task]; + } + + double *loc_data = mymalloc("loc_data", blocksize[ThisTask] * sizeof(double)); + memset(loc_data, 0, blocksize[ThisTask] * sizeof(double)); + + for(j = 0; j < NTask; j++) + send_count[j] = 0; + + /* find for each non-zero element the processor where it should go for being summed */ + for(n = 0; n < N; n++) + { + if(loc[n] != 0) + { + if(n < pivot_n) + task = n / (blk + 1); + else + task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */ + + send_count[task]++; + } + } + + MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nexport = 0, nimport = 0, recv_offset[0] = 0, send_offset[0] = 0; j < NTask; j++) + { + nexport += send_count[j]; + nimport += recv_count[j]; + if(j > 0) + { + send_offset[j] = send_offset[j - 1] + send_count[j - 1]; + recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1]; + } + } + + struct ind_data + { + int n; + double val; + } * export_data, *import_data; + + export_data = mymalloc("export_data", nexport * sizeof(struct ind_data)); + import_data = mymalloc("import_data", nimport * sizeof(struct ind_data)); + + for(j = 0; j < NTask; j++) + send_count[j] = 0; + + for(n = 0; n < N; n++) + { + 
if(loc[n] != 0) + { + if(n < pivot_n) + task = n / (blk + 1); + else + task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */ + + int index = send_offset[task] + send_count[task]++; + export_data[index].n = n; + export_data[index].val = loc[n]; + } + } + + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */ + { + int recvTask = ThisTask ^ ngrp; + if(recvTask < NTask) + if(send_count[recvTask] > 0 || recv_count[recvTask] > 0) + MPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(struct ind_data), MPI_BYTE, recvTask, + TAG_DENS_B, &import_data[recv_offset[recvTask]], recv_count[recvTask] * sizeof(struct ind_data), MPI_BYTE, + recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + for(i = 0; i < nimport; i++) + { + int j = import_data[i].n - loc_first_n; + + if(j < 0 || j >= blocksize[ThisTask]) + terminate("j=%d < 0 || j>= blocksize[ThisTask]=%d", j, blocksize[ThisTask]); + + loc_data[j] += import_data[i].val; + } + + myfree(import_data); + myfree(export_data); + + /* now share the cost data across all processors */ + int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask); + int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask); + + for(task = 0; task < NTask; task++) + bytecounts[task] = blocksize[task] * sizeof(double); + + for(task = 1, byteoffset[0] = 0; task < NTask; task++) + byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; + + MPI_Allgatherv(loc_data, bytecounts[ThisTask], MPI_BYTE, glob, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD); + + myfree(byteoffset); + myfree(bytecounts); + + myfree(loc_data); + myfree(blocksize); + myfree(recv_offset); + myfree(send_offset); + myfree(recv_count); + myfree(send_count); +} + +/*! \brief Home-made Allreduce function for int variables with minimum as a + * reduction operation. 
+ * + * Tries to avoid communicating and adding up a lot of zeros, which can be + * faster than a brute-force MPI_Allreduce. + * + * \param[in] loc Local array. + * \param[out] glob Global (result) array. + * \param[in] N number of elements in array. + * + * \return void + */ +void allreduce_sparse_imin(int *loc, int *glob, int N) +{ + int i, j, n, loc_first_n, nimport, nexport, task, ngrp; + + int *send_count = mymalloc("send_count", sizeof(int) * NTask); + int *recv_count = mymalloc("recv_count", sizeof(int) * NTask); + int *send_offset = mymalloc("send_offset", sizeof(int) * NTask); + int *recv_offset = mymalloc("recv_offset", sizeof(int) * NTask); + int *blocksize = mymalloc("blocksize", sizeof(int) * NTask); + + int blk = N / NTask; + int rmd = N - blk * NTask; /* remainder */ + int pivot_n = rmd * (blk + 1); + + for(task = 0, loc_first_n = 0; task < NTask; task++) + { + if(task < rmd) + blocksize[task] = blk + 1; + else + blocksize[task] = blk; + + if(task < ThisTask) + loc_first_n += blocksize[task]; + } + + int *loc_data = mymalloc("loc_data", blocksize[ThisTask] * sizeof(int)); + for(i = 0; i < blocksize[ThisTask]; i++) + { + loc_data[i] = INT_MAX; + } + + for(j = 0; j < NTask; j++) + send_count[j] = 0; + + /* find for each non-zero element the processor where it should go for being summed */ + for(n = 0; n < N; n++) + { + if(loc[n] != 0) + { + if(n < pivot_n) + task = n / (blk + 1); + else + task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */ + + send_count[task]++; + } + } + + MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nexport = 0, nimport = 0, recv_offset[0] = 0, send_offset[0] = 0; j < NTask; j++) + { + nexport += send_count[j]; + nimport += recv_count[j]; + if(j > 0) + { + send_offset[j] = send_offset[j - 1] + send_count[j - 1]; + recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1]; + } + } + + struct ind_data + { + int n; + int val; + } * export_data, *import_data; 
+ + export_data = mymalloc("export_data", nexport * sizeof(struct ind_data)); + import_data = mymalloc("import_data", nimport * sizeof(struct ind_data)); + + for(j = 0; j < NTask; j++) + send_count[j] = 0; + + for(n = 0; n < N; n++) + { + if(loc[n] != 0) + { + if(n < pivot_n) + task = n / (blk + 1); + else + task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */ + + int index = send_offset[task] + send_count[task]++; + export_data[index].n = n; + export_data[index].val = loc[n]; + } + } + + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */ + { + int recvTask = ThisTask ^ ngrp; + if(recvTask < NTask) + if(send_count[recvTask] > 0 || recv_count[recvTask] > 0) + MPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(struct ind_data), MPI_BYTE, recvTask, + TAG_DENS_B, &import_data[recv_offset[recvTask]], recv_count[recvTask] * sizeof(struct ind_data), MPI_BYTE, + recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + for(i = 0; i < nimport; i++) + { + int j = import_data[i].n - loc_first_n; + + if(j < 0 || j >= blocksize[ThisTask]) + terminate("j=%d < 0 || j>= blocksize[ThisTask]=%d", j, blocksize[ThisTask]); + + loc_data[j] = imin(loc_data[j], import_data[i].val); + } + + myfree(import_data); + myfree(export_data); + + /* now share the cost data across all processors */ + int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask); + int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask); + + for(task = 0; task < NTask; task++) + bytecounts[task] = blocksize[task] * sizeof(int); + + for(task = 1, byteoffset[0] = 0; task < NTask; task++) + byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; + + MPI_Allgatherv(loc_data, bytecounts[ThisTask], MPI_BYTE, glob, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD); + + myfree(byteoffset); + myfree(bytecounts); + + myfree(loc_data); + myfree(blocksize); + 
myfree(recv_offset); + myfree(send_offset); + myfree(recv_count); + myfree(send_count); +} + +/*! \brief Wrapper function for quicksort. + * + * \param[in, out] base Array to be sorted. + * \param[in] nel Number of elements to be sorted. + * \param[in] width Size of each element in array. + * \param [in] compar Compare function (sorting kernel). + * + * \return The elapsed CPU time. + */ +double mysort(void *base, size_t nel, size_t width, int (*compar)(const void *, const void *)) +{ + double t0, t1; + + t0 = second(); + + qsort(base, nel, width, compar); + + t1 = second(); + + return timediff(t0, t1); +} + +/*! \brief Absolute value of a double variable. + * + * \param[in] a Double variable. + * + * \return Absolute value of a. + */ +double dabs(double a) +{ + if(a < 0) + return -a; + else + return a; +} + +/*! \brief Maximum value of two double variables. + * + * \param[in] a First variable. + * \param[in] b Second variable. + * + * \return Maximum value of a and b. + */ +double dmax(double a, double b) +{ + if(a > b) + return a; + else + return b; +} + +/*! \brief Maximum value of two size_t type variables. + * + * \param[in] a First variable. + * \param[in] b Second variable. + * + * \return Maximum value of the two variables. + */ +size_t smax(size_t a, size_t b) +{ + if(a > b) + return a; + else + return b; +} + +/*! \brief Minimum value of two double variables. + * + * \param[in] a First variable. + * \param[in] b Second variable. + * + * \return Minimum value of a and b. + */ +double dmin(double a, double b) +{ + if(a < b) + return a; + else + return b; +} + +/*! \brief Maximum value in an array of double variables. + * + * \param[in] a Array of double variables. + * \param[in] num_elements Number of elements in array. + * + * \return Maximum value. + */ +double max_array(double *a, int num_elements) +{ + int i; + double max = -DBL_MAX; + for(i = 0; i < num_elements; i++) + { + if(a[i] > max) + { + max = a[i]; + } + } + return (max); +} + +/*! 
\brief Maximum value of two integers. + * + * \param[in] a First integer variable. + * \param[in] b Second integer variable. + * + * \return Maximum of a and b. + */ +int imax(int a, int b) +{ + if(a > b) + return a; + else + return b; +} + +/*! \brief Minimum value of two integers. + * + * \param[in] a First integer variable. + * \param[in] b Second integer variable. + * + * \return Minimum of a and b. + */ +int imin(int a, int b) +{ + if(a < b) + return a; + else + return b; +} + +/*! \brief Flush (i.e. empty buffer) of a file output stream. + * + * \param[in] fstream Pointer to file output. + * + * \return Status. + */ +int myflush(FILE *fstream) +{ +#ifdef REDUCE_FLUSH + /* do nothing */ + return 0; +#else /* #ifdef REDUCE_FLUSH */ + return fflush(fstream); +#endif /* #ifdef REDUCE_FLUSH #else */ +} + +/*! \brief Flush for all global log-files. + * + * Only flushes in predefined intervals. + * + * \return status (0: did nothing, 1 did flush) + */ +int flush_everything(void) +{ +#ifndef REDUCE_FLUSH + return 0; +#else /* #ifndef REDUCE_FLUSH */ + if(ThisTask == 0) + { + if((CPUThisRun - All.FlushLast) < All.FlushCpuTimeDiff) + { + return 0; + } + else + { + All.FlushLast = CPUThisRun; + } + } + else + { + return 0; + } +#endif /* #ifndef REDUCE_FLUSH #else */ + + mpi_printf("Flushing...\n"); + + fflush(FdDomain); + fflush(FdMemory); + fflush(FdTimings); + fflush(FdInfo); + fflush(FdTimebin); + fflush(FdBalance); + fflush(FdCPU); + fflush(FdEnergy); + +#ifdef OUTPUT_CPU_CSV + fflush(FdCPUCSV); +#endif /* #ifdef OUTPUT_CPU_CSV */ + +#ifdef USE_SFR + fflush(FdSfr); +#endif + + return 1; +} + +#ifdef DEBUG +#include <fenv.h> +/*! \brief Allows core dumps that are readable by debugger. 
+ * + * \return void + */ +void enable_core_dumps_and_fpu_exceptions(void) +{ +#ifdef DEBUG_ENABLE_FPU_EXCEPTIONS + /* enable floating point exceptions */ + + extern int feenableexcept(int __excepts); + feenableexcept(FE_DIVBYZERO | FE_INVALID); + + /* Note: FPU exceptions appear not to work properly + * when the Intel C-Compiler for Linux is used + */ +#endif /* #ifdef DEBUG_ENABLE_FPU_EXCEPTIONS */ + + /* set core-dump size to infinity */ + struct rlimit rlim; + getrlimit(RLIMIT_CORE, &rlim); + rlim.rlim_cur = RLIM_INFINITY; + setrlimit(RLIMIT_CORE, &rlim); + + /* MPICH catches the signals SIGSEGV, SIGBUS, and SIGFPE. + * The following statements reset things to the default handlers, + * which will generate a core file. + */ + signal(SIGSEGV, SIG_DFL); + signal(SIGBUS, SIG_DFL); + signal(SIGFPE, SIG_DFL); + signal(SIGINT, SIG_DFL); +} +#endif /* #ifdef DEBUG */ + +/*! \brief Wrapper for error handling; terminates code. + * + * \param[in] reason Error message. + * \param[in] file File in which error occurred. + * \param[in] line Line in which error occurred. + * \param[in] gsl_errno Error code. + * + * \return void + */ +void my_gsl_error_handler(const char *reason, const char *file, int line, int gsl_errno) +{ + terminate("GSL has reported an error: reason='%s', error handler called from file '%s', line %d, with error code %d", reason, file, + line, gsl_errno); +} + +/*! \brief Returns a random number from standard random number generator. + * + * \return Random number [0,1). + */ +double get_random_number(void) { return gsl_rng_uniform(random_generator); } + +/*! \brief Returns a random number from auxiliary random number generator. + * + * \return Random number [0,1). + */ +double get_random_number_aux(void) { return gsl_rng_uniform(random_generator_aux); } + +/*! \brief Wall-clock time in seconds. + * + * \return The current value of time as a floating-point value. + */ +double second(void) { return MPI_Wtime(); } + +/*! \brief Timing routine. 
+ * + * Strategy: call this at end of functions to account for time in this + * function, and before another (nontrivial) function is called. + * + * \return Time passed since last call of this function. + */ +double measure_time(void) +{ + double t, dt; + + t = second(); + dt = t - WallclockTime; + WallclockTime = t; + + return dt; +} + +/*! \brief Time difference. + * + * Returns the time difference between two measurements + * obtained with second(). The routine takes care of the + * possible overflow of the tick counter on 32bit systems. + * + * \param[in] t0 First time. + * \param[in] t1 Second time. + * + * \return Time difference. + */ +double timediff(double t0, double t1) +{ + double dt; + + dt = t1 - t0; + + if(dt < 0) /* overflow has occurred (for systems with 32bit tick counter) */ + { +#ifdef WALLCLOCK + dt = 0; +#else /* #ifdef WALLCLOCK */ + dt = t1 + pow(2, 32) / CLOCKS_PER_SEC - t0; +#endif /* #ifdef WALLCLOCK #else */ + } + + return dt; +} + +/*! \brief Global minimum of long long variables. + * + * \param[in] n Length of array. + * \param[in] src Source array. + * \param[out] res Result array; may be the same array as src. + * + * \return void + */ +void minimum_large_ints(int n, long long *src, long long *res) +{ + if(src == res) + { + /* we need a buffer */ + long long buf[n]; + memcpy(buf, src, n * sizeof(long long)); + MPI_Allreduce(buf, res, n, MPI_LONG_LONG_INT, MPI_MIN, MPI_COMM_WORLD); + } + else + MPI_Allreduce(src, res, n, MPI_LONG_LONG_INT, MPI_MIN, MPI_COMM_WORLD); +} + +/*! \brief Global sum of an array of int variables into a long long. + * + * Can be used with arbitrary MPI communicator. + * + * \param[in] n Length of array. + * \param[in] src Source array. + * \param[in] res Result array. + * \param[in] comm MPI communicator. 
+ * + * \return void + */ +void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm) +{ + long long lsrc[n]; + + for(int i = 0; i < n; i++) + lsrc[i] = src[i]; + + MPI_Allreduce(lsrc, res, n, MPI_LONG_LONG_INT, MPI_SUM, comm); +} + +/*! \brief Global sum of an array of int variables into a long long. + * + * To prevent overflow when summing up; wrapper function for + * sumup_large_ints_comm. + * + * \param[in] n Length of array. + * \param[in] src Source array. + * \param[out] res Result array. + * + * \return void + */ +void sumup_large_ints(int n, int *src, long long *res) { sumup_large_ints_comm(n, src, res, MPI_COMM_WORLD); } + +/*! \brief Global sum of an array of long long variables. + * + * Automatically allocates a buffer, if source and result array are identical. + * + * \param[in] n Length of array. + * \param[in] src Source array. + * \param[out] res Result array; may be the same array as src. + * + * \return void + */ +void sumup_longs(int n, long long *src, long long *res) +{ + if(src == res) + { + /* we need a buffer */ + long long buf[n]; + memcpy(buf, src, n * sizeof(long long)); + MPI_Allreduce(buf, res, n, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); + } + else + MPI_Allreduce(src, res, n, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); +} + +/*! \brief Compares two elements of type size_t. + * + * \param[in] a First element. + * \param[in] b Second element. + * + * \return The larger of both elements, first one if equal. + */ +size_t sizemax(size_t a, size_t b) +{ + if(a < b) + return b; + else + return a; +} + +/*! \brief Reads from process info file of linux system. 
+ * + * \return void + */ +void report_VmRSS(void) +{ + pid_t my_pid; + FILE *fd; + char buf[1024]; + + my_pid = getpid(); + + sprintf(buf, "/proc/%d/status", my_pid); + + if((fd = fopen(buf, "r"))) + { + while(1) + { + if(fgets(buf, 500, fd) != buf) + break; + + if(strncmp(buf, "VmRSS", 5) == 0) + { + printf("ThisTask=%d: %s", ThisTask, buf); + } + if(strncmp(buf, "VmSize", 6) == 0) + { + printf("ThisTask=%d: %s", ThisTask, buf); + } + } + fclose(fd); + } +} + +/*! \brief Reads from memory info file of Linux system. + * + * \return Comittable memory. + */ +long long report_comittable_memory(long long *MemTotal, long long *Committed_AS, long long *SwapTotal, long long *SwapFree) +{ + FILE *fd; + char buf[1024]; + + if((fd = fopen("/proc/meminfo", "r"))) + { + while(1) + { + if(fgets(buf, 500, fd) != buf) + break; + + if(bcmp(buf, "MemTotal", 8) == 0) + { + *MemTotal = atoll(buf + 10); + } + if(strncmp(buf, "Committed_AS", 12) == 0) + { + *Committed_AS = atoll(buf + 14); + } + if(strncmp(buf, "SwapTotal", 9) == 0) + { + *SwapTotal = atoll(buf + 11); + } + if(strncmp(buf, "SwapFree", 8) == 0) + { + *SwapFree = atoll(buf + 10); + } + } + fclose(fd); + } + + return (*MemTotal - *Committed_AS); +} + +/*! \brief Checks if parameter max memsize is smaller than avialable memory. + * + * \return void + */ +void check_maxmemsize_setting(void) +{ + int errflag = 0, errflag_tot; + + if(All.MaxMemSize > (MemoryOnNode / 1024.0 / TasksInThisNode) && RankInThisNode == 0) + { + printf("On node '%s', we have %d MPI ranks and at most %g MB available. This is not enough space for MaxMemSize = %g MB\n", + loc_node.name, TasksInThisNode, MemoryOnNode / 1024.0, (double)All.MaxMemSize); + errflag = 1; + fflush(stdout); + } + + MPI_Allreduce(&errflag, &errflag_tot, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); +#ifndef __OSX__ + if(errflag_tot) + mpi_terminate("Not enough memory error!"); +#endif /* #ifndef __OSX__ */ +} + +/*! \brief Gathers memory information from tasks and write them stdout. 
+ * + * Part of HOST_MEMORY_REPORTING, printed at startup. + * + * \return void + */ +void mpi_report_committable_memory(void) +{ + long long *sizelist, maxsize[6], minsize[6]; + double avgsize[6]; + int i, imem, mintask[6], maxtask[6]; + long long Mem[6]; + char label[512]; + + Mem[0] = report_comittable_memory(&Mem[1], &Mem[2], &Mem[3], &Mem[4]); + Mem[5] = Mem[1] - Mem[0]; + + MemoryOnNode = Mem[1]; + + for(imem = 0; imem < 6; imem++) + { + sizelist = (long long *)malloc(NTask * sizeof(long long)); + MPI_Allgather(&Mem[imem], sizeof(long long), MPI_BYTE, sizelist, sizeof(long long), MPI_BYTE, MPI_COMM_WORLD); + + for(i = 1, mintask[imem] = 0, maxtask[imem] = 0, maxsize[imem] = minsize[imem] = sizelist[0], avgsize[imem] = sizelist[0]; + i < NTask; i++) + { + if(sizelist[i] > maxsize[imem]) + { + maxsize[imem] = sizelist[i]; + maxtask[imem] = i; + } + if(sizelist[i] < minsize[imem]) + { + minsize[imem] = sizelist[i]; + mintask[imem] = i; + } + avgsize[imem] += sizelist[i]; + } + + free(sizelist); + } + + if(ThisTask == 0) + { + printf( + "\n-------------------------------------------------------------------------------------------------------------------------" + "\n"); + for(imem = 0; imem < 6; imem++) + { + switch(imem) + { + case 0: + sprintf(label, "AvailMem"); + break; + case 1: + sprintf(label, "Total Mem"); + break; + case 2: + sprintf(label, "Committed_AS"); + break; + case 3: + sprintf(label, "SwapTotal"); + break; + case 4: + sprintf(label, "SwapFree"); + break; + case 5: + sprintf(label, "AllocMem"); + break; + } + printf("%s:\t Largest = %10.2f Mb (on task=%4d), Smallest = %10.2f Mb (on task=%4d), Average = %10.2f Mb\n", label, + maxsize[imem] / (1024.0), maxtask[imem], minsize[imem] / (1024.0), mintask[imem], avgsize[imem] / (1024.0 * NTask)); + } + printf( + "-------------------------------------------------------------------------------------------------------------------------" + "\n"); + } + + char name[MPI_MAX_PROCESSOR_NAME]; + + if(ThisTask == 
maxtask[2]) + { + int len; + MPI_Get_processor_name(name, &len); + } + + MPI_Bcast(name, MPI_MAX_PROCESSOR_NAME, MPI_BYTE, maxtask[2], MPI_COMM_WORLD); + + if(ThisTask == 0) + { + printf("Task=%d has the maximum commited memory and is host: %s\n", maxtask[2], name); + printf( + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + "\n"); + } + + fflush(stdout); +} + +/*! \brief Find the first bit set in the argument. + * + * \param[in] i Peankoey variable. + * + * \return First bit set (type int). + */ +int my_ffsll(peanokey i) +{ + int res = 0; + + while(i > 0xffffffff) + { + res += 32; + i >>= 32; + } + + return res + ffs(i); +} + +/*! \brief Finds last bit set in x. + * + * The following function appears in the linux kernel. + * + * \param[in] x Ineger Input. + * + * \return Last bit set in x. + */ +int my_fls(int x) +{ + int r = 32; + + if(!x) + return 0; + if(!(x & 0xffff0000u)) + { + x <<= 16; + r -= 16; + } + if(!(x & 0xff000000u)) + { + x <<= 8; + r -= 8; + } + if(!(x & 0xf0000000u)) + { + x <<= 4; + r -= 4; + } + if(!(x & 0xc0000000u)) + { + x <<= 2; + r -= 2; + } + if(!(x & 0x80000000u)) + { + x <<= 1; + r -= 1; + } + return r; +} diff --git a/src/amuse/community/arepo/src/utils/tags.h b/src/amuse/community/arepo/src/utils/tags.h new file mode 100644 index 0000000000..e26bbaa4a5 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/tags.h @@ -0,0 +1,50 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/tags.h + * \date 05/2018 + * \brief Tag defines. + * \details Choice of numbers for historic reasons. + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 28.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#define TAG_N 10 /*!< Various tags used for labelling MPI messages */ +#define TAG_HEADER 11 +#define TAG_PDATA 12 +#define TAG_SPHDATA 13 +#define TAG_KEY 14 +#define TAG_GRAV_B 19 +#define TAG_HYDRO_A 22 +#define TAG_HYDRO_B 23 +#define TAG_NFORTHISTASK 24 +#define TAG_NONPERIOD_A 29 +#define TAG_NONPERIOD_B 30 +#define TAG_NONPERIOD_C 31 +#define TAG_DENS_A 35 +#define TAG_DENS_B 36 +#define TAG_LOCALN 37 +#define TAG_FOF_A 45 +#define TAG_PDATA_SPH 70 +#define TAG_KEY_SPH 71 +#define TAG_BARRIER 85 +#define TAG_NODE_DATA 105 diff --git a/src/amuse/community/arepo/src/utils/timer.h b/src/amuse/community/arepo/src/utils/timer.h new file mode 100644 index 0000000000..a622d1e8e5 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/timer.h @@ -0,0 +1,251 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/timer.h + * \date 05/2018 + * \brief Timer macros for Arepo. + * \details + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 28.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#if !defined(TIMER_H) || defined(TIMER_STRUCT) +#define TIMER_H + +#define DETAILED_TIMING_GRAVWALK 0 +#define DETAILED_TIMING_STELLARDENSITY 1 + +#define TIMER_INSTRUMENT_START(counter) +#define TIMER_INSTRUMENT_STOP(counter) +#define TIMER_INSTRUMENT_CREATE(name, descr) ; + +#ifdef TIMER_STRUCT +#undef TIMER_CREATE +/*! \def TIMER_CREATE(name,desc, par, symba, symbb ) + * \brief creates a new CPU timer + * + * \param name name used in the code to reference this timer + * \param desc description string used in output files + * \param parent parent of this timer to build a tree-like hierarchy of timers + * \param symba character used for active time in balance.txt + * \param symbb character used for imbalance in balance.txt + * + */ +#define TIMER_CREATE(name, desc, par, symba, symbb) \ + Timer_data[name].parent = par; \ + strncpy(Timer_data[name].shortname, #name, 40); \ + strncpy(Timer_data[name].longname, (desc), 40); \ + Timer_data[name].symb = (symba); \ + Timer_data[name].symbImbal = (symbb); \ + TIMER_INSTRUMENT_CREATE(name, desc) + +#else /* #ifdef TIMER_STRUCT */ + +#define TIMER_STACK_DEPTH 30 +#define TIMER_CREATE(name, desc, parent, symba, symbb) name, + +/*! \def TIMER_START(counter) + * \brief Starts the timer counter. 
+ * + * Use this macro instead of directly accessing the CPU_Step array, + * so manual instrumentation APIs can be attached. + * + * \param[in] counter Name of the timer to start. + */ +#define TIMER_START_INTERNAL(counter) \ + { \ + TIMER_INSTRUMENT_START(counter); \ + CPU_Step[TimerStack[TimerStackPos]] += measure_time(); \ + int itimer; \ + for(itimer = 0; itimer <= TimerStackPos; itimer++) \ + if(counter == TimerStack[itimer]) \ + { \ + printf("Try to start timer %d, but it is already running.\n", counter); \ + terminate("fail") \ + }; \ + if(++TimerStackPos >= TIMER_STACK_DEPTH) \ + { \ + terminate("Run out of timer stack space, increase TIMER_STACK_DEPTH"); \ + } \ + else \ + { \ + TimerStack[TimerStackPos] = (counter); \ + } \ + } + +#define TIMER_START(counter) TIMER_START_INTERNAL(counter) + +/*! \def TIMER_STOP(counter) + * \brief Stops the timer counter + * + * Use this macro instead of directly accessing the CPU_Step array, + * so manual instrumentation APIs can be attached. + * + * \param[in] counter Name of the timer to stop. + */ +#define TIMER_STOP_INTERNAL(counter) \ + { \ + if(TimerStack[TimerStackPos] != (counter)) \ + { \ + terminate("Wrong use of TIMER_STOP, you must stop the timer started last"); \ + } \ + CPU_Step[TimerStack[TimerStackPos--]] += measure_time(); \ + if(TimerStackPos < 0) \ + { \ + terminate("Do not stop the out CPU_MISC timer"); \ + } \ + TIMER_INSTRUMENT_STOP(counter); \ + } + +#define TIMER_STOP(counter) TIMER_STOP_INTERNAL(counter) + +/*! \def TIMER_STOPSTART(stop, start) + * \brief Stops the timer 'stop' and starts the timer 'start' + * + * Use this macro instead of directly accessing the CPU_Step array, + * so manual instrumentation APIs can be attached. + * + * \param[in] stop Name of the timer to stop + * \param[in] start Name of the timer to start + */ +#define TIMER_STOPSTART(stop, start) \ + { \ + TIMER_STOP_INTERNAL(stop); \ + TIMER_START_INTERNAL(start); \ + } + +/*! 
\def TIMER_ADD(counter, amount) + * \brief Adds amount to the timer counter. + + * \param[in] counter Name of the timer to add to. + * \param[in] amount Amount to add to timer counter. + */ +#define TIMER_ADD(counter, amount) CPU_Step[counter] += (amount); + +/*! \def TIMER_DIFF(counter) + * \brief Returns amount elapsed for the timer since last save with + * TIMER_STORE. + * + * \param[in] counter Name of the timer to query. + */ +#define TIMER_DIFF(counter) (CPU_Step[counter] - CPU_Step_Stored[counter]) + +/*! \def TIMER_STORE + * \brief Copies the current value of CPU times to a stored variable, such + * that differences with respect to this reference can be calculated. + */ +#define TIMER_STORE memcpy(CPU_Step_Stored, CPU_Step, sizeof(CPU_Step)); + +enum timers +{ + CPU_NONE = -2, /*!< used for counters without a parent */ + CPU_ROOT = -1, /*!< root node of the tree */ +#endif /* #ifdef TIMER_STRUCT #else */ + +/* possible characters to use for marking the parts: + * + * abdefghijklmnopqrstuvABCDEFGHHIJKLMNOPQRSTUV + * 0123456789 + * -:.*=[]^&;~/_$()?+"<>@#!|\ + */ + +/*add your counter here, they must appear in the right order*/ + +TIMER_CREATE(CPU_ALL, "total", CPU_ROOT, '-', '-') /*!< root timer, everything should be below this timer */ +TIMER_CREATE(CPU_TREE, "treegrav", CPU_ALL, 'a', ')') +TIMER_CREATE(CPU_TREEBUILD, "treebuild", CPU_TREE, 'b', '(') +TIMER_CREATE(CPU_TREEBUILD_INSERT, "insert", CPU_TREEBUILD, 'c', '*') +TIMER_CREATE(CPU_TREEBUILD_BRANCHES, "branches", CPU_TREEBUILD, 'd', '&') +TIMER_CREATE(CPU_TREEBUILD_TOPLEVEL, "toplevel", CPU_TREEBUILD, 'e', '^') +TIMER_CREATE(CPU_TREECOSTMEASURE, "treecostm", CPU_TREE, 'f', '%') +TIMER_CREATE(CPU_TREEWALK, "treewalk", CPU_TREE, 'g', '$') +TIMER_CREATE(CPU_TREEWALK1, "treewalk1", CPU_TREEWALK, 'h', '#') +TIMER_CREATE(CPU_TREEWALK2, "treewalk2", CPU_TREEWALK, 'i', '@') +TIMER_CREATE(CPU_TREEBALSNDRCV, "treebalsndrcv", CPU_TREE, 'j', '!') +TIMER_CREATE(CPU_TREESENDBACK, "treeback", CPU_TREE, 'm', '7')
+TIMER_CREATE(CPU_TREEDIRECT, "treedirect", CPU_TREE, 'r', '2') +#ifdef PMGRID +TIMER_CREATE(CPU_PM_GRAVITY, "pm_grav", CPU_ALL, 's', '1') +#endif /* #ifdef PMGRID */ +TIMER_CREATE(CPU_NGBTREEBUILD, "ngbtreebuild", CPU_ALL, 't', 'Z') +TIMER_CREATE(CPU_NGBTREEUPDATEVEL, "ngbtreevelupdate", CPU_ALL, 'u', 'Y') +TIMER_CREATE(CPU_MESH, "voronoi", CPU_ALL, 'v', 'X') +TIMER_CREATE(CPU_MESH_INSERT, "insert", CPU_MESH, 'w', 'W') +TIMER_CREATE(CPU_MESH_FIND_DP, "findpoints", CPU_MESH, 'x', 'V') +TIMER_CREATE(CPU_MESH_CELLCHECK, "cellcheck", CPU_MESH, 'y', 'U') +TIMER_CREATE(CPU_MESH_GEOMETRY, "geometry", CPU_MESH, 'z', 'T') +TIMER_CREATE(CPU_MESH_EXCHANGE, "exchange", CPU_MESH, 'A', 'S') +TIMER_CREATE(CPU_MESH_DYNAMIC, "dynamic", CPU_MESH, 'B', 'R') +TIMER_CREATE(CPU_HYDRO, "hydro", CPU_ALL, 'C', 'Q') +TIMER_CREATE(CPU_GRADIENTS, "gradients", CPU_HYDRO, 'D', 'P') +TIMER_CREATE(CPU_FLUXES, "fluxes", CPU_HYDRO, 'F', 'N') +TIMER_CREATE(CPU_FLUXES_COMM, "fluxcomm", CPU_HYDRO, 'H', 'L') +TIMER_CREATE(CPU_CELL_UPDATES, "updates", CPU_HYDRO, 'J', 'j') +TIMER_CREATE(CPU_SET_VERTEXVELS, "vertex vel", CPU_HYDRO, 'K', 'I') +TIMER_CREATE(CPU_MHD, "mhd", CPU_HYDRO, '4', 'p') +TIMER_CREATE(CPU_DOMAIN, "domain", CPU_ALL, 'U', 'y') +TIMER_CREATE(CPU_PEANO, "peano", CPU_ALL, 'V', 'x') +TIMER_CREATE(CPU_DRIFTS, "drift/kicks", CPU_ALL, 'W', 'w') +TIMER_CREATE(CPU_TIMELINE, "timeline", CPU_ALL, 'X', 'v') +#ifdef TREE_BASED_TIMESTEPS +TIMER_CREATE(CPU_TREE_TIMESTEPS, "treetimesteps", CPU_ALL, 'Y', 'u') +#endif /* #ifdef TREE_BASED_TIMESTEPS */ +TIMER_CREATE(CPU_SNAPSHOT, "i/o", CPU_ALL, 'Z', 't') +TIMER_CREATE(CPU_LOGS, "logs", CPU_ALL, '1', 's') +TIMER_CREATE(CPU_COOLINGSFR, "sfrcool", CPU_ALL, '2', 'r') +#ifdef FOF +TIMER_CREATE(CPU_FOF, "fof", CPU_ALL, '#', 'h') +#endif /* #ifdef FOF */ +#ifdef SUBFIND +TIMER_CREATE(CPU_SUBFIND, "subfind", CPU_ALL, '$', 'g') +#endif /* #ifdef SUBFIND */ +TIMER_CREATE(CPU_REFINE, "refine", CPU_ALL, '%', 'f') +TIMER_CREATE(CPU_DEREFINE, "mesh_derefine", 
CPU_ALL, '^', 'e') +TIMER_CREATE(CPU_MAKEIMAGES, "images", CPU_ALL, '&', 'd') +TIMER_CREATE(CPU_INIT, "initializ.", CPU_ALL, '*', 'c') +TIMER_CREATE(CPU_RESTART, "restart", CPU_ALL, '(', 'b') +TIMER_CREATE(CPU_MISC, "misc", CPU_ALL, ')', 'a') +TIMER_CREATE(CPU_LAST, "LAST", CPU_NONE, ' ', ' ') /*! Date: Fri, 18 Mar 2022 17:27:57 +0000 Subject: [PATCH 03/70] remove '#include ./arepoconfig.h' from allvars.h --- src/amuse/community/arepo/src/main/allvars.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/amuse/community/arepo/src/main/allvars.h b/src/amuse/community/arepo/src/main/allvars.h index 2dc46e56b3..409165f7b4 100644 --- a/src/amuse/community/arepo/src/main/allvars.h +++ b/src/amuse/community/arepo/src/main/allvars.h @@ -41,8 +41,6 @@ #include #include -#include "./arepoconfig.h" - #ifdef IMPOSE_PINNING #include #endif /* #ifdef IMPOSE_PINNING */ From b221aec6cffe53d3cc7c10acf490434774c5e65c Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 18 Mar 2022 17:32:56 +0000 Subject: [PATCH 04/70] update Makefile to include arepo source code --- src/amuse/community/arepo/src/Makefile | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 66f18374a1..4b7835bd0a 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,20 +1,27 @@ -CFLAGS += -Wall -g +GSL_INCL = -I/opt/Homebrew/include # Need to make GSL_INCL generalisable. 
+ +CFLAGS += -Wall -g $(GSL_INCL) CXXFLAGS += $(CFLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) CODELIB = libarepo.a -CODEOBJS = test.o +SUBDIR = add_backgroundgrid cooling debug_md5 domain fof gitversion gravity \ + hydro init io main mesh mpi_utils ngbtree star_formation subfind \ + time_integration utils +SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c)) + +CODEOBJS = test.o $(SRCS:c=o) AR = ar ruv RANLIB = ranlib RM = rm -all: $(CODELIB) - +all: $(CODELIB) clean: $(RM) -f *.o *.a + $(RM) $(SRCS:c=o) distclean: clean From 3b250368dcdb819f97d7bb81e4330a1b4fc98c09 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 18 Mar 2022 17:41:06 +0000 Subject: [PATCH 05/70] add TODO to Makefile --- src/amuse/community/arepo/src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 4b7835bd0a..67b321843b 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,4 +1,4 @@ -GSL_INCL = -I/opt/Homebrew/include # Need to make GSL_INCL generalisable. +GSL_INCL = -I/opt/Homebrew/include # TODO: Need to make GSL_INCL generalisable. 
CFLAGS += -Wall -g $(GSL_INCL) CXXFLAGS += $(CFLAGS) From d56c2b3d3fd7961f63d307cd010bb2c2076aafe7 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 13:20:31 +0100 Subject: [PATCH 06/70] rename classes --- src/amuse/community/arepo/interface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 64130dc384..7be0a7665f 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -1,6 +1,6 @@ from amuse.community import * -class arepoInterface(CodeInterface): +class ArepoInterface(CodeInterface): include_headers = ['worker_code.h'] @@ -17,8 +17,8 @@ def echo_int(): return function -class arepo(InCodeComponentImplementation): +class Arepo(InCodeComponentImplementation): def __init__(self, **options): - InCodeComponentImplementation.__init__(self, arepoInterface(**options), **options) + InCodeComponentImplementation.__init__(self, ArepoInterface(**options), **options) From 469b04d20fe1ba2dccf3e6fcaa70bd9db90f3fc9 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 13:29:16 +0100 Subject: [PATCH 07/70] inherit from GravitationalDynamicsInterface --- src/amuse/community/arepo/interface.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 7be0a7665f..a52600e5cf 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -1,6 +1,7 @@ from amuse.community import * +from amuse.community.interface.gd import GravitationalDynamicsInterface -class ArepoInterface(CodeInterface): +class ArepoInterface(CodeInterface, GravitationalDynamicsInterface): include_headers = ['worker_code.h'] From a0a927cf913fa0ae534ebd0c3f94ee41337690cd Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 13:51:12 +0100 Subject: [PATCH 08/70] inherit from 
LiteratureReferencesMixIn --- src/amuse/community/arepo/interface.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index a52600e5cf..a9248e400c 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -1,12 +1,16 @@ from amuse.community import * from amuse.community.interface.gd import GravitationalDynamicsInterface -class ArepoInterface(CodeInterface, GravitationalDynamicsInterface): +class ArepoInterface( + CodeInterface, + GravitationalDynamicsInterface, + LiteratureReferencesMixIn): include_headers = ['worker_code.h'] def __init__(self, **keyword_arguments): CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) + LiteratureReferencesMixIn.__init__(self) @legacy_function def echo_int(): From 016c75ec2063a553cd2cc73b8b939f0984eb81ee Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 13:55:52 +0100 Subject: [PATCH 09/70] remove wildcard import --- src/amuse/community/arepo/interface.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index a9248e400c..8f11ab923f 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -1,4 +1,8 @@ -from amuse.community import * +from amuse.community import CodeInterface +from amuse.community import LegacyFunctionSpecification +from amuse.community import legacy_function +from amuse.community import LiteratureReferencesMixIn + from amuse.community.interface.gd import GravitationalDynamicsInterface class ArepoInterface( From fae9ec3d41595dc867d330687388190cd647d0ec Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 14:23:25 +0100 Subject: [PATCH 10/70] inherit from GravitationalDynamics --- src/amuse/community/arepo/interface.py | 5 +++-- 1 file changed, 3 insertions(+), 2 
deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 8f11ab923f..6601714257 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -4,6 +4,7 @@ from amuse.community import LiteratureReferencesMixIn from amuse.community.interface.gd import GravitationalDynamicsInterface +from amuse.community.interface.gd import GravitationalDynamics class ArepoInterface( CodeInterface, @@ -26,8 +27,8 @@ def echo_int(): return function -class Arepo(InCodeComponentImplementation): +class Arepo(GravitationalDynamics): def __init__(self, **options): - InCodeComponentImplementation.__init__(self, ArepoInterface(**options), **options) + GravitationalDynamics.__init__(self, ArepoInterface(**options), **options) From e8a31134927ecb0225583195694b3655607d073f Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 14:29:11 +0100 Subject: [PATCH 11/70] minor reformatting --- src/amuse/community/arepo/interface.py | 29 +++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 6601714257..89e089fc2a 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -6,29 +6,30 @@ from amuse.community.interface.gd import GravitationalDynamicsInterface from amuse.community.interface.gd import GravitationalDynamics + class ArepoInterface( CodeInterface, GravitationalDynamicsInterface, - LiteratureReferencesMixIn): - - include_headers = ['worker_code.h'] - + LiteratureReferencesMixIn +): + + include_headers = ["worker_code.h"] + def __init__(self, **keyword_arguments): CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) LiteratureReferencesMixIn.__init__(self) - + @legacy_function def echo_int(): - function = LegacyFunctionSpecification() - function.addParameter('int_in', dtype='int32', 
direction=function.IN) - function.addParameter('int_out', dtype='int32', direction=function.OUT) - function.result_type = 'int32' + function = LegacyFunctionSpecification() + function.addParameter("int_in", dtype="int32", direction=function.IN) + function.addParameter("int_out", dtype="int32", direction=function.OUT) + function.result_type = "int32" function.can_handle_array = True return function - - -class Arepo(GravitationalDynamics): - def __init__(self, **options): - GravitationalDynamics.__init__(self, ArepoInterface(**options), **options) + +class Arepo(GravitationalDynamics): + def __init__(self, **options): + GravitationalDynamics.__init__(self, ArepoInterface(**options), **options) From a3a020a27a8d1f4564d54f0ce4cf4b987aaf7b95 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 15:00:09 +0100 Subject: [PATCH 12/70] add ArepoInterface docstring --- src/amuse/community/arepo/interface.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 89e089fc2a..34d46a07a9 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -12,6 +12,16 @@ class ArepoInterface( GravitationalDynamicsInterface, LiteratureReferencesMixIn ): + """ + Arepo is a cosmological magnetohydrodynamical moving-mesh simulation code, + descended from GADGET. + + References: + .. [#] Springel, V., 2010, MNRAS, 401, 791 (Arepo) [2010MNRAS.401..791S] + .. [#] Pakmor, R., Bauer, A., Springel, V., 2011, MNRAS, 418, 1392 (Magnetohydrodynamics Module) [2011MNRAS.418.1392P] + .. [#] Pakmor, R. et al., 2016, MNRAS, 455, 1134 (Gradient Estimation) [2016MNRAS.455.1134P] + .. 
[#] Weinberger, R., Springel, V., Pakmor, R., 2020, ApJS, 248, 32 (Public Code Release) [2020ApJS..248...32W] + """ include_headers = ["worker_code.h"] From 1d668dd6b713caf0a5bd43c44674d42d785e9863 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 15:08:46 +0100 Subject: [PATCH 13/70] add TODO re CodeWithDataDirectories --- src/amuse/community/arepo/interface.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 34d46a07a9..989190ebd8 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -28,6 +28,7 @@ class ArepoInterface( def __init__(self, **keyword_arguments): CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) LiteratureReferencesMixIn.__init__(self) + # TODO: Determine whether need to inherit from CodeWithDataDirectories. @legacy_function def echo_int(): From aa1c5982c557cee7cac87e4fa5d0541bb6bc7221 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 16:32:49 +0100 Subject: [PATCH 14/70] add set_parameters() --- src/amuse/community/arepo/interface.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 989190ebd8..ac122b9f23 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -31,12 +31,10 @@ def __init__(self, **keyword_arguments): # TODO: Determine whether need to inherit from CodeWithDataDirectories. 
@legacy_function - def echo_int(): + def set_parameters(): function = LegacyFunctionSpecification() - function.addParameter("int_in", dtype="int32", direction=function.IN) - function.addParameter("int_out", dtype="int32", direction=function.OUT) + function.addParameter("param_file", dtype="string", direction=function.IN) function.result_type = "int32" - function.can_handle_array = True return function From e312e0185138fb4e99230588540c7ad513e37b20 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 17:24:54 +0100 Subject: [PATCH 15/70] change GSL_INCL to GSL_FLAGS --- src/amuse/community/arepo/src/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 67b321843b..4f99b34024 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,6 +1,6 @@ -GSL_INCL = -I/opt/Homebrew/include # TODO: Need to make GSL_INCL generalisable. +GSL_FLAGS = -I/opt/Homebrew/include # TODO: Need to make GSL_FLAGS generalisable. -CFLAGS += -Wall -g $(GSL_INCL) +CFLAGS += -Wall -g $(GSL_FLAGS) CXXFLAGS += $(CFLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) From d1c5ccaeb08ad872d4c39076aa72d4f56babb9df Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 17:57:32 +0100 Subject: [PATCH 16/70] add AMUSE_DIR conditional and include config.mk --- src/amuse/community/arepo/src/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 4f99b34024..8b3de12905 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,3 +1,8 @@ +ifeq ($(origin AMUSE_DIR), undefined) + AMUSE_DIR := $(shell amusifier --get-amuse-dir) +endif +-include $(AMUSE_DIR)/config.mk + GSL_FLAGS = -I/opt/Homebrew/include # TODO: Need to make GSL_FLAGS generalisable. 
CFLAGS += -Wall -g $(GSL_FLAGS) From c1a672f8d1162cfe12a99814b4a8577dafaf43fd Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Thu, 31 Mar 2022 10:02:33 +0100 Subject: [PATCH 17/70] generate interface.cc --- src/amuse/community/arepo/interface.cc | 175 +++++++++++++++++++++++-- 1 file changed, 166 insertions(+), 9 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index a590e82689..160b99ab25 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -1,11 +1,168 @@ -extern int echo(int input); - -/* - * Interface code - */ - -int echo_int(int input, int * output){ - *output = echo(input); - return 0; +#include "worker_code.h" + +int get_mass(int index_of_the_particle, double * mass){ + return 0; +} + +int commit_particles(){ + return 0; +} + +int get_time(double * time){ + return 0; +} + +int set_mass(int index_of_the_particle, double mass){ + return 0; +} + +int get_index_of_first_particle(int * index_of_the_particle){ + return 0; +} + +int get_total_radius(double * radius){ + return 0; +} + +int new_particle(int * index_of_the_particle, double mass, double x, + double y, double z, double vx, double vy, double vz, double radius){ + return 0; +} + +int get_total_mass(double * mass){ + return 0; +} + +int evolve_model(double time){ + return 0; +} + +int set_eps2(double epsilon_squared){ + return 0; +} + +int get_begin_time(double * time){ + return 0; +} + +int get_eps2(double * epsilon_squared){ + return 0; +} + +int get_index_of_next_particle(int index_of_the_particle, + int * index_of_the_next_particle){ + return 0; +} + +int delete_particle(int index_of_the_particle){ + return 0; +} + +int get_potential(int index_of_the_particle, double * potential){ + return 0; +} + +int synchronize_model(){ + return 0; +} + +int set_state(int index_of_the_particle, double mass, double x, double y, + double z, double vx, double vy, double vz, double radius){ + return 0; +} + +int 
get_state(int index_of_the_particle, double * mass, double * x, + double * y, double * z, double * vx, double * vy, double * vz, + double * radius){ + return 0; +} + +int get_time_step(double * time_step){ + return 0; +} + +int recommit_particles(){ + return 0; +} + +int get_kinetic_energy(double * kinetic_energy){ + return 0; +} + +int get_number_of_particles(int * number_of_particles){ + return 0; +} + +int set_acceleration(int index_of_the_particle, double ax, double ay, + double az){ + return 0; +} + +int get_center_of_mass_position(double * x, double * y, double * z){ + return 0; +} + +int get_center_of_mass_velocity(double * vx, double * vy, double * vz){ + return 0; +} + +int get_radius(int index_of_the_particle, double * radius){ + return 0; +} + +int set_begin_time(double time){ + return 0; +} + +int set_radius(int index_of_the_particle, double radius){ + return 0; +} + +int cleanup_code(){ + return 0; +} + +int recommit_parameters(){ + return 0; +} + +int initialize_code(){ + return 0; +} + +int get_potential_energy(double * potential_energy){ + return 0; +} + +int get_velocity(int index_of_the_particle, double * vx, double * vy, + double * vz){ + return 0; +} + +int get_position(int index_of_the_particle, double * x, double * y, + double * z){ + return 0; +} + +int set_position(int index_of_the_particle, double x, double y, double z){ + return 0; +} + +int get_acceleration(int index_of_the_particle, double * ax, double * ay, + double * az){ + return 0; +} + +int commit_parameters(){ + return 0; +} + +int set_parameters(char * param_file){ + return 0; +} + +int set_velocity(int index_of_the_particle, double vx, double vy, + double vz){ + return 0; } From 12a022d97abb63a30468db039011c3b53843e47f Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Thu, 31 Mar 2022 10:08:44 +0100 Subject: [PATCH 18/70] update interface name in Makefile --- src/amuse/community/arepo/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 7c392db261..567a479296 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -30,10 +30,10 @@ $(CODELIB): make -C src all worker_code.cc: interface.py - $(CODE_GENERATOR) --type=c interface.py arepoInterface -o $@ + $(CODE_GENERATOR) --type=c interface.py ArepoInterface -o $@ worker_code.h: interface.py - $(CODE_GENERATOR) --type=H interface.py arepoInterface -o $@ + $(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ From fb1f673a05290800331631c390c5473eb2b99ab9 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Thu, 31 Mar 2022 10:20:13 +0100 Subject: [PATCH 19/70] update interface name in test_arepo.py --- src/amuse/community/arepo/test_arepo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py index 8cdeabb474..d4a7b22b38 100644 --- a/src/amuse/community/arepo/test_arepo.py +++ b/src/amuse/community/arepo/test_arepo.py @@ -1,12 +1,12 @@ from amuse.test.amusetest import TestWithMPI -from .interface import arepoInterface +from .interface import ArepoInterface from .interface import arepo -class arepoInterfaceTests(TestWithMPI): +class ArepoInterfaceTests(TestWithMPI): def test1(self): - instance = arepoInterface() + instance = ArepoInterface() result,error = instance.echo_int(12) self.assertEquals(error, 0) self.assertEquals(result, 12) From 4d050da72c6cd0310ed4293f972cb2d030e183c6 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Thu, 31 Mar 2022 10:29:58 +0100 Subject: [PATCH 20/70] update arepo to Arepo --- src/amuse/community/arepo/test_arepo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py index d4a7b22b38..a54108fd43 
100644 --- a/src/amuse/community/arepo/test_arepo.py +++ b/src/amuse/community/arepo/test_arepo.py @@ -1,7 +1,7 @@ from amuse.test.amusetest import TestWithMPI from .interface import ArepoInterface -from .interface import arepo +from .interface import Arepo class ArepoInterfaceTests(TestWithMPI): From 4ff74a9cce4c78b08d5669c2562e8c011e182edf Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 15:33:26 +0100 Subject: [PATCH 21/70] add code from arepo main.c to initialize_code() and cleanup_code() --- src/amuse/community/arepo/interface.cc | 79 +++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 160b99ab25..b6656bc04a 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -1,5 +1,76 @@ #include "worker_code.h" +#include "src/main/allvars.h" +#include "src/main/proto.h" + +int initialize_code(){ + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); + MPI_Comm_size(MPI_COMM_WORLD, &NTask); + + /* output a welcome message */ + hello(); + + /* initialize CPU-time/Wallclock-time measurement */ + init_cpu_log(); + + determine_compute_nodes(); + + for(PTask = 0; NTask > (1 << PTask); PTask++) + ; + + begrun0(); + + strcpy(ParameterFile, "param.txt"); /* Removing command line parsing. argv[1] replaced with "param.txt". */ + RestartFlag = 0; + + begrun1(); /* set-up run */ + + char fname[MAXLEN_PATH]; + strcpy(fname, All.InitCondFile); + + /* now we can load the file */ + +#ifdef READ_DM_AS_GAS + read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES); +#else /* #ifdef READ_DM_AS_GAS */ + read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES); +#endif /* #ifdef READ_DM_AS_GAS #else */ + + /* init returns a status code, where a value of >=0 means that endrun() should be called. 
*/ + int status = init(); + + if(status >= 0) + { + if(status > 0) + mpi_printf("init() returned with %d\n", status); + + cleanup_code(); + } + + begrun2(); + return 0; +} + +int cleanup_code(){ + mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); + mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); + fflush(stdout); + +#ifdef HAVE_HDF5 + /*The hdf5 library will sometimes register an atexit() handler that calls its + * error handler. In AREPO this is set to my_hdf_error_handler, which calls + * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed. + * Hence unset the HDF error handler here + */ + H5Eset_auto(NULL, NULL); +#endif /* #ifdef HAVE_HDF5 */ + + MPI_Finalize(); + exit(0); + return 0; +} + int get_mass(int index_of_the_particle, double * mass){ return 0; } @@ -118,18 +189,10 @@ int set_radius(int index_of_the_particle, double radius){ return 0; } -int cleanup_code(){ - return 0; -} - int recommit_parameters(){ return 0; } -int initialize_code(){ - return 0; -} - int get_potential_energy(double * potential_energy){ return 0; } From 7efa65153bf353a56a0601d1decca50a881adb6f Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 15:44:25 +0100 Subject: [PATCH 22/70] comment out set_parameters() and add TODO --- src/amuse/community/arepo/interface.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index ac122b9f23..fef740d365 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -30,12 +30,15 @@ def __init__(self, **keyword_arguments): LiteratureReferencesMixIn.__init__(self) # TODO: Determine whether need to inherit from CodeWithDataDirectories. 
- @legacy_function - def set_parameters(): - function = LegacyFunctionSpecification() - function.addParameter("param_file", dtype="string", direction=function.IN) - function.result_type = "int32" - return function + # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function. + # This function has been kept as a template for future functions. + + # @legacy_function + # def set_parameters(): + # function = LegacyFunctionSpecification() + # function.addParameter("param_file", dtype="string", direction=function.IN) + # function.result_type = "int32" + # return function class Arepo(GravitationalDynamics): From 2c1f16add06379e9d8d62c67f9026eed1678f775 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 15:50:26 +0100 Subject: [PATCH 23/70] add TODO to test_arepo.py --- src/amuse/community/arepo/test_arepo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py index a54108fd43..6aea82105f 100644 --- a/src/amuse/community/arepo/test_arepo.py +++ b/src/amuse/community/arepo/test_arepo.py @@ -7,7 +7,7 @@ class ArepoInterfaceTests(TestWithMPI): def test1(self): instance = ArepoInterface() - result,error = instance.echo_int(12) + result,error = instance.echo_int(12) # TODO: Update test and add more... 
self.assertEquals(error, 0) self.assertEquals(result, 12) instance.stop() From 5daa7e494ca52a8039fbf98dd11edc4594f8eec6 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 16:04:20 +0100 Subject: [PATCH 24/70] add TODO to Makefile --- src/amuse/community/arepo/src/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 8b3de12905..27c1b505b9 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,3 +1,4 @@ +# TODO: Determine whether this is needed as included in arepo/Makefile. ifeq ($(origin AMUSE_DIR), undefined) AMUSE_DIR := $(shell amusifier --get-amuse-dir) endif From ed27e203529c185d23c275b8afb141f1a8f14f25 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 17:02:21 +0100 Subject: [PATCH 25/70] remove test.cc as not needed --- src/amuse/community/arepo/src/test.cc | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 src/amuse/community/arepo/src/test.cc diff --git a/src/amuse/community/arepo/src/test.cc b/src/amuse/community/arepo/src/test.cc deleted file mode 100644 index c30eeef8cb..0000000000 --- a/src/amuse/community/arepo/src/test.cc +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Example function for a code - */ -int echo(int input){ - return input; -} From 5b4065610e3b21d8f0b06e3dfccf533c9b947867 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 17:27:11 +0100 Subject: [PATCH 26/70] add initialize_code() and define_methods() to Arepo --- src/amuse/community/arepo/interface.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index fef740d365..0e1ae7e60b 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -30,9 +30,7 @@ def __init__(self, **keyword_arguments): LiteratureReferencesMixIn.__init__(self) # TODO: 
Determine whether need to inherit from CodeWithDataDirectories. - # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function. - # This function has been kept as a template for future functions. - + # This function has been kept as a basic template for future functions. # @legacy_function # def set_parameters(): # function = LegacyFunctionSpecification() @@ -45,3 +43,20 @@ class Arepo(GravitationalDynamics): def __init__(self, **options): GravitationalDynamics.__init__(self, ArepoInterface(**options), **options) + + def initialize_code(self): + result = self.overridden().initialize_code() + + # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function. + # Could be done in the way in which Gadget2 sets the gadget_output_directory. + #self.parameters.gadget_output_directory = self.get_output_directory() + + return result + + def define_methods(self, builder): + # TODO: Determine how to link this to Arepo's run() - the main simulation loop. 
+ builder.add_method( + "run", + (), + (builder.ERROR_CODE) + ) From deb6a8b49ecafd0435f3982f240cdfaf9a11dfff Mon Sep 17 00:00:00 2001 From: Matthew West Date: Thu, 12 May 2022 13:46:02 +0100 Subject: [PATCH 27/70] add default parameters to arepo's interface.cc --- src/amuse/community/arepo/interface.cc | 133 ++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 12 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index b6656bc04a..10fa51a8af 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -3,6 +3,117 @@ #include "src/main/allvars.h" #include "src/main/proto.h" +// general interface functions: + +void set_default_parameters(){ + // Relevant files + strcpy(All.InitCondFile, "./snap_010"); + strcpy(All.OutputDir, "./output"); + strcpy(All.SnapshotFileBase, "snap"); + strcpy(All.OutputListFilename, "./output_list.txt"); + + // File formats + All.ICFormat = 1; + All.SnapFormat = 3; + + // CPU-time LimitUBelowThisDensity + All.TimeLimitCPU = 93000; + All.CpuTimeBetRestartFile = 12000; + All.ResubmitOn = 0; + strcpy(All.ResubmitCommand, "my-scriptfile"); + + // Memory allocation + All.MaxMemSize = 2500; + + // Characteristics of run + All.TimeBegin = 0.0; + All.TimeMax = 1.0; + + // Basic code options that set simulation type + All.ComovingIntegrationOn = 0; + All.PeriodicBoundariesOn = 0; + All.CoolingOn = 0; + All.StarformationOn = 0; + + // Cosmological parameters + All.Omega0 = 0.0; + All.OmegaLambda = 0.0; + All.OmegaBaryon = 0.0; + All.HubbleParam = 1.0; + All.BoxSize = 100000.0; + + // Output frequency and output parameters + All.OutputListOn = 1; + All.TimeBetSnapshot = 0.0; + All.TimeOfFirstSnapshot = 0.0; + All.TimeBetStatistics = 0.01; + All.NumFilesPerSnapshot = 1; + All.NumFilesWrittenInParallel = 1; + + // Integration timing accuracy + All.TypeOfTimestepCriterion = 0; + All.ErrTolIntAccuracy = 0.012; + All.CourantFac = 0.3; + 
All.MaxSizeTimestep = 0.05; + All.MinSizeTimestep = 2.0e-9; + + // Treatment of empty space and temp limits + All.InitGasTemp = 244.8095; + All.MinGasTemp = 5.0; + All.MinimumDensityOnStartUp = 1.0e-20; + All.LimitUBelowThisDensity = 0.0; + All.LimitUBelowCertainDensityToThisValue = 0.0; + All.MinEgySpec = 0.0; + + // Tree algorithm, force accuracy, domain update frequency + All.TypeOfOpeningCriterion = 1; + All.ErrTolTheta = 0.7; + All.ErrTolForceAcc = 0.0025; + All.MultipleDomains = 8; + All.TopNodeFactor = 2.5; + All.ActivePartFracForNewDomainDecomp = 0.01; + + // Initial density estimates + All.DesNumNgb = 64; + All.MaxNumNgbDeviation = 4; + + // System of Units + All.UnitLength_in_cm = 3.085678e21; + All.UnitMass_in_g = 1.989e43; + All.UnitVelocity_in_cm_per_s = 1e5; + + // Gravitational softening lengths + All.SofteningComovingType0 = 1.0; + All.SofteningComovingType1 = 1.0; + + All.SofteningMaxPhysType0 = 1.0; + All.SofteningMaxPhysType1 = 1.0; + + All.GasSoftFactor = 2.5; + + All.SofteningTypeOfPartType0 = 0; + All.SofteningTypeOfPartType1 = 1; + All.SofteningTypeOfPartType2 = 1; + All.SofteningTypeOfPartType3 = 1; + All.SofteningTypeOfPartType4 = 1; + All.SofteningTypeOfPartType5 = 1; + + All.MinimumComovingHydroSoftening = 1.0; + All.AdaptiveHydroSofteningSpacing = 1.2; + + // Mesh regularization options + All.CellShapingSpeed = 0.5; + All.CellShapingFactor = 1.0; + + // parameters that are fixed for AMUSE: + All.PartAllocFactor = 1.5; // Memory allocation parameter + All.TreeAllocFactor = 0.8; // Memory allocation parameter + All.BufferSize = 25; // Memory allocation parameter + All.ResubmitOn = 0; // Keep this turned off! + All.OutputListOn = 0; // Keep this turned off + All.GravityConstantInternal = 0; // Keep this turned off +} + int initialize_code(){ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); @@ -21,7 +132,6 @@ int initialize_code(){ begrun0(); - strcpy(ParameterFile, "param.txt"); /* Removing command line parsing. 
argv[1] replaced with "param.txt". */ RestartFlag = 0; begrun1(); /* set-up run */ @@ -95,7 +205,7 @@ int get_total_radius(double * radius){ return 0; } -int new_particle(int * index_of_the_particle, double mass, double x, +int new_particle(int * index_of_the_particle, double mass, double x, double y, double z, double vx, double vy, double vz, double radius){ return 0; } @@ -120,7 +230,7 @@ int get_eps2(double * epsilon_squared){ return 0; } -int get_index_of_next_particle(int index_of_the_particle, +int get_index_of_next_particle(int index_of_the_particle, int * index_of_the_next_particle){ return 0; } @@ -137,13 +247,13 @@ int synchronize_model(){ return 0; } -int set_state(int index_of_the_particle, double mass, double x, double y, +int set_state(int index_of_the_particle, double mass, double x, double y, double z, double vx, double vy, double vz, double radius){ return 0; } -int get_state(int index_of_the_particle, double * mass, double * x, - double * y, double * z, double * vx, double * vy, double * vz, +int get_state(int index_of_the_particle, double * mass, double * x, + double * y, double * z, double * vx, double * vy, double * vz, double * radius){ return 0; } @@ -164,7 +274,7 @@ int get_number_of_particles(int * number_of_particles){ return 0; } -int set_acceleration(int index_of_the_particle, double ax, double ay, +int set_acceleration(int index_of_the_particle, double ax, double ay, double az){ return 0; } @@ -197,12 +307,12 @@ int get_potential_energy(double * potential_energy){ return 0; } -int get_velocity(int index_of_the_particle, double * vx, double * vy, +int get_velocity(int index_of_the_particle, double * vx, double * vy, double * vz){ return 0; } -int get_position(int index_of_the_particle, double * x, double * y, +int get_position(int index_of_the_particle, double * x, double * y, double * z){ return 0; } @@ -211,7 +321,7 @@ int set_position(int index_of_the_particle, double x, double y, double z){ return 0; } -int get_acceleration(int 
index_of_the_particle, double * ax, double * ay, +int get_acceleration(int index_of_the_particle, double * ax, double * ay, double * az){ return 0; } @@ -224,8 +334,7 @@ int set_parameters(char * param_file){ return 0; } -int set_velocity(int index_of_the_particle, double vx, double vy, +int set_velocity(int index_of_the_particle, double vx, double vy, double vz){ return 0; } - From fc35f9d8bce838975587490108467f4cfce7ff1f Mon Sep 17 00:00:00 2001 From: Matthew West Date: Fri, 13 May 2022 10:57:33 +0100 Subject: [PATCH 28/70] Comment out read_parameter_file in begrun, add run_sim in interface.cc, add cleanup_code to interface.py, add set_default_parameters to initialize_code --- src/amuse/community/arepo/interface.cc | 7 +++++++ src/amuse/community/arepo/interface.py | 14 ++++++++------ src/amuse/community/arepo/src/init/begrun.c | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 10fa51a8af..6a1b9641b1 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -134,6 +134,7 @@ int initialize_code(){ RestartFlag = 0; + set_default_parameters(); begrun1(); /* set-up run */ char fname[MAXLEN_PATH]; @@ -162,6 +163,12 @@ int initialize_code(){ return 0; } +int run_sim() { + /* This run command is for the Arepo simulation */ + run(); + return 0; +} + int cleanup_code(){ mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 0e1ae7e60b..1f228321b9 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -40,23 +40,25 @@ def __init__(self, **keyword_arguments): class Arepo(GravitationalDynamics): - + def __init__(self, **options): GravitationalDynamics.__init__(self, ArepoInterface(**options), **options) def 
initialize_code(self): result = self.overridden().initialize_code() - # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function. - # Could be done in the way in which Gadget2 sets the gadget_output_directory. - #self.parameters.gadget_output_directory = self.get_output_directory() - return result def define_methods(self, builder): # TODO: Determine how to link this to Arepo's run() - the main simulation loop. builder.add_method( - "run", + "run_sim", + (), + (builder.ERROR_CODE) + ) + # When simulation is finished, shutdown HDF5 & MPI, and exit(0) + builder.add_method( + "cleanup_code", (), (builder.ERROR_CODE) ) diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c index ad8a5222ca..5db8ba6351 100644 --- a/src/amuse/community/arepo/src/init/begrun.c +++ b/src/amuse/community/arepo/src/init/begrun.c @@ -101,7 +101,7 @@ void begrun0(void) */ void begrun1(void) { - read_parameter_file(ParameterFile); /* ... read in parameters for this run */ + /* read_parameter_file(ParameterFile); ... read in parameters for this run */ check_parameters(); /* consistency check of parameters */ From 201535d042be63baf5a2bcdb01795d9d13d40424 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Wed, 18 May 2022 12:59:52 +0100 Subject: [PATCH 29/70] fixes --- src/amuse/community/arepo/src/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 27c1b505b9..197fa00cef 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,11 +1,14 @@ +MPICXX ?= mpicxx +MPICC ?= mpicc # TODO: Determine whether this is needed as included in arepo/Makefile. 
ifeq ($(origin AMUSE_DIR), undefined) AMUSE_DIR := $(shell amusifier --get-amuse-dir) endif -include $(AMUSE_DIR)/config.mk -GSL_FLAGS = -I/opt/Homebrew/include # TODO: Need to make GSL_FLAGS generalisable. +CC = $(MPICC) # sets the C-compiler +# GSL_FLAGS = -I/opt/Homebrew/include # TODO: Need to make GSL_FLAGS generalisable. CFLAGS += -Wall -g $(GSL_FLAGS) CXXFLAGS += $(CFLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) @@ -17,7 +20,7 @@ SUBDIR = add_backgroundgrid cooling debug_md5 domain fof gitversion gravity \ time_integration utils SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c)) -CODEOBJS = test.o $(SRCS:c=o) +CODEOBJS = $(SRCS:c=o) AR = ar ruv RANLIB = ranlib From fbcce03f21c342f67be4c9f8da5c855fd16aa549 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Wed, 18 May 2022 13:13:22 +0100 Subject: [PATCH 30/70] define CXX flag in Arepo makefile --- src/amuse/community/arepo/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 567a479296..46f0668422 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -6,7 +6,7 @@ endif -include $(AMUSE_DIR)/config.mk MPICXX ?= mpicxx - +CXX = $(MPICXX) CFLAGS += -Wall -g CXXFLAGS += $(CFLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) From 5cf22a118f03c6b8df78c3b454d445edab234f1d Mon Sep 17 00:00:00 2001 From: Matthew West Date: Wed, 18 May 2022 13:50:04 +0100 Subject: [PATCH 31/70] set_default values to match current arepo config data structures --- src/amuse/community/arepo/interface.cc | 33 +++++++++++++------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 6a1b9641b1..44fe3205d7 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -83,38 +83,39 @@ void set_default_parameters(){ All.UnitVelocity_in_cm_per_s = 1e5; // Gravitational softening lengths - 
All.SofteningComovingType0 = 1.0; - All.SofteningComovingType1 = 1.0; - - All.SofteningMaxPhysType0 = 1.0; - All.SofteningMaxPhysType1 = 1.0; - + All.SofteningComoving[0] = 1.0; + All.SofteningComoving[1] = 1.0; + All.SofteningMaxPhys[0] = 1.0; + All.SofteningMaxPhys[1] = 1.0; All.GasSoftFactor = 2.5; - All.SofteningTypeOfPartType0 = 0; - All.SofteningTypeOfPartType1 = 1; - All.SofteningTypeOfPartType2 = 1; - All.SofteningTypeOfPartType3 = 1; - All.SofteningTypeOfPartType4 = 1; - All.SofteningTypeOfPartType5 = 1; - All.MinimumComovingHydroSoftening = 1.0; - All.AdaptiveHydroSofteningSpacing = 1.2; + All.SofteningTypeOfPartType[0] = 0; + All.SofteningTypeOfPartType[1] = 1; + All.SofteningTypeOfPartType[2] = 1; + All.SofteningTypeOfPartType[3] = 1; + All.SofteningTypeOfPartType[4] = 1; + All.SofteningTypeOfPartType[5] = 1; + #ifdef ADAPTIVE_HYDRO_SOFTENING + All.MinimumComovingHydroSoftening = 1.0; + All.AdaptiveHydroSofteningSpacing = 1.2; + #endif // Mesh regularization options All.CellShapingSpeed = 0.5; All.CellShapingFactor = 1.0; // parameters that are fixed for AMUSE: - All.PartAllocFactor = 1.5; // Memory allocation parameter All.TreeAllocFactor = 0.8; // Memory allocation parameter - All.BufferSize = 25; // Memory allocation parameter All.ResubmitOn = 0; // Keep this turned off! 
All.OutputListOn = 0; // Keep this turned off All.GravityConstantInternal = 0; // Keep this turned off } int initialize_code(){ + int argc = 0; + char **argv=NULL; + MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); MPI_Comm_size(MPI_COMM_WORLD, &NTask); From af0f37408b87f53079b6e268a72ac54eb28c6ad3 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Thu, 19 May 2022 12:14:44 +0100 Subject: [PATCH 32/70] add #include mpi.h & run.c change mpi_print to just print --- src/amuse/community/arepo/interface.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 44fe3205d7..a212d90b24 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -1,7 +1,12 @@ +#ifndef NOMPI +#include +#endif + #include "worker_code.h" #include "src/main/allvars.h" #include "src/main/proto.h" +#include "src/main/run.c" // general interface functions: @@ -155,7 +160,7 @@ int initialize_code(){ if(status >= 0) { if(status > 0) - mpi_printf("init() returned with %d\n", status); + printf("init() returned with %d\n", status); cleanup_code(); } @@ -171,8 +176,8 @@ int run_sim() { } int cleanup_code(){ - mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); - mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); + printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); + printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); fflush(stdout); #ifdef HAVE_HDF5 From 3ecbaa3ae7a7db1b990cd479a4d158c037210f51 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 13:17:54 +0100 Subject: [PATCH 33/70] include GSL headers --- src/amuse/community/arepo/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 46f0668422..3e67a00625 100644 --- a/src/amuse/community/arepo/Makefile +++ 
b/src/amuse/community/arepo/Makefile @@ -39,4 +39,4 @@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ .cc.o: $< - $(CXX) $(CXXFLAGS) -c -o $@ $< + $(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< From 5ab0a4b3d6845b83631547015f8e0c26defdfb44 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 13:39:28 +0100 Subject: [PATCH 34/70] add GSL_FLAGS to CXXFLAGS --- src/amuse/community/arepo/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 3e67a00625..b055eb3c18 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -8,7 +8,7 @@ endif MPICXX ?= mpicxx CXX = $(MPICXX) CFLAGS += -Wall -g -CXXFLAGS += $(CFLAGS) +CXXFLAGS += $(CFLAGS) $(GSL_FLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) OBJS = interface.o @@ -38,5 +38,5 @@ worker_code.h: interface.py arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ -.cc.o: $< - $(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< +# .cc.o: $< +# $(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< From dfac304c57102167c571f4f9cb4289b95e713b09 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 13:39:50 +0100 Subject: [PATCH 35/70] remove include --- src/amuse/community/arepo/interface.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index a212d90b24..9815df3021 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -6,7 +6,6 @@ #include "src/main/allvars.h" #include "src/main/proto.h" -#include "src/main/run.c" // general interface functions: From d1da5b8a5ff4fa9339b93516ff125a1139aa8237 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 16:03:16 +0100 Subject: [PATCH 36/70] arepo is C not C++ --- 
src/amuse/community/arepo/interface.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 9815df3021..7374928af6 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -7,6 +7,10 @@ #include "src/main/allvars.h" #include "src/main/proto.h" +#ifdef __cplusplus +extern "C" { +#endif + // general interface functions: void set_default_parameters(){ @@ -350,3 +354,6 @@ int set_velocity(int index_of_the_particle, double vx, double vy, double vz){ return 0; } +#ifdef __cplusplus +} +#endif From 2f9296793592efb38bc7f1f810c338f48cc3e518 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 16:09:34 +0100 Subject: [PATCH 37/70] re-add lines --- src/amuse/community/arepo/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index b055eb3c18..934b56a210 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -38,5 +38,5 @@ worker_code.h: interface.py arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ -# .cc.o: $< -# $(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< +.cc.o: $< + $(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< From 980aae5ca43042e6d9f00167a33e2965f25311d8 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Thu, 19 May 2022 18:15:59 +0100 Subject: [PATCH 38/70] create arepo interface.h and move #include allvars.h & proto.h along with ifdef Cpp choice there --- src/amuse/community/arepo/interface.cc | 17 ++--------------- src/amuse/community/arepo/interface.h | 10 ++++++++++ src/amuse/community/arepo/interface.py | 2 +- 3 files changed, 13 insertions(+), 16 deletions(-) create mode 100644 src/amuse/community/arepo/interface.h diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 
7374928af6..525745f074 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -4,12 +4,6 @@ #include "worker_code.h" -#include "src/main/allvars.h" -#include "src/main/proto.h" - -#ifdef __cplusplus -extern "C" { -#endif // general interface functions: @@ -136,12 +130,6 @@ int initialize_code(){ determine_compute_nodes(); - for(PTask = 0; NTask > (1 << PTask); PTask++) - ; - - begrun0(); - - RestartFlag = 0; set_default_parameters(); begrun1(); /* set-up run */ @@ -354,6 +342,5 @@ int set_velocity(int index_of_the_particle, double vx, double vy, double vz){ return 0; } -#ifdef __cplusplus -} -#endif + + diff --git a/src/amuse/community/arepo/interface.h b/src/amuse/community/arepo/interface.h new file mode 100644 index 0000000000..2e1d880db7 --- /dev/null +++ b/src/amuse/community/arepo/interface.h @@ -0,0 +1,10 @@ +#ifdef __cplusplus +extern "C" { +#endif + +#include "src/allvars.h" +#include "src/proto.h" + +#ifdef __cplusplus +} +#endif diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 1f228321b9..c1da97b511 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -23,7 +23,7 @@ class ArepoInterface( .. 
[#] Weinberger, R., Springel, V., Pakmor, R., 2020, ApJS, 248, 32 (Public Code Release) [2020ApJS..248...32W] """ - include_headers = ["worker_code.h"] + include_headers = ["worker_code.h", "inteface.h"] def __init__(self, **keyword_arguments): CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) From b683887a8d3f0fc8514359762e56431aa90a863f Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 21:00:57 +0100 Subject: [PATCH 39/70] fix typo --- src/amuse/community/arepo/interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index c1da97b511..a8e770a79d 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -23,7 +23,7 @@ class ArepoInterface( .. [#] Weinberger, R., Springel, V., Pakmor, R., 2020, ApJS, 248, 32 (Public Code Release) [2020ApJS..248...32W] """ - include_headers = ["worker_code.h", "inteface.h"] + include_headers = ["worker_code.h", "interface.h"] def __init__(self, **keyword_arguments): CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) From 3e84f77e24339f50883f4c52bd7d4fa8510377ec Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 21:01:47 +0100 Subject: [PATCH 40/70] fix paths --- src/amuse/community/arepo/interface.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/interface.h b/src/amuse/community/arepo/interface.h index 2e1d880db7..890b78b765 100644 --- a/src/amuse/community/arepo/interface.h +++ b/src/amuse/community/arepo/interface.h @@ -2,8 +2,8 @@ extern "C" { #endif -#include "src/allvars.h" -#include "src/proto.h" +#include "src/main/allvars.h" +#include "src/main/proto.h" #ifdef __cplusplus } From 1cee0565eed561b659bee84980248b91751358d4 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Fri, 20 May 2022 09:57:07 +0100 Subject: [PATCH 41/70] add 
#include statement for new interface.h header --- src/amuse/community/arepo/interface.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 525745f074..ad3cfba8c9 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -3,7 +3,7 @@ #endif #include "worker_code.h" - +#include "interface.h" // general interface functions: From 719aaa3e6ee211460959a806fdbcc54cca54d2cc Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Fri, 20 May 2022 11:05:14 +0200 Subject: [PATCH 42/70] some fixes for c<->cpp --- src/amuse/community/arepo/interface.cc | 7 ++++++- src/amuse/community/arepo/interface.h | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 525745f074..ebaed03a03 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -1,12 +1,17 @@ +#include +#include + #ifndef NOMPI #include #endif +#include "interface.h" #include "worker_code.h" - // general interface functions: +using namespace std; + void set_default_parameters(){ // Relevant files strcpy(All.InitCondFile, "./snap_010"); diff --git a/src/amuse/community/arepo/interface.h b/src/amuse/community/arepo/interface.h index 890b78b765..7bcd4497c6 100644 --- a/src/amuse/community/arepo/interface.h +++ b/src/amuse/community/arepo/interface.h @@ -1,10 +1,13 @@ #ifdef __cplusplus extern "C" { +#define ___cplusplus +#undef __cplusplus #endif #include "src/main/allvars.h" #include "src/main/proto.h" -#ifdef __cplusplus +#ifdef ___cplusplus } +#define __cplusplus #endif From 81223f643360fa8850efab638ab13fbebbbe7647 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Fri, 20 May 2022 12:59:47 +0200 Subject: [PATCH 43/70] fixes to build, note the source changes --- src/amuse/community/arepo/Makefile | 2 +- src/amuse/community/arepo/src/Makefile | 3 
++- src/amuse/community/arepo/src/init/begrun.c | 2 +- src/amuse/community/arepo/src/main/main.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 934b56a210..271a4671ee 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -36,7 +36,7 @@ worker_code.h: interface.py $(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) - $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ + $(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $(GMP_LIBS) $(GSL_LIBS) $< $(OBJS) $(CODELIB) -o $@ .cc.o: $< $(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 197fa00cef..e852b39c53 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -16,11 +16,12 @@ LDFLAGS += -lm $(MUSE_LD_FLAGS) CODELIB = libarepo.a SUBDIR = add_backgroundgrid cooling debug_md5 domain fof gitversion gravity \ - hydro init io main mesh mpi_utils ngbtree star_formation subfind \ + hydro init io mesh mesh/voronoi mpi_utils ngbtree star_formation subfind \ time_integration utils SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c)) CODEOBJS = $(SRCS:c=o) +CODEOBJS += main/allvars.o main/run.o main/main.o AR = ar ruv RANLIB = ranlib diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c index 5db8ba6351..a70748cfd3 100644 --- a/src/amuse/community/arepo/src/init/begrun.c +++ b/src/amuse/community/arepo/src/init/begrun.c @@ -84,7 +84,7 @@ void begrun0(void) if(ThisTask == 0) { - output_compile_time_options(); +// output_compile_time_options(); } } diff --git a/src/amuse/community/arepo/src/main/main.c b/src/amuse/community/arepo/src/main/main.c index f1ae80be6a..0b0824b5b1 100644 --- a/src/amuse/community/arepo/src/main/main.c +++ 
b/src/amuse/community/arepo/src/main/main.c @@ -58,7 +58,7 @@ * * \return status of exit; 0 for normal exit. */ -int main(int argc, char **argv) +int no_main(int argc, char **argv) { // #ifdef IMPOSE_PINNING // detect_topology(); From 1e0c6b97ab41d2894819a35ff618405553f3bdda Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Fri, 20 May 2022 13:06:37 +0200 Subject: [PATCH 44/70] fix build to detect source changes --- src/amuse/community/arepo/Makefile | 4 +++- src/amuse/community/arepo/src/Makefile | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 271a4671ee..14eef8f6a0 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -26,7 +26,7 @@ clean: distclean: clean make -C src distclean -$(CODELIB): +$(CODELIB): .FORCE make -C src all worker_code.cc: interface.py @@ -40,3 +40,5 @@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) .cc.o: $< $(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< + +.FORCE: diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index e852b39c53..23d0d99c52 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -41,4 +41,7 @@ $(CODELIB): $(CODEOBJS) $(RANLIB) $@ .cc.o: $< - $(CXX) $(CXXFLAGS) -c -o $@ $< + $(MPICXX) $(CXXFLAGS) -c -o $@ $< + +.c.o: $< + $(MPICC) $(CXXFLAGS) -c -o $@ $< From edc21b73b48e8bdb7a0a0fa93327c2a2495bd600 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Fri, 20 May 2022 13:51:22 +0100 Subject: [PATCH 45/70] move GSL_LIBS and GMP_LIBS to the end of the flags for Arepo make file --- src/amuse/community/arepo/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 14eef8f6a0..4fa877bb9a 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -36,7 +36,7 @@ 
worker_code.h: interface.py $(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) - $(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $(GMP_LIBS) $(GSL_LIBS) $< $(OBJS) $(CODELIB) -o $@ + $(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $< $(OBJS) $(CODELIB) -o $@ $(GMP_LIBS) $(GSL_LIBS) .cc.o: $< $(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< From 73d53b70e02442a5a01e7e86cdc16075753c6d58 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Mon, 23 May 2022 08:56:56 +0100 Subject: [PATCH 46/70] remove extra mpi_init from initialize_code --- src/amuse/community/arepo/interface.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index a8e949cb4c..db8497b93a 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -121,10 +121,7 @@ void set_default_parameters(){ } int initialize_code(){ - int argc = 0; - char **argv=NULL; - MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); MPI_Comm_size(MPI_COMM_WORLD, &NTask); From ff2ea53cdbf05e7cce479363ff746bedf57e89b0 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Wed, 25 May 2022 11:04:23 +0200 Subject: [PATCH 47/70] Default to non-periodic gravity --- src/amuse/community/arepo/src/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 23d0d99c52..c0ec73a619 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -23,6 +23,10 @@ SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c)) CODEOBJS = $(SRCS:c=o) CODEOBJS += main/allvars.o main/run.o main/main.o +AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # no periodic boundaries by default + +CXXFLAGS += $(AREPOFLAGS) + AR = ar ruv RANLIB = ranlib RM = rm From 285b495bd60348d5c4a98cc1230433022e767e08 Mon Sep 17 00:00:00 2001 From: "Stephen P. 
Cook" Date: Wed, 28 Sep 2022 11:25:39 +0000 Subject: [PATCH 48/70] Add missing calls to memory management helpers Add basic arepo test. Co-authored-by: Steven Rieder --- src/amuse/community/arepo/__init__.py | 3 ++- src/amuse/community/arepo/interface.cc | 3 ++- src/amuse/community/arepo/src/init/begrun.c | 6 ++++++ src/amuse/community/arepo/test_simple.py | 5 +++++ 4 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 src/amuse/community/arepo/test_simple.py diff --git a/src/amuse/community/arepo/__init__.py b/src/amuse/community/arepo/__init__.py index abe3ba85b6..b08b6187b7 100644 --- a/src/amuse/community/arepo/__init__.py +++ b/src/amuse/community/arepo/__init__.py @@ -1 +1,2 @@ -# generated file \ No newline at end of file +# generated file +from .interface import Arepo diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index db8497b93a..e05dd6a0dc 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -132,7 +132,8 @@ int initialize_code(){ init_cpu_log(); determine_compute_nodes(); - + // Needed to check available memory + mpi_report_committable_memory(); set_default_parameters(); begrun1(); /* set-up run */ diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c index a70748cfd3..f6944d7306 100644 --- a/src/amuse/community/arepo/src/init/begrun.c +++ b/src/amuse/community/arepo/src/init/begrun.c @@ -103,6 +103,12 @@ void begrun1(void) { /* read_parameter_file(ParameterFile); ... 
read in parameters for this run */ +#ifdef HOST_MEMORY_REPORTING + check_maxmemsize_setting(); +#endif /* #ifdef HOST_MEMORY_REPORTING */ + + mymalloc_init(); /* Added from read_parameter_file */ + check_parameters(); /* consistency check of parameters */ #ifdef HAVE_HDF5 diff --git a/src/amuse/community/arepo/test_simple.py b/src/amuse/community/arepo/test_simple.py new file mode 100644 index 0000000000..1951ac6edd --- /dev/null +++ b/src/amuse/community/arepo/test_simple.py @@ -0,0 +1,5 @@ +from amuse.community.arepo import Arepo + +# Check code runs without errors +x = Arepo(redirection="none") +x.initialize_code() From 62cd3459792301bf40106312c6934fbb4938b41b Mon Sep 17 00:00:00 2001 From: Volker Springel Date: Mon, 10 Jan 2022 21:21:32 +0100 Subject: [PATCH 49/70] small bug fix: in case HIERARCHICAL_GRAVITY is not used, and the maximum used timestep sizes increases during a step, it could happen that for particles on the maximum timestep one gravity half-step is not applied (because HighestActiveTimeBin increases) --- .../community/arepo/src/time_integration/do_gravity_hydro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c index 88b7f89a34..40a06ac282 100644 --- a/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c +++ b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c @@ -265,7 +265,7 @@ void find_gravity_timesteps_and_do_gravity_step_first_half(void) } /* reconstruct list of active particles because it is used for other things too (i.e. 
wind particles) */ - timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin); + timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestSynchronizedTimeBin); sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles); #else /* #ifdef HIERARCHICAL_GRAVITY */ @@ -276,7 +276,7 @@ void find_gravity_timesteps_and_do_gravity_step_first_half(void) timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, TIMEBINS); else #endif /* #ifdef FORCE_EQUAL_TIMESTEPS */ - timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin); + timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestSynchronizedTimeBin); sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles); mpi_printf("KICKS: 1st gravity for highest active timebin=%d: particles %lld\n", All.HighestActiveTimeBin, From 33d3f94feb37d69d4115bf09d8b663dd3abd4708 Mon Sep 17 00:00:00 2001 From: Volker Springel Date: Thu, 2 Jun 2022 13:26:13 +0200 Subject: [PATCH 50/70] removed non-standard uint in favor of 'unsigned int' --- src/amuse/community/arepo/src/io/hdf5_util.c | 2 +- src/amuse/community/arepo/src/main/proto.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/src/io/hdf5_util.c b/src/amuse/community/arepo/src/io/hdf5_util.c index a613a36bdc..a690bd71f3 100644 --- a/src/amuse/community/arepo/src/io/hdf5_util.c +++ b/src/amuse/community/arepo/src/io/hdf5_util.c @@ -847,7 +847,7 @@ herr_t my_H5Pset_shuffle(hid_t plist_id) * * \return Non-negative value if successful. 
*/ -herr_t my_H5Pset_deflate(hid_t plist_id, uint level) +herr_t my_H5Pset_deflate(hid_t plist_id, unsigned int level) { herr_t status = H5Pset_deflate(plist_id, level); if(status < 0) diff --git a/src/amuse/community/arepo/src/main/proto.h b/src/amuse/community/arepo/src/main/proto.h index 15a346f1bc..61bdaad467 100644 --- a/src/amuse/community/arepo/src/main/proto.h +++ b/src/amuse/community/arepo/src/main/proto.h @@ -598,7 +598,7 @@ hid_t my_H5Pcreate(hid_t class_id); herr_t my_H5Pclose(hid_t plist); herr_t my_H5Pset_chunk(hid_t plist, int ndims, const hsize_t *dim); herr_t my_H5Pset_shuffle(hid_t plist_id); -herr_t my_H5Pset_deflate(hid_t plist_id, uint level); +herr_t my_H5Pset_deflate(hid_t plist_id, unsigned int level); herr_t my_H5Pset_fletcher32(hid_t plist_id); #endif /* #ifdef HDF5_FILTERS */ From 0193040db0bc011ee7552f088720ef19c3818081 Mon Sep 17 00:00:00 2001 From: Volker Springel Date: Thu, 2 Jun 2022 14:03:04 +0200 Subject: [PATCH 51/70] disabled a superfluous call of get_starformation_rate() --- src/amuse/community/arepo/src/cooling/cooling.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/src/cooling/cooling.c b/src/amuse/community/arepo/src/cooling/cooling.c index 7e7cebbc98..3baf82d3a3 100644 --- a/src/amuse/community/arepo/src/cooling/cooling.c +++ b/src/amuse/community/arepo/src/cooling/cooling.c @@ -477,9 +477,9 @@ void SetOutputGasState(int i, double *ne_guess, double *nH0, double *coolrate) double u = dmax(All.MinEgySpec, SphP[i].Utherm); /* update GasState as appropriate given compile-time options and cell properties */ -#if defined(USE_SFR) - sfr = get_starformation_rate(i); -#endif /* #if defined(USE_SFR) */ + // #if defined(USE_SFR) + // sfr = get_starformation_rate(i); // call is superfluous at this place + // #endif /* update DoCool */ DoCool.u_old_input = u; From 1a1d078381510e2c0ec05f9e7a029ec076d1e85e Mon Sep 17 00:00:00 2001 From: "Stephen P. 
Cook" Date: Wed, 12 Oct 2022 14:13:06 +0000 Subject: [PATCH 52/70] Prepare to run Arepo example Preparing to run the Arepo example `isolated_galaxy_collisionless_3d`. Arepo flags (in `arepo/src/Makefile`) sourced from `arepo/run/examples/myexample/Config.sh`. Co-authored-by: thomasguillet --- src/amuse/community/arepo/ICs.placeholder | 3 +++ src/amuse/community/arepo/interface.cc | 6 ++++-- src/amuse/community/arepo/src/Makefile | 4 ++++ src/amuse/community/arepo/test_simple.py | 1 + 4 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 src/amuse/community/arepo/ICs.placeholder diff --git a/src/amuse/community/arepo/ICs.placeholder b/src/amuse/community/arepo/ICs.placeholder new file mode 100644 index 0000000000..6185e7904d --- /dev/null +++ b/src/amuse/community/arepo/ICs.placeholder @@ -0,0 +1,3 @@ +File ICs taken from arepo examples: + + arepo/examples/isolated_galaxy_collisionless_3d/ICs/ICs diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index e05dd6a0dc..30c97ddd4d 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -15,7 +15,7 @@ using namespace std; void set_default_parameters(){ // Relevant files - strcpy(All.InitCondFile, "./snap_010"); + strcpy(All.InitCondFile, "./ICs"); strcpy(All.OutputDir, "./output"); strcpy(All.SnapshotFileBase, "snap"); strcpy(All.OutputListFilename, "./output_list.txt"); @@ -52,7 +52,7 @@ void set_default_parameters(){ // Output frequency and output parameters All.OutputListOn = 1; - All.TimeBetSnapshot = 0.0; + All.TimeBetSnapshot = 0.1; All.TimeOfFirstSnapshot = 0.0; All.TimeBetStatistics = 0.01; All.NumFilesPerSnapshot = 1; @@ -161,6 +161,8 @@ int initialize_code(){ } begrun2(); + /* TODO run() temporarily added to initialization for testing */ + run(); return 0; } diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index c0ec73a619..7985410152 100644 --- 
a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -24,9 +24,13 @@ CODEOBJS = $(SRCS:c=o) CODEOBJS += main/allvars.o main/run.o main/main.o AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # no periodic boundaries by default +AREPOFLAGS += -DHAVE_HDF5 -DH5_USE_16_API CXXFLAGS += $(AREPOFLAGS) +# Add includes +CXXFLAGS += $(HDF5_FLAGS) + AR = ar ruv RANLIB = ranlib RM = rm diff --git a/src/amuse/community/arepo/test_simple.py b/src/amuse/community/arepo/test_simple.py index 1951ac6edd..30a2c1027e 100644 --- a/src/amuse/community/arepo/test_simple.py +++ b/src/amuse/community/arepo/test_simple.py @@ -3,3 +3,4 @@ # Check code runs without errors x = Arepo(redirection="none") x.initialize_code() +x.run_sim() \ No newline at end of file From d553de2d14a5245a4db9aff5675a90389c28d4f8 Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 13 Oct 2022 15:28:51 +0000 Subject: [PATCH 53/70] Placeholder functions for evolution code --- src/amuse/community/arepo/test_simple.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/test_simple.py b/src/amuse/community/arepo/test_simple.py index 30a2c1027e..47fde3e279 100644 --- a/src/amuse/community/arepo/test_simple.py +++ b/src/amuse/community/arepo/test_simple.py @@ -1,6 +1,13 @@ from amuse.community.arepo import Arepo +# import unit seconds as s # Check code runs without errors x = Arepo(redirection="none") x.initialize_code() -x.run_sim() \ No newline at end of file +#x.run_sim() +#END_TIME = 1.0 | s +#x.evolve_model(END_TIME) + +# x.(evolve for a single timestep) + +# amuse tests to check for diverging behaviour From f3d2f8ad47324c70ce3ca6374127e2e641f508a4 Mon Sep 17 00:00:00 2001 From: "Stephen P. 
Cook" Date: Wed, 26 Oct 2022 11:36:34 +0000 Subject: [PATCH 54/70] Add flags to arepo makefile --- src/amuse/community/arepo/Makefile | 2 +- src/amuse/community/arepo/src/Makefile | 19 ++++++++++++++++++- .../community/arepo/src/gitversion/version.c | 4 ++++ 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 src/amuse/community/arepo/src/gitversion/version.c diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 4fa877bb9a..0134c43831 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -36,7 +36,7 @@ worker_code.h: interface.py $(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) - $(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $< $(OBJS) $(CODELIB) -o $@ $(GMP_LIBS) $(GSL_LIBS) + $(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $< $(OBJS) $(CODELIB) -o $@ $(GMP_LIBS) $(GSL_LIBS) $(HDF5_FLAGS) $(HDF5_LIBS) .cc.o: $< $(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 7985410152..76ad3be4ef 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -19,12 +19,29 @@ SUBDIR = add_backgroundgrid cooling debug_md5 domain fof gitversion gravity \ hydro init io mesh mesh/voronoi mpi_utils ngbtree star_formation subfind \ time_integration utils SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c)) +SRCS += main/allvars.c CODEOBJS = $(SRCS:c=o) CODEOBJS += main/allvars.o main/run.o main/main.o AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # no periodic boundaries by default -AREPOFLAGS += -DHAVE_HDF5 -DH5_USE_16_API +AREPOFLAGS += -DSELFGRAVITY # gravitational intraction between simulation particles/cells +AREPOFLAGS += -DHIERARCHICAL_GRAVITY # use hierarchical splitting of the time integration of the gravity +AREPOFLAGS += -DCELL_CENTER_GRAVITY # uses geometric centers to calculate gravity of cells, only possible 
with HIERARCHICAL_GRAVITY +AREPOFLAGS += -DALLOW_DIRECT_SUMMATION # Performed direct summation instead of tree-based gravity if number of active particles < DIRECT_SUMMATION_THRESHOLD (= 3000 unless specified differently here) +AREPOFLAGS += -DDIRECT_SUMMATION_THRESHOLD=500 # Overrides maximum number of active particles for which direct summation is performed instead of tree based calculation +# AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # gravity is not treated periodically +AREPOFLAGS += -DNSOFTTYPES=2 # Number of different softening values to which particle types can be mapped. +AREPOFLAGS += -DMULTIPLE_NODE_SOFTENING # If a tree node is to be used which is softened, this is done with the softenings of its different mass components +AREPOFLAGS += -DINDIVIDUAL_GRAVITY_SOFTENING=32 # bitmask with particle types where the softenig type should be chosen with that of parttype 1 as a reference type +AREPOFLAGS += -DADAPTIVE_HYDRO_SOFTENING # Adaptive softening of gas cells depending on their size +AREPOFLAGS += -DTREE_BASED_TIMESTEPS # non-local timestep criterion (take 'signal speed' into account) +AREPOFLAGS += -DDOUBLEPRECISION=1 # Mode of double precision: not defined: single; 1: full double precision 2: mixed, 3: mixed, fewer single precisions; unless short of memory, use 1. 
+AREPOFLAGS += -DNGB_TREE_DOUBLEPRECISION # if this is enabled, double precision is used for the neighbor node extension +AREPOFLAGS += -DPROCESS_TIMES_OF_OUTPUTLIST # goes through times of output list prior to starting the simulaiton to ensure that outputs are written as close to the desired time as possible (as opposed to at next possible time if this flag is not active) +# AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) +# AREPOFLAGS += -DDEBUG # enables core-dumps + CXXFLAGS += $(AREPOFLAGS) diff --git a/src/amuse/community/arepo/src/gitversion/version.c b/src/amuse/community/arepo/src/gitversion/version.c new file mode 100644 index 0000000000..ddb27af071 --- /dev/null +++ b/src/amuse/community/arepo/src/gitversion/version.c @@ -0,0 +1,4 @@ +#include "version.h" + +const char* GIT_DATE = ""; +const char* GIT_COMMIT = ""; From 03f2c55b11099f29c1b8e56fb9baab3eff996492 Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Fri, 4 Nov 2022 06:41:53 +0000 Subject: [PATCH 55/70] Add Arepo flags to parent Makefile May want to remove from child makefile to avoid duplication. 
Co-authored-by: Steven Rieder --- src/amuse/community/arepo/Makefile | 20 +++++++++++++++++++- src/amuse/community/arepo/interface.cc | 4 ++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 0134c43831..0e3774194c 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -5,10 +5,27 @@ ifeq ($(origin AMUSE_DIR), undefined) endif -include $(AMUSE_DIR)/config.mk +AREPOFLAGS += -DSELFGRAVITY # gravitational intraction between simulation particles/cells +AREPOFLAGS += -DHIERARCHICAL_GRAVITY # use hierarchical splitting of the time integration of the gravity +AREPOFLAGS += -DCELL_CENTER_GRAVITY # uses geometric centers to calculate gravity of cells, only possible with HIERARCHICAL_GRAVITY +AREPOFLAGS += -DALLOW_DIRECT_SUMMATION # Performed direct summation instead of tree-based gravity if number of active particles < DIRECT_SUMMATION_THRESHOLD (= 3000 unless specified differently here) +AREPOFLAGS += -DDIRECT_SUMMATION_THRESHOLD=500 # Overrides maximum number of active particles for which direct summation is performed instead of tree based calculation +AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # gravity is not treated periodically +AREPOFLAGS += -DNSOFTTYPES=2 # Number of different softening values to which particle types can be mapped. 
+AREPOFLAGS += -DMULTIPLE_NODE_SOFTENING # If a tree node is to be used which is softened, this is done with the softenings of its different mass components +AREPOFLAGS += -DINDIVIDUAL_GRAVITY_SOFTENING=32 # bitmask with particle types where the softenig type should be chosen with that of parttype 1 as a reference type +AREPOFLAGS += -DADAPTIVE_HYDRO_SOFTENING # Adaptive softening of gas cells depending on their size +AREPOFLAGS += -DTREE_BASED_TIMESTEPS # non-local timestep criterion (take 'signal speed' into account) +AREPOFLAGS += -DDOUBLEPRECISION=1 # Mode of double precision: not defined: single; 1: full double precision 2: mixed, 3: mixed, fewer single precisions; unless short of memory, use 1. +AREPOFLAGS += -DNGB_TREE_DOUBLEPRECISION # if this is enabled, double precision is used for the neighbor node extension +AREPOFLAGS += -DPROCESS_TIMES_OF_OUTPUTLIST # goes through times of output list prior to starting the simulaiton to ensure that outputs are written as close to the desired time as possible (as opposed to at next possible time if this flag is not active) +# AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) +# AREPOFLAGS += -DDEBUG # enables core-dumps + MPICXX ?= mpicxx CXX = $(MPICXX) CFLAGS += -Wall -g -CXXFLAGS += $(CFLAGS) $(GSL_FLAGS) +CXXFLAGS += $(CFLAGS) $(GSL_FLAGS) $(AREPOFLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) OBJS = interface.o @@ -27,6 +44,7 @@ distclean: clean make -C src distclean $(CODELIB): .FORCE + export AREPOFLAGS make -C src all worker_code.cc: interface.py diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 30c97ddd4d..d7e7f0016a 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -36,6 +36,7 @@ void set_default_parameters(){ // Characteristics of run All.TimeBegin = 0.0; All.TimeMax = 1.0; + All.TimeStep = 0.1; // Basic code options that set simulation type All.ComovingIntegrationOn = 0; @@ -136,6 +137,9 
@@ int initialize_code(){ mpi_report_committable_memory(); set_default_parameters(); + + // May not need to do this (we want AMUSE to manage this) + // MPI_Bcast(&All, sizeof(struct global_data_all_processes), MPI_BYTE, 0, MPI_COMM_WORLD); begrun1(); /* set-up run */ char fname[MAXLEN_PATH]; From afdfa29e67474915164a570f102047c1778cada0 Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 10 Nov 2022 12:02:29 +0000 Subject: [PATCH 56/70] Force stop and skip dump Add Arepo flag "Multiple restarts" (effect not fully tested). Co-authored-by: Steven Rieder --- src/amuse/community/arepo/Makefile | 1 + src/amuse/community/arepo/src/Makefile | 4 ++-- src/amuse/community/arepo/src/io/io.c | 1 + src/amuse/community/arepo/src/main/run.c | 5 +++++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 0e3774194c..d2ae133da6 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -19,6 +19,7 @@ AREPOFLAGS += -DTREE_BASED_TIMESTEPS # non-local timestep cr AREPOFLAGS += -DDOUBLEPRECISION=1 # Mode of double precision: not defined: single; 1: full double precision 2: mixed, 3: mixed, fewer single precisions; unless short of memory, use 1. 
AREPOFLAGS += -DNGB_TREE_DOUBLEPRECISION # if this is enabled, double precision is used for the neighbor node extension AREPOFLAGS += -DPROCESS_TIMES_OF_OUTPUTLIST # goes through times of output list prior to starting the simulaiton to ensure that outputs are written as close to the desired time as possible (as opposed to at next possible time if this flag is not active) +AREPOFLAGS += -DMULTIPLE_RESTARTS # AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) # AREPOFLAGS += -DDEBUG # enables core-dumps diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 76ad3be4ef..f789f81e0b 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -24,13 +24,12 @@ SRCS += main/allvars.c CODEOBJS = $(SRCS:c=o) CODEOBJS += main/allvars.o main/run.o main/main.o -AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # no periodic boundaries by default AREPOFLAGS += -DSELFGRAVITY # gravitational intraction between simulation particles/cells AREPOFLAGS += -DHIERARCHICAL_GRAVITY # use hierarchical splitting of the time integration of the gravity AREPOFLAGS += -DCELL_CENTER_GRAVITY # uses geometric centers to calculate gravity of cells, only possible with HIERARCHICAL_GRAVITY AREPOFLAGS += -DALLOW_DIRECT_SUMMATION # Performed direct summation instead of tree-based gravity if number of active particles < DIRECT_SUMMATION_THRESHOLD (= 3000 unless specified differently here) AREPOFLAGS += -DDIRECT_SUMMATION_THRESHOLD=500 # Overrides maximum number of active particles for which direct summation is performed instead of tree based calculation -# AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # gravity is not treated periodically +AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # gravity is not treated periodically AREPOFLAGS += -DNSOFTTYPES=2 # Number of different softening values to which particle types can be mapped. 
AREPOFLAGS += -DMULTIPLE_NODE_SOFTENING # If a tree node is to be used which is softened, this is done with the softenings of its different mass components AREPOFLAGS += -DINDIVIDUAL_GRAVITY_SOFTENING=32 # bitmask with particle types where the softenig type should be chosen with that of parttype 1 as a reference type @@ -39,6 +38,7 @@ AREPOFLAGS += -DTREE_BASED_TIMESTEPS # non-local timestep cr AREPOFLAGS += -DDOUBLEPRECISION=1 # Mode of double precision: not defined: single; 1: full double precision 2: mixed, 3: mixed, fewer single precisions; unless short of memory, use 1. AREPOFLAGS += -DNGB_TREE_DOUBLEPRECISION # if this is enabled, double precision is used for the neighbor node extension AREPOFLAGS += -DPROCESS_TIMES_OF_OUTPUTLIST # goes through times of output list prior to starting the simulaiton to ensure that outputs are written as close to the desired time as possible (as opposed to at next possible time if this flag is not active) +AREPOFLAGS += -DMULTIPLE_RESTARTS # AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) # AREPOFLAGS += -DDEBUG # enables core-dumps diff --git a/src/amuse/community/arepo/src/io/io.c b/src/amuse/community/arepo/src/io/io.c index f5d9a0c73f..8610e2413c 100644 --- a/src/amuse/community/arepo/src/io/io.c +++ b/src/amuse/community/arepo/src/io/io.c @@ -366,6 +366,7 @@ void savepositions(int num, int subbox_flag) char buf[500]; int n, filenr, gr, ngroups, masterTask, lastTask; double t0, t1; + return; t0 = second(); CPU_Step[CPU_MISC] += measure_time(); diff --git a/src/amuse/community/arepo/src/main/run.c b/src/amuse/community/arepo/src/main/run.c index 0bdca04354..56ecff7a6a 100644 --- a/src/amuse/community/arepo/src/main/run.c +++ b/src/amuse/community/arepo/src/main/run.c @@ -441,6 +441,11 @@ int check_for_interruption_of_run(void) printf("reaching time-limit. stopping.\n"); stopflag = 2; } + + if(All.Time >= All.TimeMax) + { + stopflag = 1; // AMUSE stop: time reached. 
Emulate stop file behaviour. + } } MPI_Bcast(&stopflag, 1, MPI_INT, 0, MPI_COMM_WORLD); From b8e2539bf8c1ac950673431908e5b9473745f2ad Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 10 Nov 2022 12:04:58 +0000 Subject: [PATCH 57/70] Add evolve_model function Modify test parameters. Co-authored-by: Steven Rieder --- src/amuse/community/arepo/interface.cc | 9 ++++++--- src/amuse/community/arepo/test_simple.py | 7 ++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index d7e7f0016a..720c4e8b2e 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -35,8 +35,8 @@ void set_default_parameters(){ // Characteristics of run All.TimeBegin = 0.0; - All.TimeMax = 1.0; - All.TimeStep = 0.1; + All.TimeMax = 2.7; + All.TimeStep = 0.00314159; // Basic code options that set simulation type All.ComovingIntegrationOn = 0; @@ -166,7 +166,6 @@ int initialize_code(){ begrun2(); /* TODO run() temporarily added to initialization for testing */ - run(); return 0; } @@ -229,6 +228,10 @@ int get_total_mass(double * mass){ } int evolve_model(double time){ + printf("AMUSE interface: setting TimeMax from %g to %g\n", All.TimeMax, time); + All.TimeMax = time; + //All.TimeStep = time - All.Time; + run(); return 0; } diff --git a/src/amuse/community/arepo/test_simple.py b/src/amuse/community/arepo/test_simple.py index 47fde3e279..248b3d1997 100644 --- a/src/amuse/community/arepo/test_simple.py +++ b/src/amuse/community/arepo/test_simple.py @@ -1,9 +1,14 @@ from amuse.community.arepo import Arepo +from amuse.units import nbody_system # import unit seconds as s # Check code runs without errors x = Arepo(redirection="none") -x.initialize_code() +# x.initialize_code() +print("Evolving") +x.evolve_model(0.001) +print("Evolving another step") +x.evolve_model(0.01) #x.run_sim() #END_TIME = 1.0 | s #x.evolve_model(END_TIME) From 
f044fd042546f68830982713d6e19908ada0dd72 Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 10 Nov 2022 21:51:01 +0000 Subject: [PATCH 58/70] Change snapshot format Avoid HDF5 - we will use AMUSE to extract data instead of snapshotting. --- src/amuse/community/arepo/interface.cc | 2 +- src/amuse/community/arepo/src/io/io.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 720c4e8b2e..06edf303bb 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -22,7 +22,7 @@ void set_default_parameters(){ // File formats All.ICFormat = 1; - All.SnapFormat = 3; + All.SnapFormat = 1; // CPU-time LimitUBelowThisDensity All.TimeLimitCPU = 93000; diff --git a/src/amuse/community/arepo/src/io/io.c b/src/amuse/community/arepo/src/io/io.c index 8610e2413c..f5d9a0c73f 100644 --- a/src/amuse/community/arepo/src/io/io.c +++ b/src/amuse/community/arepo/src/io/io.c @@ -366,7 +366,6 @@ void savepositions(int num, int subbox_flag) char buf[500]; int n, filenr, gr, ngroups, masterTask, lastTask; double t0, t1; - return; t0 = second(); CPU_Step[CPU_MISC] += measure_time(); From 354164c598e9d02ea5060fde2ebbcc3860730fd3 Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 10 Nov 2022 21:53:25 +0000 Subject: [PATCH 59/70] Remove unnecessary Arepo flag The multiple restarts flag keeps more than one restart file for unreliable systems. 
--- src/amuse/community/arepo/Makefile | 1 - src/amuse/community/arepo/src/Makefile | 1 - 2 files changed, 2 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index d2ae133da6..0e3774194c 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -19,7 +19,6 @@ AREPOFLAGS += -DTREE_BASED_TIMESTEPS # non-local timestep cr AREPOFLAGS += -DDOUBLEPRECISION=1 # Mode of double precision: not defined: single; 1: full double precision 2: mixed, 3: mixed, fewer single precisions; unless short of memory, use 1. AREPOFLAGS += -DNGB_TREE_DOUBLEPRECISION # if this is enabled, double precision is used for the neighbor node extension AREPOFLAGS += -DPROCESS_TIMES_OF_OUTPUTLIST # goes through times of output list prior to starting the simulaiton to ensure that outputs are written as close to the desired time as possible (as opposed to at next possible time if this flag is not active) -AREPOFLAGS += -DMULTIPLE_RESTARTS # AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) # AREPOFLAGS += -DDEBUG # enables core-dumps diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index f789f81e0b..374c807430 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -38,7 +38,6 @@ AREPOFLAGS += -DTREE_BASED_TIMESTEPS # non-local timestep cr AREPOFLAGS += -DDOUBLEPRECISION=1 # Mode of double precision: not defined: single; 1: full double precision 2: mixed, 3: mixed, fewer single precisions; unless short of memory, use 1. 
AREPOFLAGS += -DNGB_TREE_DOUBLEPRECISION # if this is enabled, double precision is used for the neighbor node extension AREPOFLAGS += -DPROCESS_TIMES_OF_OUTPUTLIST # goes through times of output list prior to starting the simulaiton to ensure that outputs are written as close to the desired time as possible (as opposed to at next possible time if this flag is not active) -AREPOFLAGS += -DMULTIPLE_RESTARTS # AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) # AREPOFLAGS += -DDEBUG # enables core-dumps From 85b102ef4fd3111fa4a0a209beae810821e4d417 Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 10 Nov 2022 21:56:12 +0000 Subject: [PATCH 60/70] Set restart flag in initialization Re-add initialization code to test script. --- src/amuse/community/arepo/interface.cc | 2 ++ src/amuse/community/arepo/test_simple.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 06edf303bb..1a734de624 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -138,6 +138,8 @@ int initialize_code(){ set_default_parameters(); + RestartFlag = 0; + // May not need to do this (we want AMUSE to manage this) // MPI_Bcast(&All, sizeof(struct global_data_all_processes), MPI_BYTE, 0, MPI_COMM_WORLD); begrun1(); /* set-up run */ diff --git a/src/amuse/community/arepo/test_simple.py b/src/amuse/community/arepo/test_simple.py index 248b3d1997..9cc8c95d6e 100644 --- a/src/amuse/community/arepo/test_simple.py +++ b/src/amuse/community/arepo/test_simple.py @@ -4,11 +4,11 @@ # Check code runs without errors x = Arepo(redirection="none") -# x.initialize_code() +x.initialize_code() print("Evolving") -x.evolve_model(0.001) +x.evolve_model(0.00001) print("Evolving another step") -x.evolve_model(0.01) +x.evolve_model(0.00002) #x.run_sim() #END_TIME = 1.0 | s #x.evolve_model(END_TIME) From
44cfc5df09c60e3ecc5b5cf441192cc3917f716b Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Mon, 14 Nov 2022 16:06:10 +0000 Subject: [PATCH 61/70] Add get_position interface function Include naive find_particle_with_ID to be optimized later. Co-authored-by: Thomas Guillet --- src/amuse/community/arepo/interface.cc | 19 ++++++++++++++++++- src/amuse/community/arepo/test_simple.py | 12 ++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 1a734de624..e82f38e974 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -331,9 +331,26 @@ int get_velocity(int index_of_the_particle, double * vx, double * vy, return 0; } +static int find_particle_with_ID(int particle_id) { + for (int p = 0; p= 0) { + *x = P[p].Pos[0]; + *y = P[p].Pos[1]; + *z = P[p].Pos[2]; + return 0; + } + return -3; } int set_position(int index_of_the_particle, double x, double y, double z){ diff --git a/src/amuse/community/arepo/test_simple.py b/src/amuse/community/arepo/test_simple.py index 9cc8c95d6e..9393322513 100644 --- a/src/amuse/community/arepo/test_simple.py +++ b/src/amuse/community/arepo/test_simple.py @@ -5,10 +5,14 @@ # Check code runs without errors x = Arepo(redirection="none") x.initialize_code() -print("Evolving") -x.evolve_model(0.00001) -print("Evolving another step") -x.evolve_model(0.00002) +print(x.get_position(21057)); +print(x.get_position(21060)); + +#print("Evolving") +#x.evolve_model(0.00001) +#print("Evolving another step") +#x.evolve_model(0.00002) + #x.run_sim() #END_TIME = 1.0 | s #x.evolve_model(END_TIME) From d9f233d0e22b0b181a82d26f668681074cd5de78 Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 17 Nov 2022 16:15:10 +0000 Subject: [PATCH 62/70] Add arepo get functions Currently with a naive search for particle ID. 
--- src/amuse/community/arepo/interface.cc | 41 +++++++++++++++++++------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index e82f38e974..d9daa16b25 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -196,8 +196,25 @@ int cleanup_code(){ return 0; } +// Naive search for ID +// TODO: Implement this with a map (reverse look-up), possibly with a check +// to see if the ID is wrong and we need to re-create the map +static int find_particle_with_ID(int particle_id) { + for (int p = 0; p= 0) { + *mass = P[p].Mass; + return 0; + } + return -3; } int commit_particles(){ @@ -205,6 +222,7 @@ int commit_particles(){ } int get_time(double * time){ + *time = All.Time; return 0; } @@ -242,6 +260,7 @@ int set_eps2(double epsilon_squared){ } int get_begin_time(double * time){ + *time = All.TimeBegin; return 0; } @@ -274,10 +293,12 @@ int set_state(int index_of_the_particle, double mass, double x, double y, int get_state(int index_of_the_particle, double * mass, double * x, double * y, double * z, double * vx, double * vy, double * vz, double * radius){ + // Arepo has delaunay cell radii. 
return 0; } int get_time_step(double * time_step){ + *time_step = All.TimeStep; return 0; } @@ -311,6 +332,7 @@ int get_radius(int index_of_the_particle, double * radius){ } int set_begin_time(double time){ + All.TimeBegin = time; return 0; } @@ -328,16 +350,14 @@ int get_potential_energy(double * potential_energy){ int get_velocity(int index_of_the_particle, double * vx, double * vy, double * vz){ - return 0; -} - -static int find_particle_with_ID(int particle_id) { - for (int p = 0; p= 0) { + *vx = P[p].Vel[0]; + *vy = P[p].Vel[1]; + *vz = P[p].Vel[2]; + return 0; } - return -1; + return -3; } int get_position(int index_of_the_particle, double * x, double * y, @@ -359,6 +379,7 @@ int set_position(int index_of_the_particle, double x, double y, double z){ int get_acceleration(int index_of_the_particle, double * ax, double * ay, double * az){ + // TODO: allvars.h defines a GravAccel vector if this is what we want? return 0; } From 91716b1bb401a13f8f6b5eaa53d6a86658cd82dc Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Fri, 18 Nov 2022 15:57:46 +0000 Subject: [PATCH 63/70] Add Arepo particle ID lookup Using `size_t` for the output position ID, but this is defined in Arepo as an unsigned int. 
--- src/amuse/community/arepo/interface.cc | 49 ++++++++++++++++++++---- src/amuse/community/arepo/test_simple.py | 4 +- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index d9daa16b25..5a66b49743 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -1,5 +1,6 @@ #include #include +#include #ifndef NOMPI #include @@ -13,6 +14,13 @@ using namespace std; + +// Global ID_RLOOKUP will be initalized by create_ID_reverse_lookup in +// initialize_code() +map ID_RLOOKUP; +static void create_ID_reverse_lookup(); + + void set_default_parameters(){ // Relevant files strcpy(All.InitCondFile, "./ICs"); @@ -167,7 +175,8 @@ int initialize_code(){ } begrun2(); - /* TODO run() temporarily added to initialization for testing */ + create_ID_reverse_lookup(); + return 0; } @@ -196,14 +205,39 @@ int cleanup_code(){ return 0; } -// Naive search for ID -// TODO: Implement this with a map (reverse look-up), possibly with a check -// to see if the ID is wrong and we need to re-create the map +static void create_ID_reverse_lookup() { + map id_rlookup_local; + for (size_t i = 0; i < NumPart; i++) { + MyIDType id = P[i].ID; + id_rlookup_local[id] = i; + } + ID_RLOOKUP = id_rlookup_local; +} + static int find_particle_with_ID(int particle_id) { - for (int p = 0; p= 0) { *x = P[p].Pos[0]; diff --git a/src/amuse/community/arepo/test_simple.py b/src/amuse/community/arepo/test_simple.py index 9393322513..9a32c251da 100644 --- a/src/amuse/community/arepo/test_simple.py +++ b/src/amuse/community/arepo/test_simple.py @@ -5,8 +5,8 @@ # Check code runs without errors x = Arepo(redirection="none") x.initialize_code() -print(x.get_position(21057)); -print(x.get_position(21060)); +print(x.get_position(21057)) # This is the ID of P[0] +print(x.get_position(21060)) #print("Evolving") #x.evolve_model(0.00001) From af586a19516da06210ed4621a5ecb0614e2190f4 Mon Sep 17 
00:00:00 2001 From: "Stephen P. Cook" Date: Fri, 18 Nov 2022 15:58:29 +0000 Subject: [PATCH 64/70] Match Arepo interface with Gadget2 --- src/amuse/community/arepo/interface.cc | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 5a66b49743..7d7798733d 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -256,6 +256,7 @@ int commit_particles(){ } int get_time(double * time){ + if (ThisTask) {return 0;} *time = All.Time; return 0; } @@ -290,16 +291,19 @@ int evolve_model(double time){ } int set_eps2(double epsilon_squared){ - return 0; + if (ThisTask) {return 0;} + return -2; } int get_begin_time(double * time){ + if (ThisTask) {return 0;} *time = All.TimeBegin; return 0; } int get_eps2(double * epsilon_squared){ - return 0; + if (ThisTask) {return 0;} + return -2; } int get_index_of_next_particle(int index_of_the_particle, @@ -332,6 +336,7 @@ int get_state(int index_of_the_particle, double * mass, double * x, } int get_time_step(double * time_step){ + if (ThisTask) {return 0;} *time_step = All.TimeStep; return 0; } @@ -345,12 +350,14 @@ int get_kinetic_energy(double * kinetic_energy){ } int get_number_of_particles(int * number_of_particles){ + if (ThisTask) {return 0;} + *number_of_particles = All.TotNumPart; return 0; } int set_acceleration(int index_of_the_particle, double ax, double ay, double az){ - return 0; + return -2; } int get_center_of_mass_position(double * x, double * y, double * z){ @@ -362,7 +369,7 @@ int get_center_of_mass_velocity(double * vx, double * vy, double * vz){ } int get_radius(int index_of_the_particle, double * radius){ - return 0; + return -2; } int set_begin_time(double time){ @@ -371,7 +378,7 @@ int set_begin_time(double time){ } int set_radius(int index_of_the_particle, double radius){ - return 0; + return -2; } int recommit_parameters(){ From 
c80426de572e0c9f38dade9fbd54299bc5056d3d Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Fri, 2 Dec 2022 12:16:06 +0000 Subject: [PATCH 65/70] Add comments to Arepo interface --- src/amuse/community/arepo/interface.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 7d7798733d..ff2e4b650e 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -175,7 +175,6 @@ int initialize_code(){ } begrun2(); - create_ID_reverse_lookup(); return 0; } @@ -223,7 +222,7 @@ static int find_particle_with_ID(int particle_id) { if (it == ID_RLOOKUP.end()) { // particle_id wasn't in the map - rebuild ID_RLOOKUP and try again - cout << "AMUSE: Rebuilding particle_ID lookup.\n"; + cout << "AMUSE: Rebuilding particle_ID lookup (ID not found).\n"; create_ID_reverse_lookup(); continue; } @@ -232,7 +231,7 @@ static int find_particle_with_ID(int particle_id) { if (P[particle_pos].ID != particle_id) { // particle_id had the wrong value - rebuild ID_RLOOKUP and try again - cout << "AMUSE: Rebuilding particle ID lookup table.\n"; + cout << "AMUSE: Rebuilding particle ID lookup table (ID index changed).\n"; create_ID_reverse_lookup(); continue; } @@ -256,6 +255,7 @@ int commit_particles(){ } int get_time(double * time){ + // Non-zero tasks return early with 0 (success); only task 0 sets *time if (ThisTask) {return 0;} *time = All.Time; return 0; @@ -291,6 +291,7 @@ int evolve_model(double time){ } int set_eps2(double epsilon_squared){ + // This looks bizarre if (ThisTask) {return 0;} return -2; } From 2f0104b8dbcb5d7b0fbf9b33ee90c63b46dee920 Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Fri, 2 Dec 2022 12:17:42 +0000 Subject: [PATCH 66/70] Add script to track and plot particles Based on test_simple.py.
--- src/amuse/community/arepo/test_positions.py | 80 +++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 src/amuse/community/arepo/test_positions.py diff --git a/src/amuse/community/arepo/test_positions.py b/src/amuse/community/arepo/test_positions.py new file mode 100644 index 0000000000..23b437f010 --- /dev/null +++ b/src/amuse/community/arepo/test_positions.py @@ -0,0 +1,80 @@ +import random +import numpy as np +from matplotlib import pyplot as plt +from amuse.community.arepo import Arepo +# from amuse.units import nbody_system +# import unit seconds as s + +random.seed(123) +N_PLOT_PARTICLES = 300 + +# Check code runs without errors +x = Arepo(redirection="none") +x.initialize_code() + +n_particles_total = x.get_number_of_particles() +tracked_ids = random.sample(range(n_particles_total), k=N_PLOT_PARTICLES) + +positions = {} + +# Get start position of tracked particles +for id in tracked_ids: + positions[id] = [x.get_position(id)] + +print("Evolving") +x.evolve_model(0.00001) + +# Update positions of tracked particles +for id in tracked_ids: + positions[id].append(x.get_position(id)) + +print("Evolving another step") +x.evolve_model(0.00002) + +# Update positions of tracked particles +for id in tracked_ids: + positions[id].append(x.get_position(id)) + + +def dist(p0, p1): + return np.linalg.norm(np.array(p1) - np.array(p0)) + + +# Print the paths of 5 particles +for id, _ in zip(positions, range(5)): + print(id) + print(positions[id]) + print(dist(positions[id][0], positions[id][-1])) + print() + +# Plot1 +fig, ax = plt.subplots(subplot_kw=dict(projection='3d')) +for id, pos in positions.items(): + ax.plot(*np.array(pos).T) +fig.savefig('positions_1.png') + +# Plot2 +fig, ax = plt.subplots(subplot_kw=dict(projection='3d')) +for id, pos in positions.items(): + ax.plot(*np.array(pos).T) +ax.set_xlim([-1000, 1000]) +ax.set_ylim([-1000, 1000]) +ax.set_zlim([-1000, 1000]) +fig.savefig('positions_2.png') + +# Plot3 +fig, ax = 
plt.subplots(subplot_kw=dict(projection='3d')) +for id, pos in positions.items(): + ax.plot(*np.array(pos).T) +ax.set_xlim([-100, 100]) +ax.set_ylim([-100, 100]) +ax.set_zlim([-100, 100]) +fig.savefig('positions_3.png') + +# x.run_sim() +# END_TIME = 1.0 | s +# x.evolve_model(END_TIME) + +# x.(evolve for a single timestep) + +# amuse tests to check for diverging behaviour From a0908e02d574f11fa53b1f853b41e955e6a889c5 Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 22 Dec 2022 14:21:08 +0000 Subject: [PATCH 67/70] Add HDF5 flags to Makefiles, remove missing call Removed call to missing `write_compile_time_options_in_hdf5` function in `src/io/io.c`. In Arepo this function is written to a file by a perl script at compile time. --- src/amuse/community/arepo/Makefile | 8 +++++--- src/amuse/community/arepo/src/Makefile | 10 +++++++--- src/amuse/community/arepo/src/io/io.c | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 0e3774194c..fc9ac559c2 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -19,13 +19,15 @@ AREPOFLAGS += -DTREE_BASED_TIMESTEPS # non-local timestep cr AREPOFLAGS += -DDOUBLEPRECISION=1 # Mode of double precision: not defined: single; 1: full double precision 2: mixed, 3: mixed, fewer single precisions; unless short of memory, use 1. 
AREPOFLAGS += -DNGB_TREE_DOUBLEPRECISION # if this is enabled, double precision is used for the neighbor node extension AREPOFLAGS += -DPROCESS_TIMES_OF_OUTPUTLIST # goes through times of output list prior to starting the simulaiton to ensure that outputs are written as close to the desired time as possible (as opposed to at next possible time if this flag is not active) -# AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) +AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) # AREPOFLAGS += -DDEBUG # enables core-dumps +HDF5_FLAGS += -DH5_USE_16_API + MPICXX ?= mpicxx CXX = $(MPICXX) CFLAGS += -Wall -g -CXXFLAGS += $(CFLAGS) $(GSL_FLAGS) $(AREPOFLAGS) +CXXFLAGS += $(CFLAGS) $(GSL_FLAGS) $(AREPOFLAGS) $(HDF5_FLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) OBJS = interface.o @@ -54,7 +56,7 @@ worker_code.h: interface.py $(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) - $(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $< $(OBJS) $(CODELIB) -o $@ $(GMP_LIBS) $(GSL_LIBS) $(HDF5_FLAGS) $(HDF5_LIBS) + $(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $(HDF5_FLAGS) $< $(OBJS) $(CODELIB) -o $@ $(GMP_LIBS) $(GSL_LIBS) $(HDF5_LIBS) .cc.o: $< $(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 374c807430..f2b34e0a0d 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -11,7 +11,11 @@ CC = $(MPICC) # sets the C-compiler # GSL_FLAGS = -I/opt/Homebrew/include # TODO: Need to make GSL_FLAGS generalisable. 
CFLAGS += -Wall -g $(GSL_FLAGS) CXXFLAGS += $(CFLAGS) -LDFLAGS += -lm $(MUSE_LD_FLAGS) + +HDF5_FLAGS += -DH5_USE_16_API + +LDFLAGS += -lm $(MUSE_LD_FLAGS) $(HDF5_FLAGS) +LIBS += $(HDF5_LIBS) CODELIB = libarepo.a @@ -65,7 +69,7 @@ $(CODELIB): $(CODEOBJS) $(RANLIB) $@ .cc.o: $< - $(MPICXX) $(CXXFLAGS) -c -o $@ $< + $(MPICXX) $(CXXFLAGS) -c -o $@ $< $(LIBS) .c.o: $< - $(MPICC) $(CXXFLAGS) -c -o $@ $< + $(MPICC) $(CXXFLAGS) -c -o $@ $< $(LIBS) diff --git a/src/amuse/community/arepo/src/io/io.c b/src/amuse/community/arepo/src/io/io.c index f5d9a0c73f..afac5c7087 100644 --- a/src/amuse/community/arepo/src/io/io.c +++ b/src/amuse/community/arepo/src/io/io.c @@ -1247,7 +1247,7 @@ void write_file(char *fname, int writeTask, int lastTask, int subbox_flag) write_parameters_attributes_in_hdf5(hdf5_paramsgrp); hdf5_configgrp = my_H5Gcreate(hdf5_file, "/Config", 0); - write_compile_time_options_in_hdf5(hdf5_configgrp); + // write_compile_time_options_in_hdf5(hdf5_configgrp); #endif /* #ifdef HAVE_HDF5 */ } else From 817000a3c532beaf2f7e1c9ce7978951792be98f Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 22 Dec 2022 14:28:30 +0000 Subject: [PATCH 68/70] Add interface getters for gas pressure, density Also fill in gravitational acceleration getter. --- src/amuse/community/arepo/interface.cc | 40 ++++++++++++++++++++++++-- src/amuse/community/arepo/interface.py | 16 +++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index ff2e4b650e..55ebf8cfec 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -420,8 +420,14 @@ int set_position(int index_of_the_particle, double x, double y, double z){ int get_acceleration(int index_of_the_particle, double * ax, double * ay, double * az){ - // TODO: allvars.h defines a GravAccel vector if this is what we want? 
- return 0; + int p = find_particle_with_ID(index_of_the_particle); + if (p < 0) { + return p; + } + *ax = P[p].GravAccel[0]; + *ay = P[p].GravAccel[1]; + *az = P[p].GravAccel[2]; + return 0; } int commit_parameters(){ @@ -437,4 +443,34 @@ int set_velocity(int index_of_the_particle, double vx, double vy, return 0; } +int get_pressure(int index_of_the_particle, double * p){ + int p_idx = find_particle_with_ID(index_of_the_particle); + if (p_idx < 0) { + printf("AREPO: Particle with ID %d not found in P", index_of_the_particle); + return p_idx; + } + + if (P[p_idx].Type > 0){ + printf("AREPO: Particle with index %d not gas", index_of_the_particle); + return -2; + } + + *p = SphP[p_idx].Pressure; + return 0; +} + +int get_density(int index_of_the_particle, double * rho){ + int p_idx = find_particle_with_ID(index_of_the_particle); + if (p_idx < 0) { + printf("AREPO: Particle with ID %d not found in P", index_of_the_particle); + return p_idx; + } + + if (P[p_idx].Type > 0){ + printf("AREPO: Particle with index %d not gas", index_of_the_particle); + return -2; + } + *rho = SphP[p_idx].Density; + return 0; +} diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index a8e770a79d..126018c3a6 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -30,6 +30,22 @@ def __init__(self, **keyword_arguments): LiteratureReferencesMixIn.__init__(self) # TODO: Determine whether need to inherit from CodeWithDataDirectories. 
+ @legacy_function + def get_pressure(): + function = LegacyFunctionSpecification() + function.addParameter("index_of_the_particle", dtype="int32", direction=function.IN) + function.addParameter("p", dtype="float64", direction=function.OUT) + function.result_type = "int32" + return function + + @legacy_function + def get_density(): + function = LegacyFunctionSpecification() + function.addParameter("index_of_the_particle", dtype="int32", direction=function.IN) + function.addParameter("rho", dtype="float64", direction=function.OUT) + function.result_type = "int32" + return function + # This function has been kept as a basic template for future functions. # @legacy_function # def set_parameters(): From 51facf0e51836b7ea7bfe7251632b0fe9c931fcf Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 22 Dec 2022 14:35:55 +0000 Subject: [PATCH 69/70] Re-add HAVE_HDF5 Arepo flag --- src/amuse/community/arepo/src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index f2b34e0a0d..7884a35e33 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -42,7 +42,7 @@ AREPOFLAGS += -DTREE_BASED_TIMESTEPS # non-local timestep cr AREPOFLAGS += -DDOUBLEPRECISION=1 # Mode of double precision: not defined: single; 1: full double precision 2: mixed, 3: mixed, fewer single precisions; unless short of memory, use 1. 
AREPOFLAGS += -DNGB_TREE_DOUBLEPRECISION # if this is enabled, double precision is used for the neighbor node extension AREPOFLAGS += -DPROCESS_TIMES_OF_OUTPUTLIST # goes through times of output list prior to starting the simulaiton to ensure that outputs are written as close to the desired time as possible (as opposed to at next possible time if this flag is not active) -# AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) +AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) # AREPOFLAGS += -DDEBUG # enables core-dumps From fa41c6933e01a543732a60d360c2845f6f6f1a13 Mon Sep 17 00:00:00 2001 From: "Stephen P. Cook" Date: Thu, 22 Dec 2022 14:42:19 +0000 Subject: [PATCH 70/70] Change Makefiles + interface to run Noh 3D example This breaks the former "collisionless galaxy 3D" example, since the Arepo compiler flags are hard-coded into the Makefiles. Once the ICs file has been copied from the Arepo source (as outlined in ICs.placeholder) the new python script `test_noh3d.py` runs the example and saves figures of the output at t = 0.1, 1.0 and 1.9. 
--- src/amuse/community/arepo/ICs.placeholder | 3 +- src/amuse/community/arepo/Makefile | 38 ++++--- src/amuse/community/arepo/interface.cc | 107 ++++++++++++++++++- src/amuse/community/arepo/src/Makefile | 38 ++++--- src/amuse/community/arepo/test_noh3d.py | 122 ++++++++++++++++++++++ 5 files changed, 277 insertions(+), 31 deletions(-) create mode 100644 src/amuse/community/arepo/test_noh3d.py diff --git a/src/amuse/community/arepo/ICs.placeholder b/src/amuse/community/arepo/ICs.placeholder index 6185e7904d..1cb01a6dbd 100644 --- a/src/amuse/community/arepo/ICs.placeholder +++ b/src/amuse/community/arepo/ICs.placeholder @@ -1,3 +1,4 @@ File ICs taken from arepo examples: - arepo/examples/isolated_galaxy_collisionless_3d/ICs/ICs + - Noh 3D example: arepo/examples/noh_3d/IC.hdf5 + - isolated collisionless galaxy: arepo/examples/isolated_galaxy_collisionless_3d/ICs/ICs diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index fc9ac559c2..0e0e2b544b 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -5,22 +5,32 @@ ifeq ($(origin AMUSE_DIR), undefined) endif -include $(AMUSE_DIR)/config.mk -AREPOFLAGS += -DSELFGRAVITY # gravitational intraction between simulation particles/cells -AREPOFLAGS += -DHIERARCHICAL_GRAVITY # use hierarchical splitting of the time integration of the gravity -AREPOFLAGS += -DCELL_CENTER_GRAVITY # uses geometric centers to calculate gravity of cells, only possible with HIERARCHICAL_GRAVITY -AREPOFLAGS += -DALLOW_DIRECT_SUMMATION # Performed direct summation instead of tree-based gravity if number of active particles < DIRECT_SUMMATION_THRESHOLD (= 3000 unless specified differently here) -AREPOFLAGS += -DDIRECT_SUMMATION_THRESHOLD=500 # Overrides maximum number of active particles for which direct summation is performed instead of tree based calculation -AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # gravity is not treated periodically -AREPOFLAGS += -DNSOFTTYPES=2 # Number of 
different softening values to which particle types can be mapped. -AREPOFLAGS += -DMULTIPLE_NODE_SOFTENING # If a tree node is to be used which is softened, this is done with the softenings of its different mass components -AREPOFLAGS += -DINDIVIDUAL_GRAVITY_SOFTENING=32 # bitmask with particle types where the softenig type should be chosen with that of parttype 1 as a reference type -AREPOFLAGS += -DADAPTIVE_HYDRO_SOFTENING # Adaptive softening of gas cells depending on their size +## examples/Noh_3d/Config.sh +## config file for 3d Noh probelm + +#--------------------------------------- Basic operation mode of code +AREPOFLAGS += -DREFLECTIVE_X=2 # in-/outflow boundary conditions in x direction +AREPOFLAGS += -DREFLECTIVE_Y=2 # in-/outflow boundary conditions in y direction +AREPOFLAGS += -DREFLECTIVE_Z=2 # in-/outflow boundary conditions in z direction + +#--------------------------------------- Mesh motion and regularization +AREPOFLAGS += -DREGULARIZE_MESH_CM_DRIFT # Mesh regularization; Move mesh generating point towards center of mass to make cells rounder. +AREPOFLAGS += -DREGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED # Limit mesh regularization speed by local sound speed +AREPOFLAGS += -DREGULARIZE_MESH_FACE_ANGLE # Use maximum face angle as roundness criterion in mesh regularization + +#--------------------------------------- Time integration options AREPOFLAGS += -DTREE_BASED_TIMESTEPS # non-local timestep criterion (take 'signal speed' into account) + +#---------------------------------------- Single/Double Precision AREPOFLAGS += -DDOUBLEPRECISION=1 # Mode of double precision: not defined: single; 1: full double precision 2: mixed, 3: mixed, fewer single precisions; unless short of memory, use 1. 
-AREPOFLAGS += -DNGB_TREE_DOUBLEPRECISION # if this is enabled, double precision is used for the neighbor node extension -AREPOFLAGS += -DPROCESS_TIMES_OF_OUTPUTLIST # goes through times of output list prior to starting the simulaiton to ensure that outputs are written as close to the desired time as possible (as opposed to at next possible time if this flag is not active) -AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) -# AREPOFLAGS += -DDEBUG # enables core-dumps +AREPOFLAGS += -DINPUT_IN_DOUBLEPRECISION # initial conditions are in double precision +AREPOFLAGS += -DOUTPUT_CENTER_OF_MASS # output centers of cells + +#--------------------------------------- Output/Input options +AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired; should this be standard? + +#--------------------------------------- Testing and Debugging options +#AREPOFLAGS += -DDEBUG # enables core-dumps, should this be standard? HDF5_FLAGS += -DH5_USE_16_API diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 55ebf8cfec..0f05d16ab3 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -120,7 +120,9 @@ void set_default_parameters(){ // Mesh regularization options All.CellShapingSpeed = 0.5; - All.CellShapingFactor = 1.0; + #ifndef REGULARIZE_MESH_FACE_ANGLE // Compiler error if flag defined + All.CellShapingFactor = 1.0; + #endif // parameters that are fixed for AMUSE: All.TreeAllocFactor = 0.8; // Memory allocation parameter @@ -129,6 +131,105 @@ void set_default_parameters(){ All.GravityConstantInternal = 0; // Keep this turned off }
+// Fill the global `All` parameter set with hard-coded values for the 3D Noh
+// test problem. initialize_code() calls this in place of
+// set_default_parameters(). Takes no arguments and returns nothing; every
+// effect is a write to `All`.
+// NOTE(review): the values presumably mirror Arepo's examples/Noh_3d/param.txt
+// -- confirm against that file.
+void set_noh_3d_parameters(){
+    // Relevant files
+    // (all paths are relative, so they resolve against the working directory)
+    strcpy(All.InitCondFile, "./IC");
+    strcpy(All.OutputDir, "./output");
+    strcpy(All.SnapshotFileBase, "snap");
+    strcpy(All.OutputListFilename, "./output_list.txt");
+
+    // NOTE(review): format 3 is presumably HDF5 (the Makefile enables
+    // HAVE_HDF5) -- confirm.
+    All.ICFormat = 3;
+
+    All.SnapFormat = 3;
+    All.NumFilesPerSnapshot = 1;
+    All.NumFilesWrittenInParallel = 1;
+
+    All.ResubmitOn = 0;
+    strcpy(All.ResubmitCommand, "my-scriptfile");
+    All.OutputListOn = 0;
+
+    All.CoolingOn = 0;
+    All.StarformationOn = 0;
+
+    // Cosmological terms are zeroed and comoving integration is off.
+    All.Omega0 = 0.0;
+    All.OmegaBaryon = 0.0;
+    All.OmegaLambda = 0.0;
+    All.HubbleParam = 1.0;
+
+    All.BoxSize = 6.0;
+    All.PeriodicBoundariesOn = 1;
+    All.ComovingIntegrationOn = 0;
+
+    All.MaxMemSize = 2500;
+
+    // NOTE(review): TimeOfFirstSnapshot (10.0) is larger than TimeMax (2.0)
+    // set below -- confirm that this is intended.
+    All.TimeOfFirstSnapshot = 10.0;
+    All.CpuTimeBetRestartFile = 9000;
+    All.TimeLimitCPU = 90000;
+
+    All.TimeBetStatistics = 0.005;
+    All.TimeBegin = 0.0;
+    All.TimeMax = 2.0;
+    All.TimeBetSnapshot = 0.5;
+
+    // Unit system: velocity, length and mass units are all 1 (cm/s, cm, g).
+    All.UnitVelocity_in_cm_per_s = 1.0;
+    All.UnitLength_in_cm = 1.0;
+    All.UnitMass_in_g = 1.0;
+    All.GravityConstantInternal = 0.0;
+
+    All.ErrTolIntAccuracy = 0.1;
+    All.ErrTolTheta = 0.1;
+    All.ErrTolForceAcc = 0.1;
+
+    All.MaxSizeTimestep = 0.5;
+    All.MinSizeTimestep = 1e-5;
+    All.CourantFac = 0.3;
+
+    All.LimitUBelowThisDensity = 0.0;
+    All.LimitUBelowCertainDensityToThisValue = 0.0;
+    All.DesNumNgb = 64;
+    All.MaxNumNgbDeviation = 2;
+
+    All.MultipleDomains = 2;
+    All.TopNodeFactor = 4;
+    All.ActivePartFracForNewDomainDecomp = 0.1;
+
+    All.TypeOfTimestepCriterion = 0;
+    All.TypeOfOpeningCriterion = 1;
+    All.GasSoftFactor = 0.01;
+
+    // NOTE(review): the three tables below write indices 0-5. This patch also
+    // removes -DNSOFTTYPES=2 from the Makefiles -- confirm that the softening
+    // arrays hold at least six entries with the remaining flags.
+    All.SofteningComoving[0] = 0.1;
+    All.SofteningComoving[1] = 0.1;
+    All.SofteningComoving[2] = 0.1;
+    All.SofteningComoving[3] = 0.1;
+    All.SofteningComoving[4] = 0.1;
+    All.SofteningComoving[5] = 0.1;
+
+    All.SofteningMaxPhys[0] = 0.1;
+    All.SofteningMaxPhys[1] = 0.1;
+    All.SofteningMaxPhys[2] = 0.1;
+    All.SofteningMaxPhys[3] = 0.1;
+    All.SofteningMaxPhys[4] = 0.1;
+    All.SofteningMaxPhys[5] = 0.1;
+
+    // Particle type 0 uses softening type 0; types 1-5 all use type 1.
+    All.SofteningTypeOfPartType[0] = 0;
+    All.SofteningTypeOfPartType[1] = 1;
+    All.SofteningTypeOfPartType[2] = 1;
+    All.SofteningTypeOfPartType[3] = 1;
+    All.SofteningTypeOfPartType[4] = 1;
+    All.SofteningTypeOfPartType[5] = 1;
+
+    All.InitGasTemp = 0.0;
+    All.MinGasTemp = 0.0;
+    All.MinEgySpec = 0.0;
+    All.MinimumDensityOnStartUp = 0.0;
+
+    All.CellShapingSpeed = 0.5;
+    // CellMaxAngleFactor is assigned only when the build defines
+    // REGULARIZE_MESH_FACE_ANGLE; CellShapingFactor is not assigned here.
+    #ifdef REGULARIZE_MESH_FACE_ANGLE
+        All.CellMaxAngleFactor = 2.25;
+    #endif
+
+}
+
int initialize_code(){ MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); @@ -144,7 +245,9 @@ int initialize_code(){ // Needed to check available memory mpi_report_committable_memory(); - set_default_parameters(); + // set_default_parameters(); + set_noh_3d_parameters(); + RestartFlag = 0; diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 7884a35e33..ece94621d1 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -28,22 +28,32 @@ SRCS += main/allvars.c CODEOBJS = $(SRCS:c=o) CODEOBJS += main/allvars.o main/run.o main/main.o -AREPOFLAGS += -DSELFGRAVITY # gravitational intraction between simulation particles/cells -AREPOFLAGS += -DHIERARCHICAL_GRAVITY # use hierarchical splitting of the time integration of the gravity -AREPOFLAGS += -DCELL_CENTER_GRAVITY # uses geometric centers to calculate gravity of cells, only possible with HIERARCHICAL_GRAVITY -AREPOFLAGS += -DALLOW_DIRECT_SUMMATION # Performed direct summation instead of tree-based gravity if number of active particles < DIRECT_SUMMATION_THRESHOLD (= 3000 unless specified differently here) -AREPOFLAGS += -DDIRECT_SUMMATION_THRESHOLD=500 # Overrides maximum number of active particles for which direct summation is performed instead of tree based calculation -AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # gravity is not treated periodically -AREPOFLAGS += -DNSOFTTYPES=2 # Number of different softening values to which particle types can be mapped.
-AREPOFLAGS += -DMULTIPLE_NODE_SOFTENING # If a tree node is to be used which is softened, this is done with the softenings of its different mass components -AREPOFLAGS += -DINDIVIDUAL_GRAVITY_SOFTENING=32 # bitmask with particle types where the softenig type should be chosen with that of parttype 1 as a reference type -AREPOFLAGS += -DADAPTIVE_HYDRO_SOFTENING # Adaptive softening of gas cells depending on their size +## examples/Noh_3d/Config.sh +## config file for 3d Noh problem + +#--------------------------------------- Basic operation mode of code +AREPOFLAGS += -DREFLECTIVE_X=2 # in-/outflow boundary conditions in x direction +AREPOFLAGS += -DREFLECTIVE_Y=2 # in-/outflow boundary conditions in y direction +AREPOFLAGS += -DREFLECTIVE_Z=2 # in-/outflow boundary conditions in z direction + +#--------------------------------------- Mesh motion and regularization +AREPOFLAGS += -DREGULARIZE_MESH_CM_DRIFT # Mesh regularization; Move mesh generating point towards center of mass to make cells rounder. +AREPOFLAGS += -DREGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED # Limit mesh regularization speed by local sound speed +AREPOFLAGS += -DREGULARIZE_MESH_FACE_ANGLE # Use maximum face angle as roundness criterion in mesh regularization + +#--------------------------------------- Time integration options AREPOFLAGS += -DTREE_BASED_TIMESTEPS # non-local timestep criterion (take 'signal speed' into account) + +#---------------------------------------- Single/Double Precision AREPOFLAGS += -DDOUBLEPRECISION=1 # Mode of double precision: not defined: single; 1: full double precision 2: mixed, 3: mixed, fewer single precisions; unless short of memory, use 1.
-AREPOFLAGS += -DNGB_TREE_DOUBLEPRECISION # if this is enabled, double precision is used for the neighbor node extension -AREPOFLAGS += -DPROCESS_TIMES_OF_OUTPUTLIST # goes through times of output list prior to starting the simulaiton to ensure that outputs are written as close to the desired time as possible (as opposed to at next possible time if this flag is not active) -AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired (recommended) -# AREPOFLAGS += -DDEBUG # enables core-dumps +AREPOFLAGS += -DINPUT_IN_DOUBLEPRECISION # initial conditions are in double precision +AREPOFLAGS += -DOUTPUT_CENTER_OF_MASS # output centers of cells + +#--------------------------------------- Output/Input options +AREPOFLAGS += -DHAVE_HDF5 # needed when HDF5 I/O support is desired; should this be standard? + +#--------------------------------------- Testing and Debugging options +#AREPOFLAGS += -DDEBUG # enables core-dumps, should this be standard? CXXFLAGS += $(AREPOFLAGS)
diff --git a/src/amuse/community/arepo/test_noh3d.py b/src/amuse/community/arepo/test_noh3d.py
new file mode 100644
index 0000000000..be97e93453
--- /dev/null
+++ b/src/amuse/community/arepo/test_noh3d.py
@@ -0,0 +1,113 @@
+"""Evolve the Arepo 3D Noh setup through AMUSE and plot particle tracks.
+
+A fixed random sample of particles is followed while the model is evolved to
+t = 2e-5, t = 1.0 and t = 1.9. One 3D figure is saved per epoch, with each
+track coloured by the particle's density at that epoch.
+"""
+import random
+from amuse.community.arepo import Arepo
+import numpy as np
+from matplotlib import pyplot as plt
+from matplotlib import cm
+# import unit seconds as s
+
+N_PLOT_PARTICLES = 3000
+N_SUMMARY_PARTICLES = 5  # tracks echoed to stdout after each epoch
+
+# Check code runs without errors
+x = Arepo(redirection="none")
+x.initialize_code()
+
+n_particles_total = x.get_number_of_particles()
+print('AMUSE: number of particles: {}'.format(n_particles_total))
+random.seed(123)
+# random.sample raises ValueError when k exceeds the population, so never ask
+# for more particles than the code reports.
+tracked_ids = random.sample(
+    range(n_particles_total), k=min(N_PLOT_PARTICLES, n_particles_total)
+)
+
+
+def dist(p0, p1):
+    """Return the Euclidean distance between points p0 and p1."""
+    return np.linalg.norm(np.array(p1) - np.array(p0))
+
+
+def record_positions():
+    """Append the current position of every tracked particle to its track."""
+    for pid in tracked_ids:
+        positions[pid].append(x.get_position(pid))
+
+
+def sample_densities():
+    """Return {particle id: current density} for every tracked particle."""
+    return {pid: x.get_density(pid) for pid in tracked_ids}
+
+
+def print_summary():
+    """Print id, track and start-to-end distance for the first few particles."""
+    for pid in list(positions)[:N_SUMMARY_PARTICLES]:
+        print(pid)
+        print(positions[pid])
+        print(dist(positions[pid][0], positions[pid][-1]))
+        print()
+
+
+def plot_tracks(densities, filename, last_n=None):
+    """Save a 3D plot of the tracks, coloured by normalised density.
+
+    densities maps particle id to the density used for its colour. last_n
+    restricts every track to its most recent last_n samples; None plots the
+    whole track.
+    """
+    low = min(densities.values())
+    high = max(densities.values())
+    fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
+    cmap = cm.plasma
+    for pid, track in positions.items():
+        points = track if last_n is None else track[-last_n:]
+        # A uniform density field has zero range; use the bottom of the colour
+        # map instead of dividing by zero.
+        if high == low:
+            fraction = 0.0
+        else:
+            fraction = (densities[pid] - low) / (high - low)
+        ax.plot(*np.array(points).T, color=cmap(fraction))
+    fig.suptitle("Densities: t = {}".format(x.get_time()))
+    fig.savefig(filename)
+
+
+positions = {pid: [x.get_position(pid)] for pid in tracked_ids}
+print(x.get_position(30))  # debug: position of one fixed particle
+
+print("Evolving")
+x.evolve_model(0.00001)
+record_positions()
+
+# First plot at t = 2e-5: full tracks recorded so far
+print("Evolving another step")
+x.evolve_model(0.00002)
+record_positions()
+final_densities = sample_densities()
+print_summary()
+plot_tracks(final_densities, "noh_positions_0.png")
+
+# Second plot at t = 1: only the last two samples of each track
+print("Evolving another step")
+x.evolve_model(1.0 - 0.00005)
+record_positions()
+x.evolve_model(1.0)
+record_positions()
+final_densities = sample_densities()
+print_summary()
+plot_tracks(final_densities, "noh_positions_1.png", last_n=2)
+
+# Third plot at t = 1.9: only the last two samples of each track
+print("Evolving another step")
+x.evolve_model(1.9 - 0.00002)
+record_positions()
+x.evolve_model(1.9)
+record_positions()
+final_densities = sample_densities()
+print_summary()
+plot_tracks(final_densities, "noh_positions_2.png", last_n=2)