Again

openfisca · Oct 9, 2023 · 8043732 · 8043732
1 parent f8bdf37
commit 8043732
Show file tree

Hide file tree

Showing 9 changed files with 220 additions and 892 deletions.
diff --git a/openfisca_survey_manager/scenarios/abstract_scenario.py b/openfisca_survey_manager/scenarios/abstract_scenario.py
diff --git a/openfisca_survey_manager/scenarios/reform_scenario.py b/openfisca_survey_manager/scenarios/reform_scenario.py
diff --git a/openfisca_survey_manager/tests/test_calibration.py b/openfisca_survey_manager/tests/test_calibration.py
@@ -24,7 +24,8 @@ def test_calibration_variable_entity_is_weight_entity():
         parameters = {"method": "raking ratio"},
         )
 
-    assert all(survey_scenario.simulation.calibration.weight != survey_scenario.simulation.calibration.initial_weight)
+    for simulation_name, simulation in survey_scenario.simulations.items():
+        assert all(simulation.calibration.weight != simulation.calibration.initial_weight)
 
     assert_near(survey_scenario.compute_aggregate("rent", period = period), target_rent_aggregate)
 
@@ -55,7 +56,7 @@ def test_simulation_calibration_variable_entity_is_weight_entity():
     survey_scenario = create_randomly_initialized_survey_scenario(collection=None)
     period = "2017-01"
     survey_scenario.period = period
-    simulation = survey_scenario.simulation
+    simulation = list(survey_scenario.simulations.values())[0]
     person_weight_before = simulation.calculate("person_weight", period)
 
     # initial_rent_aggregate = simulation.compute_aggregate("rent", period = period)

diff --git a/openfisca_survey_manager/tests/test_compute_aggregate.py b/openfisca_survey_manager/tests/test_compute_aggregate.py
@@ -7,23 +7,25 @@ def test_compute_aggregate():
     period = "2017-01"
     variable = "social_security_contribution"
 
-    aggregate_after = survey_scenario.compute_aggregate(variable, period = period)
-    aggregate_before = survey_scenario.compute_aggregate(variable, period = period, use_baseline = True)
+    aggregate_after = survey_scenario.compute_aggregate(variable, period = period, simulation = "reform")
+    aggregate_before = survey_scenario.compute_aggregate(variable, period = period, simulation = "baseline")
 
     assert aggregate_after > aggregate_before
 
-    survey_scenario.calculate_variable("social_security_contribution", period = period)
-    survey_scenario.calculate_variable("salary", period = period)
+    survey_scenario.calculate_variable("social_security_contribution", period = period, simulation = "reform")
+    survey_scenario.calculate_variable("salary", period = period, simulation = "reform")
 
     assert 0 == survey_scenario.compute_aggregate(
         "social_security_contribution",
         period = period,
+        simulation = "reform",
         filter_by = "salary < 3000",
         )
 
     assert 34489 == survey_scenario.compute_aggregate(
         "social_security_contribution",
         period = period,
+        simulation = "reform",
         filter_by = "3000 < salary < 10000",
         ).astype(int)
 
@@ -32,5 +34,6 @@ def test_compute_aggregate():
     assert 576 == survey_scenario.compute_aggregate(
         "social_security_contribution",
         period = period,
+        simulation = "reform",
         filter_by = "3000 < salary < 10000",
         ).astype(int)
diff --git a/openfisca_survey_manager/tests/test_compute_winners_loosers.py b/openfisca_survey_manager/tests/test_compute_winners_loosers.py
@@ -11,8 +11,8 @@ def test_compute_winners_loosers_basics():
     period = survey_scenario.period
     variable = "pension"
 
-    simulation = survey_scenario.simulation
-    baseline_simulation = simulation
+    simulation = survey_scenario.simulations["baseline"]
+    baseline_simulation = survey_scenario.simulations["baseline"]
 
     simulation.adaptative_calculate_variable(variable, period = period)
     absolute_minimal_detected_variation = 1
@@ -46,13 +46,8 @@ def test_compute_winners_loosers():
     period = survey_scenario.period
     variable = "social_security_contribution"
 
-    aggregate_after = survey_scenario.compute_aggregate(variable, period = period)
-    aggregate_before = survey_scenario.compute_aggregate(variable, period = period, use_baseline = True)
-
-    assert aggregate_after > aggregate_before
-
-    simulation = survey_scenario.simulation
-    baseline_simulation = survey_scenario.baseline_simulation
+    simulation = survey_scenario.simulations["reform"]
+    baseline_simulation = survey_scenario.simulations["baseline"]
 
     absolute_minimal_detected_variation = .9
     relative_minimal_detected_variation = .05
@@ -69,6 +64,8 @@ def test_compute_winners_loosers():
 
     winners_loosers_scenario = survey_scenario.compute_winners_loosers(
         variable,
+        simulation = "reform",
+        baseline_simulation = "baseline",
         period = period,
         absolute_minimal_detected_variation = absolute_minimal_detected_variation,
         relative_minimal_detected_variation = relative_minimal_detected_variation,

diff --git a/openfisca_survey_manager/tests/test_create_data_frame_by_entity.py b/openfisca_survey_manager/tests/test_create_data_frame_by_entity.py
@@ -10,7 +10,10 @@ class TestCreateDataFrameByEntity(unittest.TestCase):
     def test_create_data_frame_by_entity(self):
         survey_scenario = create_randomly_initialized_survey_scenario()
         period = '2017-01'
-        df_by_entity = survey_scenario.create_data_frame_by_entity(variables = ['salary', 'rent'], period = period)
+        df_by_entity = survey_scenario.create_data_frame_by_entity(
+            variables = ['salary', 'rent'],
+            period = period,
+            )
         salary = survey_scenario.calculate_variable('salary', period = period)
         rent = survey_scenario.calculate_variable('rent', period = period)
         for entity, df in df_by_entity.items():

diff --git a/openfisca_survey_manager/tests/test_marginal_tax_rate.py b/openfisca_survey_manager/tests/test_marginal_tax_rate.py
@@ -17,13 +17,13 @@
 
 def test_compute_marginal_tax_rate():
     survey_scenario = create_randomly_initialized_survey_scenario(use_marginal_tax_rate = True)
-    assert survey_scenario._modified_simulation is not None
+    assert "_modified_baseline" in survey_scenario.simulations
     assert_near(
-        survey_scenario.compute_marginal_tax_rate(target_variable = 'income_tax', period = 2017),
+        survey_scenario.compute_marginal_tax_rate(target_variable = 'income_tax', period = 2017, simulation = "baseline"),
         (1 - .15),
         relative_error_margin = 1e-6,
         )
-    survey_scenario.compute_marginal_tax_rate(target_variable = 'disposable_income', period = 2017)
+    survey_scenario.compute_marginal_tax_rate(target_variable = 'disposable_income', period = 2017, simulation = "baseline")
 
 
 if __name__ == "__main__":

diff --git a/openfisca_survey_manager/tests/test_quantile.py b/openfisca_survey_manager/tests/test_quantile.py
@@ -7,7 +7,7 @@
 from openfisca_core.model_api import Variable, YEAR
 from openfisca_core.entities import build_entity
 from openfisca_core.taxbenefitsystems import TaxBenefitSystem
-from openfisca_survey_manager.scenarios import AbstractSurveyScenario
+from openfisca_survey_manager.scenarios.abstract_scenario import AbstractSurveyScenario
 from openfisca_survey_manager.statshelpers import mark_weighted_percentiles
 from openfisca_survey_manager.variables import quantile
 
@@ -80,10 +80,18 @@ def __init__(self, input_data_frame = None, tax_benefit_system = None,
         self.period = period
         if tax_benefit_system is None:
             tax_benefit_system = QuantileTestTaxBenefitSystem()
-        self.set_tax_benefit_systems(
-            tax_benefit_system = tax_benefit_system,
-            baseline_tax_benefit_system = baseline_tax_benefit_system
+
+        tax_benefit_systems = (
+            dict(
+                reform = tax_benefit_system,
+                baseline = baseline_tax_benefit_system
+                )
+            if baseline_tax_benefit_system
+            else dict(baseline = tax_benefit_system)
             )
+
+        self.set_tax_benefit_systems(tax_benefit_systems)
+
         self.used_as_input_variables = list(
             set(tax_benefit_system.variables.keys()).intersection(
                 set(input_data_frame.columns)
@@ -119,7 +127,9 @@ def test_quantile():
     data = np.linspace(1, 11 - 1e-5, size)
     target = np.floor(data)
     result = survey_scenario.calculate_variable(
-        variable = 'decile_salaire_from_quantile', period = '2017'
+        variable = 'decile_salaire_from_quantile',
+        period = '2017',
+        simulation = "baseline",
         )
     assert all(
         (result == target) + (abs(result - target + 1) < .001)  # Finite size problem handling

diff --git a/openfisca_survey_manager/tests/test_scenario.py b/openfisca_survey_manager/tests/test_scenario.py
@@ -14,7 +14,7 @@
     random_data_generator,
     randomly_init_variable,
     )
-from openfisca_survey_manager.scenarios import AbstractSurveyScenario
+from openfisca_survey_manager.scenarios.abstract_scenario import AbstractSurveyScenario
 from openfisca_survey_manager.tests import tax_benefit_system
 
 
@@ -59,12 +59,12 @@ def create_randomly_initialized_survey_scenario_from_table(nb_persons, nb_groups
         variable_generators_by_period, collection)
     survey_scenario = AbstractSurveyScenario()
     if reform is None:
-        survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
+        survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
     else:
-        survey_scenario.set_tax_benefit_systems(
-            tax_benefit_system = reform(tax_benefit_system),
-            baseline_tax_benefit_system = tax_benefit_system,
-            )
+        survey_scenario.set_tax_benefit_systems(dict(
+            reform = reform(tax_benefit_system),
+            baseline = tax_benefit_system,
+            ))
 
     survey_scenario.used_as_input_variables = ['salary', 'rent', 'housing_occupancy_status', 'household_weight']
     survey_scenario.period = 2017
@@ -92,14 +92,13 @@ def create_randomly_initialized_survey_scenario_from_data_frame(nb_persons, nb_g
         "person": "person_weight",
         "household": "household_weight",
         }
-    survey_scenario.set_weight_variable_by_entity(weight_variable_by_entity)
     if reform is None:
-        survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
+        survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
     else:
-        survey_scenario.set_tax_benefit_systems(
-            tax_benefit_system = reform(tax_benefit_system),
-            baseline_tax_benefit_system = tax_benefit_system,
-            )
+        survey_scenario.set_tax_benefit_systems(dict(
+            reform = reform(tax_benefit_system),
+            baseline = tax_benefit_system,
+            ))
     survey_scenario.period = 2017
     survey_scenario.used_as_input_variables = ['salary', 'rent', 'household_weight']
     period = periods.period('2017-01')
@@ -109,10 +108,12 @@ def create_randomly_initialized_survey_scenario_from_data_frame(nb_persons, nb_g
             period: input_data_frame_by_entity
             }
         }
+    survey_scenario.set_weight_variable_by_entity(weight_variable_by_entity)
     assert survey_scenario.weight_variable_by_entity == weight_variable_by_entity
     survey_scenario.init_from_data(data = data)
-    assert survey_scenario.simulation.weight_variable_by_entity == weight_variable_by_entity
-    assert (survey_scenario.calculate_series("household_weight", period) != 0).all()
+    for simulation_name, simulation in survey_scenario.simulations.items():
+        assert simulation.weight_variable_by_entity == weight_variable_by_entity, f"{simulation_name} weight_variable_by_entity does not match {weight_variable_by_entity}"
+        assert (survey_scenario.calculate_series("household_weight", period, simulation = simulation_name) != 0).all()
     return survey_scenario
 
 
@@ -171,6 +172,7 @@ def test_init_from_data(nb_persons = 10, nb_groups = 5, salary_max_value = 50000
 
     # Set up test : the minimum necessary data to perform an `init_from_data`
     survey_scenario = AbstractSurveyScenario()
+    assert survey_scenario.simulations is None
     # Generate some data and its period
     input_data_frame_by_entity = generate_input_input_dataframe_by_entity(
         nb_persons, nb_groups, salary_max_value, rent_max_value)
@@ -187,19 +189,21 @@ def test_init_from_data(nb_persons = 10, nb_groups = 5, salary_max_value = 50000
     # print(table_ind)
 
     # We must add a TBS to the scenario to indicate what are the entities
-    survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
+    survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
+    assert len(survey_scenario.tax_benefit_systems) == 1
+    assert list(survey_scenario.tax_benefit_systems.keys()) == ["baseline"]
+    assert survey_scenario.simulations is None
     # We must add the `used_as_input_variables` even though they don't seem necessary
     survey_scenario.used_as_input_variables = ['salary', 'rent', 'household_weight']
     # We must add the year to initiate a .new_simulation
     survey_scenario.period = 2017
     # Then we can input the data+period dict inside the scenario
     survey_scenario.init_from_data(data = data_in)
-
+    assert len(survey_scenario.simulations) == 1
     # We are looking for the dataframes inside the survey_scenario
     all_var = list(set(list(table_ind.columns) + list(table_men.columns)))
     # print('Variables', all_var)
     data_out = survey_scenario.create_data_frame_by_entity(variables = all_var, period = period, merge = False)
-    # data_out =  survey_scenario.create_data_frame_by_entity(variables = all_var, period = period, merge = True)
 
     # 1 - Has the data object changed ? We only compare variables because Id's and others are lost in the process
     for cols in table_ind:
@@ -217,35 +221,13 @@ def test_init_from_data(nb_persons = 10, nb_groups = 5, salary_max_value = 50000
     assert data_out['household']['rent'].equals(table_men['rent'])
 
 
-# def test_used_as_input_variables():
-#    # Set up test
-#    #
-#    #
-#
-#
-#    ## test filter_input_variables OU quelle fct pour tester used_as_input_variables ?
-#    # 2 - If we filter the input variables, are they still in the database?
-#    survey_scenario.used_as_input_variables = ['rent']
-#    survey_scenario.filter_input_variables()
-#
-#    assert 'rent' in base
-#    assert 'salary' not base
-#
-#    # 3 - Faut-il recalculer la base?
-#    base2 = survey_scenario.input_data_table_by_period  # ??
-#    assert base2 == base
-#
-#    # 4 - If we perform a simulation, are they still in the database?
-#    survey do simulation
-
-
 def test_survey_scenario_input_dataframe_import(nb_persons = 10, nb_groups = 5, salary_max_value = 50000,
         rent_max_value = 1000):
 
     input_data_frame_by_entity = generate_input_input_dataframe_by_entity(
         nb_persons, nb_groups, salary_max_value, rent_max_value)
     survey_scenario = AbstractSurveyScenario()
-    survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
+    survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
     survey_scenario.period = 2017
     survey_scenario.used_as_input_variables = ['salary', 'rent']
     period = periods.period('2017-01')
@@ -256,7 +238,7 @@ def test_survey_scenario_input_dataframe_import(nb_persons = 10, nb_groups = 5,
         }
     survey_scenario.init_from_data(data = data)
 
-    simulation = survey_scenario.simulation
+    simulation = survey_scenario.simulations["baseline"]
     assert (
         simulation.calculate('salary', period) == input_data_frame_by_entity['person']['salary']
         ).all()
@@ -274,7 +256,7 @@ def test_survey_scenario_input_dataframe_import_scrambled_ids(nb_persons = 10, n
         nb_persons, nb_groups, salary_max_value, rent_max_value)  # Un dataframe d'exemple que l'on injecte
     input_data_frame_by_entity['person']['household_id'] = 4 - input_data_frame_by_entity['person']['household_id']
     survey_scenario = AbstractSurveyScenario()
-    survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
+    survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
     survey_scenario.period = 2017
     survey_scenario.used_as_input_variables = ['salary', 'rent']
     period = periods.period('2017-01')
@@ -284,7 +266,7 @@ def test_survey_scenario_input_dataframe_import_scrambled_ids(nb_persons = 10, n
             }
         }
     survey_scenario.init_from_data(data = data)
-    simulation = survey_scenario.simulation
+    simulation = survey_scenario.simulations["baseline"]
     period = periods.period('2017-01')
     assert (
         simulation.calculate('salary', period) == input_data_frame_by_entity['person']['salary']
@@ -315,7 +297,7 @@ def test_dump_survey_scenario():
     assert not person.empty
     del survey_scenario
     survey_scenario = AbstractSurveyScenario()
-    survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
+    survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
     survey_scenario.used_as_input_variables = ['salary', 'rent']
     survey_scenario.period = 2017
     survey_scenario.restore_simulations(directory = directory)