Skip to content

Commit

Permalink
Again
Browse files: browse the repository at this point in the history
  • Loading branch information
benjello committed Oct 9, 2023
1 parent f8bdf37 commit 8043732
Show file tree
Hide file tree
Showing 9 changed files with 220 additions and 892 deletions.
162 changes: 85 additions & 77 deletions openfisca_survey_manager/scenarios/abstract_scenario.py

Large diffs are not rendered by default.

818 changes: 71 additions & 747 deletions openfisca_survey_manager/scenarios/reform_scenario.py

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions openfisca_survey_manager/tests/test_calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def test_calibration_variable_entity_is_weight_entity():
parameters = {"method": "raking ratio"},
)

assert all(survey_scenario.simulation.calibration.weight != survey_scenario.simulation.calibration.initial_weight)
for simulation_name, simulation in survey_scenario.simulations.items():
assert all(simulation.calibration.weight != simulation.calibration.initial_weight)

assert_near(survey_scenario.compute_aggregate("rent", period = period), target_rent_aggregate)

Expand Down Expand Up @@ -55,7 +56,7 @@ def test_simulation_calibration_variable_entity_is_weight_entity():
survey_scenario = create_randomly_initialized_survey_scenario(collection=None)
period = "2017-01"
survey_scenario.period = period
simulation = survey_scenario.simulation
simulation = list(survey_scenario.simulations.values())[0]
person_weight_before = simulation.calculate("person_weight", period)

# initial_rent_aggregate = simulation.compute_aggregate("rent", period = period)
Expand Down
11 changes: 7 additions & 4 deletions openfisca_survey_manager/tests/test_compute_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,25 @@ def test_compute_aggregate():
period = "2017-01"
variable = "social_security_contribution"

aggregate_after = survey_scenario.compute_aggregate(variable, period = period)
aggregate_before = survey_scenario.compute_aggregate(variable, period = period, use_baseline = True)
aggregate_after = survey_scenario.compute_aggregate(variable, period = period, simulation = "reform")
aggregate_before = survey_scenario.compute_aggregate(variable, period = period, simulation = "baseline")

assert aggregate_after > aggregate_before

survey_scenario.calculate_variable("social_security_contribution", period = period)
survey_scenario.calculate_variable("salary", period = period)
survey_scenario.calculate_variable("social_security_contribution", period = period, simulation = "reform")
survey_scenario.calculate_variable("salary", period = period, simulation = "reform")

assert 0 == survey_scenario.compute_aggregate(
"social_security_contribution",
period = period,
simulation = "reform",
filter_by = "salary < 3000",
)

assert 34489 == survey_scenario.compute_aggregate(
"social_security_contribution",
period = period,
simulation = "reform",
filter_by = "3000 < salary < 10000",
).astype(int)

Expand All @@ -32,5 +34,6 @@ def test_compute_aggregate():
assert 576 == survey_scenario.compute_aggregate(
"social_security_contribution",
period = period,
simulation = "reform",
filter_by = "3000 < salary < 10000",
).astype(int)
15 changes: 6 additions & 9 deletions openfisca_survey_manager/tests/test_compute_winners_loosers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ def test_compute_winners_loosers_basics():
period = survey_scenario.period
variable = "pension"

simulation = survey_scenario.simulation
baseline_simulation = simulation
simulation = survey_scenario.simulations["baseline"]
baseline_simulation = survey_scenario.simulations["baseline"]

simulation.adaptative_calculate_variable(variable, period = period)
absolute_minimal_detected_variation = 1
Expand Down Expand Up @@ -46,13 +46,8 @@ def test_compute_winners_loosers():
period = survey_scenario.period
variable = "social_security_contribution"

aggregate_after = survey_scenario.compute_aggregate(variable, period = period)
aggregate_before = survey_scenario.compute_aggregate(variable, period = period, use_baseline = True)

assert aggregate_after > aggregate_before

simulation = survey_scenario.simulation
baseline_simulation = survey_scenario.baseline_simulation
simulation = survey_scenario.simulations["reform"]
baseline_simulation = survey_scenario.simulations["baseline"]

absolute_minimal_detected_variation = .9
relative_minimal_detected_variation = .05
Expand All @@ -69,6 +64,8 @@ def test_compute_winners_loosers():

winners_loosers_scenario = survey_scenario.compute_winners_loosers(
variable,
simulation = "reform",
baseline_simulation = "baseline",
period = period,
absolute_minimal_detected_variation = absolute_minimal_detected_variation,
relative_minimal_detected_variation = relative_minimal_detected_variation,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ class TestCreateDataFrameByEntity(unittest.TestCase):
def test_create_data_frame_by_entity(self):
survey_scenario = create_randomly_initialized_survey_scenario()
period = '2017-01'
df_by_entity = survey_scenario.create_data_frame_by_entity(variables = ['salary', 'rent'], period = period)
df_by_entity = survey_scenario.create_data_frame_by_entity(
variables = ['salary', 'rent'],
period = period,
)
salary = survey_scenario.calculate_variable('salary', period = period)
rent = survey_scenario.calculate_variable('rent', period = period)
for entity, df in df_by_entity.items():
Expand Down
6 changes: 3 additions & 3 deletions openfisca_survey_manager/tests/test_marginal_tax_rate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@

def test_compute_marginal_tax_rate():
survey_scenario = create_randomly_initialized_survey_scenario(use_marginal_tax_rate = True)
assert survey_scenario._modified_simulation is not None
assert "_modified_baseline" in survey_scenario.simulations
assert_near(
survey_scenario.compute_marginal_tax_rate(target_variable = 'income_tax', period = 2017),
survey_scenario.compute_marginal_tax_rate(target_variable = 'income_tax', period = 2017, simulation = "baseline"),
(1 - .15),
relative_error_margin = 1e-6,
)
survey_scenario.compute_marginal_tax_rate(target_variable = 'disposable_income', period = 2017)
survey_scenario.compute_marginal_tax_rate(target_variable = 'disposable_income', period = 2017, simulation = "baseline")


if __name__ == "__main__":
Expand Down
20 changes: 15 additions & 5 deletions openfisca_survey_manager/tests/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from openfisca_core.model_api import Variable, YEAR
from openfisca_core.entities import build_entity
from openfisca_core.taxbenefitsystems import TaxBenefitSystem
from openfisca_survey_manager.scenarios import AbstractSurveyScenario
from openfisca_survey_manager.scenarios.abstract_scenario import AbstractSurveyScenario
from openfisca_survey_manager.statshelpers import mark_weighted_percentiles
from openfisca_survey_manager.variables import quantile

Expand Down Expand Up @@ -80,10 +80,18 @@ def __init__(self, input_data_frame = None, tax_benefit_system = None,
self.period = period
if tax_benefit_system is None:
tax_benefit_system = QuantileTestTaxBenefitSystem()
self.set_tax_benefit_systems(
tax_benefit_system = tax_benefit_system,
baseline_tax_benefit_system = baseline_tax_benefit_system

tax_benefit_systems = (
dict(
reform = tax_benefit_system,
baseline = baseline_tax_benefit_system
)
if baseline_tax_benefit_system
else dict(baseline = tax_benefit_system)
)

self.set_tax_benefit_systems(tax_benefit_systems)

self.used_as_input_variables = list(
set(tax_benefit_system.variables.keys()).intersection(
set(input_data_frame.columns)
Expand Down Expand Up @@ -119,7 +127,9 @@ def test_quantile():
data = np.linspace(1, 11 - 1e-5, size)
target = np.floor(data)
result = survey_scenario.calculate_variable(
variable = 'decile_salaire_from_quantile', period = '2017'
variable = 'decile_salaire_from_quantile',
period = '2017',
simulation = "baseline",
)
assert all(
(result == target) + (abs(result - target + 1) < .001) # Finite size problem handling
Expand Down
70 changes: 26 additions & 44 deletions openfisca_survey_manager/tests/test_scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
random_data_generator,
randomly_init_variable,
)
from openfisca_survey_manager.scenarios import AbstractSurveyScenario
from openfisca_survey_manager.scenarios.abstract_scenario import AbstractSurveyScenario
from openfisca_survey_manager.tests import tax_benefit_system


Expand Down Expand Up @@ -59,12 +59,12 @@ def create_randomly_initialized_survey_scenario_from_table(nb_persons, nb_groups
variable_generators_by_period, collection)
survey_scenario = AbstractSurveyScenario()
if reform is None:
survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
else:
survey_scenario.set_tax_benefit_systems(
tax_benefit_system = reform(tax_benefit_system),
baseline_tax_benefit_system = tax_benefit_system,
)
survey_scenario.set_tax_benefit_systems(dict(
reform = reform(tax_benefit_system),
baseline = tax_benefit_system,
))

survey_scenario.used_as_input_variables = ['salary', 'rent', 'housing_occupancy_status', 'household_weight']
survey_scenario.period = 2017
Expand Down Expand Up @@ -92,14 +92,13 @@ def create_randomly_initialized_survey_scenario_from_data_frame(nb_persons, nb_g
"person": "person_weight",
"household": "household_weight",
}
survey_scenario.set_weight_variable_by_entity(weight_variable_by_entity)
if reform is None:
survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
else:
survey_scenario.set_tax_benefit_systems(
tax_benefit_system = reform(tax_benefit_system),
baseline_tax_benefit_system = tax_benefit_system,
)
survey_scenario.set_tax_benefit_systems(dict(
reform = reform(tax_benefit_system),
baseline = tax_benefit_system,
))
survey_scenario.period = 2017
survey_scenario.used_as_input_variables = ['salary', 'rent', 'household_weight']
period = periods.period('2017-01')
Expand All @@ -109,10 +108,12 @@ def create_randomly_initialized_survey_scenario_from_data_frame(nb_persons, nb_g
period: input_data_frame_by_entity
}
}
survey_scenario.set_weight_variable_by_entity(weight_variable_by_entity)
assert survey_scenario.weight_variable_by_entity == weight_variable_by_entity
survey_scenario.init_from_data(data = data)
assert survey_scenario.simulation.weight_variable_by_entity == weight_variable_by_entity
assert (survey_scenario.calculate_series("household_weight", period) != 0).all()
for simulation_name, simulation in survey_scenario.simulations.items():
assert simulation.weight_variable_by_entity == weight_variable_by_entity, f"{simulation_name} weight_variable_by_entity does not match {weight_variable_by_entity}"
assert (survey_scenario.calculate_series("household_weight", period, simulation = simulation_name) != 0).all()
return survey_scenario


Expand Down Expand Up @@ -171,6 +172,7 @@ def test_init_from_data(nb_persons = 10, nb_groups = 5, salary_max_value = 50000

# Set up test : the minimum necessary data to perform an `init_from_data`
survey_scenario = AbstractSurveyScenario()
assert survey_scenario.simulations is None
# Generate some data and its period
input_data_frame_by_entity = generate_input_input_dataframe_by_entity(
nb_persons, nb_groups, salary_max_value, rent_max_value)
Expand All @@ -187,19 +189,21 @@ def test_init_from_data(nb_persons = 10, nb_groups = 5, salary_max_value = 50000
# print(table_ind)

# We must add a TBS to the scenario to indicate what are the entities
survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
assert len(survey_scenario.tax_benefit_systems) == 1
assert list(survey_scenario.tax_benefit_systems.keys()) == ["baseline"]
assert survey_scenario.simulations is None
# We must add the `used_as_input_variables` even though they don't seem necessary
survey_scenario.used_as_input_variables = ['salary', 'rent', 'household_weight']
# We must add the year to initiate a .new_simulation
survey_scenario.period = 2017
# Then we can input the data+period dict inside the scenario
survey_scenario.init_from_data(data = data_in)

assert len(survey_scenario.simulations) == 1
# We are looking for the dataframes inside the survey_scenario
all_var = list(set(list(table_ind.columns) + list(table_men.columns)))
# print('Variables', all_var)
data_out = survey_scenario.create_data_frame_by_entity(variables = all_var, period = period, merge = False)
# data_out = survey_scenario.create_data_frame_by_entity(variables = all_var, period = period, merge = True)

# 1 - Has the data object changed? We only compare variables because IDs and others are lost in the process
for cols in table_ind:
Expand All @@ -217,35 +221,13 @@ def test_init_from_data(nb_persons = 10, nb_groups = 5, salary_max_value = 50000
assert data_out['household']['rent'].equals(table_men['rent'])


# def test_used_as_input_variables():
# # Set up test
# #
# #
#
#
# ## test filter_input_variables OU quelle fct pour tester used_as_input_variables ?
# # 2 - If we filter the input variables, are they still in the database?
# survey_scenario.used_as_input_variables = ['rent']
# survey_scenario.filter_input_variables()
#
# assert 'rent' in base
# assert 'salary' not base
#
# # 3 - Faut-il recalculer la base?
# base2 = survey_scenario.input_data_table_by_period # ??
# assert base2 == base
#
# # 4 - If we perform a simulation, are they still in the database?
# survey do simulation


def test_survey_scenario_input_dataframe_import(nb_persons = 10, nb_groups = 5, salary_max_value = 50000,
rent_max_value = 1000):

input_data_frame_by_entity = generate_input_input_dataframe_by_entity(
nb_persons, nb_groups, salary_max_value, rent_max_value)
survey_scenario = AbstractSurveyScenario()
survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
survey_scenario.period = 2017
survey_scenario.used_as_input_variables = ['salary', 'rent']
period = periods.period('2017-01')
Expand All @@ -256,7 +238,7 @@ def test_survey_scenario_input_dataframe_import(nb_persons = 10, nb_groups = 5,
}
survey_scenario.init_from_data(data = data)

simulation = survey_scenario.simulation
simulation = survey_scenario.simulations["baseline"]
assert (
simulation.calculate('salary', period) == input_data_frame_by_entity['person']['salary']
).all()
Expand All @@ -274,7 +256,7 @@ def test_survey_scenario_input_dataframe_import_scrambled_ids(nb_persons = 10, n
nb_persons, nb_groups, salary_max_value, rent_max_value) # Un dataframe d'exemple que l'on injecte
input_data_frame_by_entity['person']['household_id'] = 4 - input_data_frame_by_entity['person']['household_id']
survey_scenario = AbstractSurveyScenario()
survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
survey_scenario.period = 2017
survey_scenario.used_as_input_variables = ['salary', 'rent']
period = periods.period('2017-01')
Expand All @@ -284,7 +266,7 @@ def test_survey_scenario_input_dataframe_import_scrambled_ids(nb_persons = 10, n
}
}
survey_scenario.init_from_data(data = data)
simulation = survey_scenario.simulation
simulation = survey_scenario.simulations["baseline"]
period = periods.period('2017-01')
assert (
simulation.calculate('salary', period) == input_data_frame_by_entity['person']['salary']
Expand Down Expand Up @@ -315,7 +297,7 @@ def test_dump_survey_scenario():
assert not person.empty
del survey_scenario
survey_scenario = AbstractSurveyScenario()
survey_scenario.set_tax_benefit_systems(tax_benefit_system = tax_benefit_system)
survey_scenario.set_tax_benefit_systems(dict(baseline = tax_benefit_system))
survey_scenario.used_as_input_variables = ['salary', 'rent']
survey_scenario.period = 2017
survey_scenario.restore_simulations(directory = directory)
Expand Down

0 comments on commit 8043732

Please sign in to comment.