[Minor] Split prep_or_copy_df into copy and check_multiple_series_id (#1647)

* separate copy from check multiple ID

* fix remaining references

* clean up

* add copy to tests

* fix tests

* update tests

* fixes

* finish fixes
ourownstory authored Sep 13, 2024
1 parent fe309be commit 4904808
Showing 10 changed files with 125 additions and 85 deletions.
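In short: `df_utils.prep_or_copy_df` used to deep-copy the input and handle the `ID` column in one step; it is renamed to `check_multiple_series_id`, which does only the ID handling, and call sites that genuinely need a defensive copy now make one explicitly. A minimal sketch of the new calling convention (frame contents illustrative; import path assumed from the repo layout):

```python
import pandas as pd
from neuralprophet import df_utils

df = pd.DataFrame({"ds": pd.date_range("2024-01-01", periods=3, freq="D"), "y": [1.0, 2.0, 3.0]})

# Before: copy and ID handling fused in one helper
# df, received_ID_col, received_single_series, id_list = df_utils.prep_or_copy_df(df)

# After: copy only where a copy is actually needed, then check/create the ID column
df = df.copy(deep=True)
df, received_ID_col, received_single_series, id_list = df_utils.check_multiple_series_id(df)
```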
@@ -1263,28 +1263,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Invalid frequency: NaT",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [27]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m future \u001b[38;5;241m=\u001b[39m m\u001b[38;5;241m.\u001b[39mmake_future_dataframe(df_test)\n\u001b[0;32m----> 2\u001b[0m forecast \u001b[38;5;241m=\u001b[39m \u001b[43mm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfuture\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m metrics \u001b[38;5;241m=\u001b[39m m\u001b[38;5;241m.\u001b[39mtest(df_test)\n\u001b[1;32m 4\u001b[0m forecast_trend \u001b[38;5;241m=\u001b[39m m\u001b[38;5;241m.\u001b[39mpredict_trend(df_test)\n",
"File \u001b[0;32m~/Desktop/code/neural_prophet/neuralprophet/forecaster.py:831\u001b[0m, in \u001b[0;36mNeuralProphet.predict\u001b[0;34m(self, df, decompose, raw)\u001b[0m\n\u001b[1;32m 829\u001b[0m df, received_ID_col, received_single_time_series, _ \u001b[38;5;241m=\u001b[39m df_utils\u001b[38;5;241m.\u001b[39mprep_or_copy_df(df)\n\u001b[1;32m 830\u001b[0m \u001b[38;5;66;03m# to get all forecasteable values with df given, maybe extend into future:\u001b[39;00m\n\u001b[0;32m--> 831\u001b[0m df, periods_added \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_extend_df\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 832\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepare_dataframe_to_predict(df)\n\u001b[1;32m 833\u001b[0m \u001b[38;5;66;03m# normalize\u001b[39;00m\n",
"File \u001b[0;32m~/Desktop/code/neural_prophet/neuralprophet/forecaster.py:2773\u001b[0m, in \u001b[0;36mNeuralProphet._maybe_extend_df\u001b[0;34m(self, df)\u001b[0m\n\u001b[1;32m 2771\u001b[0m extended_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame()\n\u001b[1;32m 2772\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m df_name, df_i \u001b[38;5;129;01min\u001b[39;00m df\u001b[38;5;241m.\u001b[39mgroupby(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mID\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 2773\u001b[0m _ \u001b[38;5;241m=\u001b[39m \u001b[43mdf_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minfer_frequency\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf_i\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_lags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_lags\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_freq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2774\u001b[0m \u001b[38;5;66;03m# to get all forecasteable values with df given, maybe extend into future:\u001b[39;00m\n\u001b[1;32m 2775\u001b[0m periods_add[df_name] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_maybe_extend_periods(df_i)\n",
"File \u001b[0;32m~/Desktop/code/neural_prophet/neuralprophet/df_utils.py:1324\u001b[0m, in \u001b[0;36minfer_frequency\u001b[0;34m(df, freq, n_lags, min_freq_percentage)\u001b[0m\n\u001b[1;32m 1322\u001b[0m freq_df \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m()\n\u001b[1;32m 1323\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m df_name, df_i \u001b[38;5;129;01min\u001b[39;00m df\u001b[38;5;241m.\u001b[39mgroupby(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mID\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 1324\u001b[0m freq_df\u001b[38;5;241m.\u001b[39mappend(\u001b[43m_infer_frequency\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf_i\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmin_freq_percentage\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(freq_df)) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m n_lags \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1326\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1327\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOne or more dataframes present different major frequencies, please make sure all dataframes present the same major frequency for auto-regression\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1328\u001b[0m )\n",
"File \u001b[0;32m~/Desktop/code/neural_prophet/neuralprophet/df_utils.py:1252\u001b[0m, in \u001b[0;36m_infer_frequency\u001b[0;34m(df, freq, min_freq_percentage)\u001b[0m\n\u001b[1;32m 1250\u001b[0m dominant_freq_percentage \u001b[38;5;241m=\u001b[39m distribution\u001b[38;5;241m.\u001b[39mmax() \u001b[38;5;241m/\u001b[39m \u001b[38;5;28mlen\u001b[39m(df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mds\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 1251\u001b[0m num_freq \u001b[38;5;241m=\u001b[39m frequencies[np\u001b[38;5;241m.\u001b[39margmax(distribution)] \u001b[38;5;66;03m# get value of most common diff\u001b[39;00m\n\u001b[0;32m-> 1252\u001b[0m inferred_freq \u001b[38;5;241m=\u001b[39m \u001b[43mconvert_num_to_str_freq\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_freq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mds\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43miloc\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1254\u001b[0m log\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 1255\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMajor frequency \u001b[39m\u001b[38;5;132;01m{\u001b[39;00minferred_freq\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m corresponds to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnp\u001b[38;5;241m.\u001b[39mround(dominant_freq_percentage \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m100\u001b[39m, \u001b[38;5;241m3\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m% of the data.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1256\u001b[0m )\n\u001b[1;32m 1257\u001b[0m ideal_freq_exists \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m dominant_freq_percentage \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m min_freq_percentage \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n",
"File \u001b[0;32m~/Desktop/code/neural_prophet/neuralprophet/df_utils.py:1159\u001b[0m, in \u001b[0;36mconvert_num_to_str_freq\u001b[0;34m(freq_num, initial_time_stamp)\u001b[0m\n\u001b[1;32m 1144\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconvert_num_to_str_freq\u001b[39m(freq_num, initial_time_stamp):\n\u001b[1;32m 1145\u001b[0m \u001b[38;5;124;03m\"\"\"Convert numeric frequencies into frequency tags\u001b[39;00m\n\u001b[1;32m 1146\u001b[0m \n\u001b[1;32m 1147\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1157\u001b[0m \u001b[38;5;124;03m frequency tag\u001b[39;00m\n\u001b[1;32m 1158\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1159\u001b[0m aux_ts \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate_range\u001b[49m\u001b[43m(\u001b[49m\u001b[43minitial_time_stamp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m100\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_timedelta\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfreq_num\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1160\u001b[0m freq_str \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39minfer_freq(aux_ts)\n\u001b[1;32m 1161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m freq_str\n",
"File \u001b[0;32m~/Desktop/code/neural_prophet/env/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py:1070\u001b[0m, in \u001b[0;36mdate_range\u001b[0;34m(start, end, periods, freq, tz, normalize, name, closed, inclusive, **kwargs)\u001b[0m\n\u001b[1;32m 1067\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m freq \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m com\u001b[38;5;241m.\u001b[39many_none(periods, start, end):\n\u001b[1;32m 1068\u001b[0m freq \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1070\u001b[0m dtarr \u001b[38;5;241m=\u001b[39m \u001b[43mDatetimeArray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_generate_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1071\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1072\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1073\u001b[0m \u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1074\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1075\u001b[0m \u001b[43m \u001b[49m\u001b[43mtz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1076\u001b[0m \u001b[43m \u001b[49m\u001b[43mnormalize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnormalize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1077\u001b[0m \u001b[43m \u001b[49m\u001b[43minclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1078\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1079\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1080\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DatetimeIndex\u001b[38;5;241m.\u001b[39m_simple_new(dtarr, name\u001b[38;5;241m=\u001b[39mname)\n",
"File \u001b[0;32m~/Desktop/code/neural_prophet/env/lib/python3.8/site-packages/pandas/core/arrays/datetimes.py:409\u001b[0m, in \u001b[0;36mDatetimeArray._generate_range\u001b[0;34m(cls, start, end, periods, freq, tz, normalize, ambiguous, nonexistent, inclusive)\u001b[0m\n\u001b[1;32m 404\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m com\u001b[38;5;241m.\u001b[39mcount_not_none(start, end, periods, freq) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m3\u001b[39m:\n\u001b[1;32m 405\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 406\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOf the four parameters: start, end, periods, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mand freq, exactly three must be specified\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 408\u001b[0m )\n\u001b[0;32m--> 409\u001b[0m freq \u001b[38;5;241m=\u001b[39m \u001b[43mto_offset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 412\u001b[0m start \u001b[38;5;241m=\u001b[39m Timestamp(start)\n",
"File \u001b[0;32mpandas/_libs/tslibs/offsets.pyx:3580\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.offsets.to_offset\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mpandas/_libs/tslibs/offsets.pyx:3682\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.offsets.to_offset\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Invalid frequency: NaT"
]
}
],
"outputs": [],
"source": [
"future = m.make_future_dataframe(df_test)\n",
"forecast = m.predict(future)\n",
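Aside: the traceback removed above comes from `pd.date_range` receiving a NaT frequency once the timestamp diffs cannot be inferred. A minimal sketch of that failure mode outside NeuralProphet (values illustrative):

```python
import numpy as np
import pandas as pd

# If the most common timestamp diff is missing, pd.to_timedelta yields NaT,
# which pandas rejects as a date_range frequency.
freq_num = np.timedelta64("NaT")  # stand-in for an uninferrable diff
try:
    pd.date_range("2024-01-01", periods=100, freq=pd.to_timedelta(freq_num))
except ValueError as e:
    print(e)  # Invalid frequency: NaT
```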
6 changes: 4 additions & 2 deletions neuralprophet/data/process.py
@@ -399,7 +399,8 @@ def _check_dataframe(
"Dataframe has less than n_forecasts + n_lags rows. "
"Forecasting not possible. Please either use a larger dataset, or adjust the model parameters."
)
-    df, _, _, _ = df_utils.prep_or_copy_df(df)
+    # df = df.copy(deep=True)
+    # df, _, _, _ = df_utils.check_multiple_series_id(df)
df, regressors_to_remove, lag_regressors_to_remove = df_utils.check_dataframe(
df=df,
check_y=check_y,
@@ -474,7 +475,8 @@ def _handle_missing_data(
The pre-processed DataFrame, including imputed missing data, if applicable.
"""
-    df, _, _, _ = df_utils.prep_or_copy_df(df)
+    # df = df.copy(deep=True)
+    # df, _, _, _ = df_utils.check_multiple_series_id(df)

if n_lags == 0 and not predicting:
# drop rows with NaNs in y and count them
3 changes: 2 additions & 1 deletion neuralprophet/data/transform.py
@@ -24,7 +24,8 @@ def _normalize(df: pd.DataFrame, config_normalization: Normalization) -> pd.Data
-------
df: pd.DataFrame, normalized
"""
-    df, _, _, _ = df_utils.prep_or_copy_df(df)
+    # df = df.copy(deep=True)
+    # df, _, _, _ = df_utils.check_multiple_series_id(df)
df_norm = pd.DataFrame()
for df_name, df_i in df.groupby("ID"):
data_params = config_normalization.get_data_params(df_name)
47 changes: 27 additions & 20 deletions neuralprophet/df_utils.py
@@ -22,7 +22,7 @@ class ShiftScale:
scale: float = 1.0


-def prep_or_copy_df(df: pd.DataFrame) -> tuple[pd.DataFrame, bool, bool, list[str]]:
+def check_multiple_series_id(df: pd.DataFrame) -> tuple[pd.DataFrame, bool, bool, list[str]]:
"""Copy df if it contains the ID column. Creates ID column with '__df__' if it is a df with a single time series.
Parameters
----------
@@ -42,26 +42,23 @@
if not isinstance(df, pd.DataFrame):
raise ValueError("Provided DataFrame (df) must be of pd.DataFrame type.")

-    # Create a copy of the dataframe
-    df_copy = df.copy(deep=True)
-
-    df_has_id_column = "ID" in df_copy.columns
+    df_has_id_column = "ID" in df.columns

# If there is no ID column, then add one with a single value
if not df_has_id_column:
log.debug("Provided DataFrame (df) contains a single time series.")
df_copy["ID"] = "__df__"
return df_copy, df_has_id_column, True, ["__df__"]
df["ID"] = "__df__"
return df, df_has_id_column, True, ["__df__"]

# Create a list of unique ID values
-    unique_id_values = list(df_copy["ID"].unique())
+    unique_id_values = list(df["ID"].unique())
# Check if there is only one unique ID value
df_has_single_time_series = len(unique_id_values) == 1
+    num_time_series_id = len(unique_id_values)

-    single_or_multiple_message = "a single" if df_has_single_time_series else "multiple"
-    log.debug(f"Provided DataFrame (df) has an ID column and contains {single_or_multiple_message} time series.")
+    log.debug(f"Provided DataFrame (df) has an ID column and contains {num_time_series_id} time series.")

-    return df_copy, df_has_id_column, df_has_single_time_series, unique_id_values
+    return df, df_has_id_column, df_has_single_time_series, unique_id_values


def return_df_in_original_format(df, received_ID_col=False, received_single_time_series=True):
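A small usage sketch of the new helper's return values (assuming the import path shown earlier); note that without the internal copy, a frame lacking an `ID` column now gets one added in place:

```python
import pandas as pd
from neuralprophet import df_utils

single = pd.DataFrame({"ds": pd.date_range("2024-01-01", periods=3, freq="D"), "y": [1.0, 2.0, 3.0]})
single, has_id, is_single, ids = df_utils.check_multiple_series_id(single)
# has_id == False, is_single == True, ids == ["__df__"]; "ID" was added to the input frame itself

multi = pd.DataFrame(
    {
        "ds": list(pd.date_range("2024-01-01", periods=2, freq="D")) * 2,
        "y": [1.0, 2.0, 3.0, 4.0],
        "ID": ["a", "a", "b", "b"],
    }
)
multi, has_id, is_single, ids = df_utils.check_multiple_series_id(multi)
# has_id == True, is_single == False, ids == ["a", "b"]
```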
@@ -285,7 +282,8 @@ def init_data_params(
ShiftScale entries containing ``shift`` and ``scale`` parameters for each column
"""
# Compute Global data params
-    df, _, _, _ = prep_or_copy_df(df)
+    # df = df.copy(deep=True)
+    # df, _, _, _ = check_multiple_series_id(df)
df_merged = df.copy(deep=True).drop("ID", axis=1)
global_data_params = data_params_definition(
df_merged, normalize, config_lagged_regressors, config_regressors, config_events, config_seasonality
@@ -382,6 +380,8 @@ def normalize(df, data_params):
"""
df = df.copy(deep=True)
for name in df.columns:
if name == "ID":
continue
if name not in data_params.keys():
raise ValueError(f"Unexpected column {name} in data")
new_name = name
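The `ID` skip above is needed because frames now reach `normalize` still carrying their `ID` column, while `data_params` holds scaling entries for data columns only. A toy illustration of the guard (stand-in `data_params`):

```python
import pandas as pd

data_params = {"ds": None, "y": None}  # stand-in: keyed by data columns only
df = pd.DataFrame({"ds": pd.date_range("2024-01-01", periods=2, freq="D"), "y": [1.0, 2.0], "ID": ["a", "a"]})

# With the skip, "ID" no longer trips the unexpected-column check.
unexpected = [name for name in df.columns if name != "ID" and name not in data_params]
assert not unexpected
```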
@@ -428,7 +428,8 @@ def check_dataframe(
pd.DataFrame or dict
checked dataframe
"""
-    df, _, _, _ = prep_or_copy_df(df)
+    # df = df.copy(deep=True)
+    # df, _, _, _ = check_multiple_series_id(df)
if df.groupby("ID").size().min() < 1:
raise ValueError("Dataframe has no rows.")
if "ds" not in df:
@@ -642,7 +643,9 @@ def _crossvalidation_with_time_threshold(df, n_lags, n_forecasts, k, fold_pct, f
min_train = total_samples - samples_fold - (k - 1) * (samples_fold - samples_overlap)
assert min_train >= samples_fold
folds = []
-    df_fold, _, _, _ = prep_or_copy_df(df)
+    df_fold = df
+    # df_fold = df.copy(deep=True)
+    # df_fold, _, _, _ = check_multiple_series_id(df_fold)
for i in range(k, 0, -1):
threshold_time_stamp = find_time_threshold(df_fold, n_lags, n_forecasts, samples_fold, inputs_overbleed=True)
df_train, df_val = split_considering_timestamp(
@@ -704,7 +707,8 @@
validation data
"""
-    df, _, _, _ = prep_or_copy_df(df)
+    # df = df.copy(deep=True)
+    df, _, _, _ = check_multiple_series_id(df)
folds = []
if len(df["ID"].unique()) == 1:
for df_name, df_i in df.groupby("ID"):
@@ -764,7 +768,8 @@ def double_crossvalidation_split_df(df, n_lags, n_forecasts, k, valid_pct, test_
tuple of k tuples [(folds_val, folds_test), …]
elements same as :meth:`crossvalidation_split_df` returns
"""
-    df, _, _, _ = prep_or_copy_df(df)
+    # df = df.copy(deep=True)
+    # df, _, _, _ = check_multiple_series_id(df)
if len(df["ID"].unique()) > 1:
raise NotImplementedError("double_crossvalidation_split_df not implemented for df with many time series")
fold_pct_test = float(test_pct) / k
@@ -885,7 +890,8 @@ def split_df(
pd.DataFrame, dict
validation data
"""
-    df, _, _, _ = prep_or_copy_df(df)
+    # df = df.copy(deep=True)
+    # df, _, _, _ = check_multiple_series_id(df)
df_train = pd.DataFrame()
df_val = pd.DataFrame()
if local_split:
@@ -1367,7 +1373,8 @@ def infer_frequency(df, freq, n_lags, min_freq_percentage=0.7):
Valid frequency tag according to major frequency.
"""
-    df, _, _, _ = prep_or_copy_df(df)
+    # df = df.copy(deep=True)
+    # df, _, _, _ = check_multiple_series_id(df)
freq_df = list()
for df_name, df_i in df.groupby("ID"):
freq_df.append(_infer_frequency(df_i, freq, min_freq_percentage))
@@ -1410,8 +1417,8 @@ def create_dict_for_events_or_regressors(
if other_df is None:
# if other_df is None, create dictionary with None for each ID
        return {df_name: None for df_name in df_names}
-
-    other_df, received_ID_col, _, _ = prep_or_copy_df(other_df)
+    other_df = other_df.copy(deep=True)
+    other_df, received_ID_col, _, _ = check_multiple_series_id(other_df)
# if other_df does not contain ID, create dictionary with original ID with the same other_df for each ID
if not received_ID_col:
other_df = other_df.drop("ID", axis=1)