Skip to content

Commit

Permalink
Merge pull request #56 from BU-Spark/team-d
Browse files Browse the repository at this point in the history
Team d
  • Loading branch information
funkyvoong authored Jan 18, 2024
2 parents 75a963b + 8b4e64f commit a9b2d65
Show file tree
Hide file tree
Showing 46 changed files with 1,856,099 additions and 2 deletions.
1,254 changes: 1,254 additions & 0 deletions fa23-team-d/EDA_Notebooks/EDA_d3.ipynb

Large diffs are not rendered by default.

7,033 changes: 7,033 additions & 0 deletions fa23-team-d/EDA_Notebooks/eda.ipynb

Large diffs are not rendered by default.

325 changes: 325 additions & 0 deletions fa23-team-d/EDA_Notebooks/end_to_end_travel.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,325 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np \n",
"import pandas as pd\n",
"import seaborn as sns\n",
"pd.options.mode.chained_assignment = None "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>service_date</th>\n",
" <th>route_id</th>\n",
" <th>direction_id</th>\n",
" <th>half_trip_id</th>\n",
" <th>stop_id</th>\n",
" <th>time_point_id</th>\n",
" <th>time_point_order</th>\n",
" <th>point_type</th>\n",
" <th>standard_type</th>\n",
" <th>scheduled</th>\n",
" <th>actual</th>\n",
" <th>scheduled_headway</th>\n",
" <th>headway</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-01-01</td>\n",
" <td>01</td>\n",
" <td>Inbound</td>\n",
" <td>58061899.0</td>\n",
" <td>110</td>\n",
" <td>hhgat</td>\n",
" <td>1</td>\n",
" <td>Startpoint</td>\n",
" <td>Schedule</td>\n",
" <td>1900-01-01T06:05:00Z</td>\n",
" <td>1900-01-01T06:05:04Z</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-01-01</td>\n",
" <td>01</td>\n",
" <td>Inbound</td>\n",
" <td>58061899.0</td>\n",
" <td>67</td>\n",
" <td>maput</td>\n",
" <td>2</td>\n",
" <td>Midpoint</td>\n",
" <td>Schedule</td>\n",
" <td>1900-01-01T06:09:00Z</td>\n",
" <td>1900-01-01T06:06:28Z</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2023-01-01</td>\n",
" <td>01</td>\n",
" <td>Inbound</td>\n",
" <td>58061899.0</td>\n",
" <td>72</td>\n",
" <td>cntsq</td>\n",
" <td>3</td>\n",
" <td>Midpoint</td>\n",
" <td>Schedule</td>\n",
" <td>1900-01-01T06:12:00Z</td>\n",
" <td>1900-01-01T06:08:57Z</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2023-01-01</td>\n",
" <td>01</td>\n",
" <td>Inbound</td>\n",
" <td>58061899.0</td>\n",
" <td>75</td>\n",
" <td>mit</td>\n",
" <td>4</td>\n",
" <td>Midpoint</td>\n",
" <td>Schedule</td>\n",
" <td>1900-01-01T06:15:00Z</td>\n",
" <td>1900-01-01T06:12:41Z</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2023-01-01</td>\n",
" <td>01</td>\n",
" <td>Inbound</td>\n",
" <td>58061899.0</td>\n",
" <td>79</td>\n",
" <td>hynes</td>\n",
" <td>5</td>\n",
" <td>Midpoint</td>\n",
" <td>Schedule</td>\n",
" <td>1900-01-01T06:19:00Z</td>\n",
" <td>1900-01-01T06:16:35Z</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" service_date route_id direction_id half_trip_id stop_id time_point_id \\\n",
"0 2023-01-01 01 Inbound 58061899.0 110 hhgat \n",
"1 2023-01-01 01 Inbound 58061899.0 67 maput \n",
"2 2023-01-01 01 Inbound 58061899.0 72 cntsq \n",
"3 2023-01-01 01 Inbound 58061899.0 75 mit \n",
"4 2023-01-01 01 Inbound 58061899.0 79 hynes \n",
"\n",
" time_point_order point_type standard_type scheduled \\\n",
"0 1 Startpoint Schedule 1900-01-01T06:05:00Z \n",
"1 2 Midpoint Schedule 1900-01-01T06:09:00Z \n",
"2 3 Midpoint Schedule 1900-01-01T06:12:00Z \n",
"3 4 Midpoint Schedule 1900-01-01T06:15:00Z \n",
"4 5 Midpoint Schedule 1900-01-01T06:19:00Z \n",
"\n",
" actual scheduled_headway headway \n",
"0 1900-01-01T06:05:04Z NaN NaN \n",
"1 1900-01-01T06:06:28Z NaN NaN \n",
"2 1900-01-01T06:08:57Z NaN NaN \n",
"3 1900-01-01T06:12:41Z NaN NaN \n",
"4 1900-01-01T06:16:35Z NaN NaN "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"import pandas as pd\n",
"dfs = [] # Create an empty list to store dataframes\n",
"\n",
"# Example: Read multiple CSV files\n",
"arr_dep_dir = '../data/MBTA_Website/MBTA_Bus_Arrival_Departure_Times_2023/'\n",
"csv_files = os.listdir(arr_dep_dir)\n",
"csv_files = [os.path.join(arr_dep_dir, i) for i in csv_files][:6] \n",
"\n",
"for f in csv_files:\n",
" df = pd.read_csv(f)\n",
" dfs.append(df)\n",
"\n",
"df = pd.concat(dfs, axis=0, ignore_index=True)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"route_id\n",
"01 0 days 00:12:29.897617697\n",
"04 0 days 00:52:53.966500972\n",
"07 0 days 00:17:09.267979427\n",
"08 0 days 00:29:42.914191419\n",
"09 0 days 00:16:33.354348444\n",
"10 0 days 00:26:14.897579571\n",
"100 0 days 00:26:51.003679741\n",
"101 0 days 00:27:30.096403475\n",
"104 0 days 00:23:48.315156656\n",
"105 0 days 00:55:45.123865823\n",
"106 0 days 00:40:15.591193728\n",
"108 0 days 00:29:34.496422648\n",
"109 0 days 00:20:58.563276142\n",
"11 0 days 00:26:58.325604244\n",
"110 0 days 00:29:11.479423292\n",
"111 0 days 00:08:50.784762590\n",
"112 0 days 00:46:35.929147259\n",
"114 0 days 00:47:15.955206378\n",
"116 0 days 00:23:43.997474095\n",
"117 0 days 00:17:18.651579728\n",
"Name: actual, dtype: timedelta64[ns]\n"
]
}
],
"source": [
"filtered_df = df[df['point_type'].isin(['Startpoint', 'Endpoint'])] # calculate time for each trip\n",
"filtered_df['actual'] = pd.to_datetime(filtered_df['actual'])\n",
"\n",
"# Group the filtered DataFrame by 'route_id'\n",
"grouped = filtered_df.groupby('route_id')\n",
"\n",
"# Calculate the time difference for each group\n",
"time_difference = grouped['actual'].diff().abs()\n",
"time_difference.dropna(inplace=True)\n",
"\n",
"# Calculate the average time difference for each route\n",
"average_time_difference = time_difference.groupby(filtered_df['route_id']).mean()\n",
"\n",
"\n",
"# Display the results\n",
"print(average_time_difference.head(20))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"route_id\n",
"217 0 days 02:45:08.655584415\n",
"439 0 days 02:33:49.784576697\n",
"245 0 days 02:13:13.253574432\n",
"428 0 days 02:00:04.905533063\n",
"554 0 days 01:46:22.798531810\n",
"451 0 days 01:44:28.601766358\n",
"558 0 days 01:37:19.276116504\n",
"424 0 days 01:24:22.709447415\n",
"76 0 days 01:21:58.216290940\n",
"556 0 days 01:16:13.600459242\n",
"Name: actual, dtype: timedelta64[ns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"average_time_difference.sort_values(ascending=False).head(10) #longest routes\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"route_id\n",
"111 0 days 00:08:50.784762590\n",
"32 0 days 00:09:27.525725743\n",
"28 0 days 00:10:18.854261763\n",
"23 0 days 00:11:43.171046651\n",
"66 0 days 00:11:47.948230150\n",
"SL5 0 days 00:12:05.557564405\n",
"22 0 days 00:12:18.731593627\n",
"01 0 days 00:12:29.897617697\n",
"SL2 0 days 00:12:58.020416402\n",
"743 0 days 00:13:02.580268498\n",
"Name: actual, dtype: timedelta64[ns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"average_time_difference.sort_values(ascending=True).head(10) #fastest routes"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "mlproject",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit a9b2d65

Please sign in to comment.