Summary¶
Our neighborhood is in a suburb of Houston, just off a major highway that many people use for commuting. It’s a convenient place to commute either north into Houston or south to Lake Jackson. One reason we picked our house was its proximity to the highway – I can be on the highway headed to work in about 5 minutes. But I’ve noticed it sometimes takes me 10 minutes just to go from my house to another house in the same community. So folks in other neighborhoods likely take significantly longer to get to and from the highway. The goal of the exercise below was to quantify the commuter convenience of my house.
Houston Commuters¶
The map below shows the average time for each neighborhood to get to and from the highway during commuting hours when commuting northbound to Houston.
m_nb
Lake Jackson / Freeport Commuters¶
The map below shows the average time for each neighborhood to get to and from the highway during commuting hours when commuting southbound to Lake Jackson. This is the direction I commute.
m_sb
As demonstrated above, my neighborhood is one of the best with respect to commute times. From my neighborhood it takes roughly 6 minutes to get to the highway in the morning and 6 minutes to get to my house from the highway in the afternoon – a combined 12 minutes per day. Many neighborhoods are twice that!
Extrapolating those numbers to a whole year: if I commute 5 days a week x 50 weeks a year, I’m in my car for two days’ worth of time per year just going to and from the highway. Some of the neighborhoods west of me spend four days per year in their cars just getting to and from the highway.
Details on generating drive time maps…¶
import pandas as pd
import requests
import pickle
import os.path
import json
import time
from tqdm import tqdm
import numpy as np
import seaborn as sns
Import spreadsheet and format addresses¶
# Pulled a list of addresses for my community from the county tax website.
# Keep only the columns needed downstream (address parts + grouping fields).
cols_of_interest = [
    'prop_id', 'hood_cd', 'market', 'school', 'city', 'county',
    'legal_desc', 'tract_or_lot', 'abs_subdv_cd',
    'situs_num', 'situs_street', 'situs_street_sufix', 'situs_city', 'situs_state', 'situs_zip',
    'addr_line1', 'addr_line2', 'addr_line3', 'addr_city', 'addr_state', 'zip',
]
adds_df = pd.read_excel('address_list.xlsx')[cols_of_interest]
# adds_df.head(2)
# Clean up all of the columns that contain property (situs) address data:
# fill defaults for missing values, then normalize every field to a string.
situs_defaults = {
    'situs_num': 0,
    'situs_street': '',
    'situs_street_sufix': '',
    'situs_city': 'Pearland',
    'situs_state': 'TX',
    'situs_zip': '77584',
}
for col, default in situs_defaults.items():
    adds_df[col] = adds_df[col].fillna(default)
# Numeric-looking fields become plain integer strings (drops any '.0' suffix)
for col in ('situs_num', 'situs_zip'):
    adds_df[col] = adds_df[col].apply(lambda x: str(int(x)))
# Text fields become whitespace-stripped strings
for col in ('situs_street', 'situs_street_sufix', 'situs_city', 'situs_state'):
    adds_df[col] = adds_df[col].apply(lambda x: str(x).strip())
# Create new columns holding the full street address that will be used in
# Bing Maps searches, e.g. '2204 HOLLOW SHORE ST, PEARLAND, TX 77584'.
adds_df.insert(0, 'house_address_partial', '', allow_duplicates=False)
adds_df.insert(0, 'house_address', '', allow_duplicates=False)
# adds_df.head(2)
# Populate the new columns: street line first, then the city/state/zip tail.
adds_df['house_address_partial'] = (
    adds_df['situs_num'] + ' ' + adds_df['situs_street'] + ' ' + adds_df['situs_street_sufix']
)
adds_df['house_address'] = (
    adds_df['house_address_partial'] + ', '
    + adds_df['situs_city'] + ', '
    + adds_df['situs_state'] + ' '
    + adds_df['situs_zip']
)
# Index by the county's integer property ID; it keys the cached API responses.
adds_df['prop_id'] = adds_df['prop_id'].apply(int)
adds_df = adds_df.set_index('prop_id')
# adds_df.head(2)
Query Bing Maps API¶
# Grab one property ID/address pair to sanity-check the API plumbing below
testPropid = adds_df.index[0]
testAddress = adds_df['house_address'].iloc[0]
print([testPropid,testAddress])
[539398, '2204 HOLLOW SHORE ST, PEARLAND, TX 77584']
# Load the API key from an untracked pickle so it stays out of the repo.
# NOTE(review): pickle.load is only safe here because this file is locally
# generated, never downloaded.
with open('secrets.pickle','rb') as f:
    secrets = pickle.load(f)
# Route payloads contain details for 4 different navigational routes:
# 1) morning Northbound (AM-N) and 2) its afternoon counterpart (PM-N)
# 3) morning Southbound (AM-S) and 4) its afternoon counterpart (PM-S)
# Each payload fixes ONE waypoint (presumably a highway on-ramp/off-ramp
# location — verify): morning routes fix 'wp.1' (the destination) and the
# house fills 'wp.0'; afternoon routes fix 'wp.0' (the origin) and the
# house fills 'wp.1' (see bing_maps_query).
# I had to make afternoon times as arrival or else api didn't successfully respond
route_payloads = {
'AM-N':{'wp.1':'29.595583, -95.386315','dateTime':'11/08/2023 08:00:00','timeType':'Departure'},
'PM-N':{'wp.0':'29.594770, -95.386830','dateTime':'11/08/2023 17:30:00','timeType':'Arrival'},
'AM-S':{'wp.1':'29.549921, -95.387785','dateTime':'11/08/2023 08:00:00','timeType':'Departure'},
'PM-S':{'wp.0':'29.543948, -95.387128','dateTime':'11/08/2023 17:30:00','timeType':'Arrival'}
}
def bing_maps_query(house_address,route_payload):
    """Query the Bing Maps driving-routes API for one house/route combo.

    house_address : str
        Full street address used as one waypoint of the route.
    route_payload : dict
        Holds the other, fixed waypoint ('wp.0' or 'wp.1') plus the
        'dateTime'/'timeType' pair describing the commute window.

    Returns the raw ``requests.Response`` so the caller can cache it.
    """
    my_api_key = secrets['api_key']
    bing_map_url = r'http://dev.virtualearth.net/REST/V1/Routes/Driving'
    payload = {
        'avoid': 'tolls',
        'key': my_api_key,
        'distanceUnit': 'mi',
        'travelMode': 'Driving',
        'optimize': 'timeWithTraffic'
    }
    # Merge in the route-specific fields (new dict; neither input is mutated)
    payload = payload|route_payload
    if 'wp.0' in payload:
        payload['wp.1'] = house_address   # afternoon route: house is the destination
    else:
        payload['wp.0'] = house_address   # morning route: house is the origin
    # Fix: previously the URL literal was duplicated in the requests.get call,
    # leaving `bing_map_url` a dead variable; use the single definition.
    r = requests.get(bing_map_url, params=payload)
    return r
# Some poor man's unit tests
# testResponse = bing_maps_query(testAddress,route_payloads['AM-S'])
# print(testResponse)
# test_responses = {i:bing_maps_query(testAddress,route_payloads[i]) for i in route_payloads}
# {i:test_responses[i].json()['resourceSets'][0]['resources'][0]['travelDuration'] for i in test_responses}
def format_response_filepath(propid,descriptor):
    """Return the cache file path for one property/route API response pickle."""
    # NOTE(review): '|' in filenames is fine on POSIX but invalid on Windows;
    # kept as-is so previously cached responses still resolve.
    return os.path.join('query_responses', f'{propid}|{descriptor}.pickle')
# Quick sanity check of the cache-path format for the test property
format_response_filepath(testPropid,'AM-N')
'query_responses/539398|AM-N.pickle'
Loop through the addresses¶
# get routes for all addresses
propid_subset = list(adds_df.index)
# Add one result column per route/metric pair, each seeded with a
# type-appropriate empty value so the column dtypes come out right.
new_col_initialVals = {'travelDistance':0.0,'distanceUnit':'','travelDuration':0,'travelDurationTraffic':0,'durationUnit':''}
for col, initial in new_col_initialVals.items():
    for route in route_payloads:
        adds_df.insert(1, f'{route}|{col}', initial, allow_duplicates=False)
# add columns for address latitude/longitude
adds_df.insert(1,'lon',0.0,allow_duplicates=False)
adds_df.insert(1,'lat',0.0,allow_duplicates=False)
# adds_df.head(2)
# Loop through property ID / addresses.
# Check if we've already saved the route info (pickled response on disk);
# otherwise query the maps API and cache the response for next time.
# Hoisted out of the loop (it is invariant): the response fields to extract
# and the type each should be cast to before inserting into the dataframe.
result_keys = {
    'travelDistance':float,
    'distanceUnit':str,
    'travelDuration':int,
    'travelDurationTraffic':int,
    'durationUnit':str
}
for propid in tqdm(propid_subset):
    prop_address = adds_df.loc[propid]['house_address']
    for route in route_payloads:
        prop_route_filepath = format_response_filepath(propid,route)
        if os.path.isfile(prop_route_filepath):
            with open(prop_route_filepath,'rb') as f:
                r = pickle.load(f)
        else:
            r = bing_maps_query(prop_address,route_payloads[route])
            with open(prop_route_filepath,'wb') as f:
                pickle.dump(r,f)
        # Extract some data from good responses.
        # Fix: parse the JSON body once per response instead of twice.
        body = r.json()
        if body['statusCode'] == 200:
            route_data = body['resourceSets'][0]['resources'][0]
            # get results of interest, cast into proper data types, insert into df
            results = {key:result_keys[key](route_data[key]) for key in result_keys}
            new_columns = ['|'.join([route,key]) for key in result_keys]
            adds_df.loc[propid,new_columns] = [results[key] for key in results]
            # Morning routes start at the house, so the route's start location
            # doubles as the property's latitude/longitude.
            if route.startswith('AM'):
                adds_df.loc[propid,['lat','lon']] = route_data['routeLegs'][0]['startLocation']['point']['coordinates']
100%|██████████| 4578/4578 [00:14<00:00, 316.08it/s]
Histograms of time to get to/from highway¶
Northbound commutes¶
# Combined daily northbound drive time (AM + PM), rounded to whole minutes.
# (Durations are presumably in seconds per the API's durationUnit — verify.)
nb_total = adds_df['AM-N|travelDurationTraffic'] + adds_df['PM-N|travelDurationTraffic']
adds_df['Combined-N|travelDurationTraffic'] = np.round(nb_total / 60)
temp_route = 'Combined-N|travelDurationTraffic'
# Rows at <=1 minute are excluded (likely empty/failed responses — verify)
sns.histplot(data=adds_df[adds_df[temp_route] > 1], x=temp_route, stat='probability')
<Axes: xlabel='Combined-N|travelDurationTraffic', ylabel='Probability'>
Southbound commutes¶
# Combined daily southbound drive time (AM + PM), rounded to whole minutes.
sb_total = adds_df['AM-S|travelDurationTraffic'] + adds_df['PM-S|travelDurationTraffic']
adds_df['Combined-S|travelDurationTraffic'] = np.round(sb_total / 60)
temp_route = 'Combined-S|travelDurationTraffic'
# Rows at <=1 minute are excluded (likely empty/failed responses — verify)
sns.histplot(data=adds_df[adds_df[temp_route] > 1], x=temp_route, stat='probability')
<Axes: xlabel='Combined-S|travelDurationTraffic', ylabel='Probability'>
Note: kind of odd that the above histogram has gaps in it. Likely due to entire neighborhoods falling into one or two bins
Folium maps¶
from shapely.geometry import MultiPoint
import folium
import geopandas as gpd
# filter out some unintentional properties on south
adds_df_subset = adds_df[adds_df['lat'] > 29.5550]
# adds_df_subset.head(2)
# Per-neighborhood averages of combined commute times (plus market value)
avg_hood_df = (
    adds_df_subset[['hood_cd', 'Combined-N|travelDurationTraffic', 'Combined-S|travelDurationTraffic', 'market']]
    .groupby('hood_cd')
    .mean()
    .copy()
    .reset_index(drop=False)
)
# avg_hood_df.head(2)
# One MultiPoint per neighborhood, in the same row order as avg_hood_df
hoods_geometry = [
    MultiPoint(adds_df_subset[adds_df_subset['hood_cd'] == hood][['lon', 'lat']].to_numpy())
    for hood in avg_hood_df['hood_cd']
]
hoods_gpd = gpd.GeoDataFrame(avg_hood_df, geometry=hoods_geometry, crs='EPSG:4326')
# Collapse each neighborhood's point cloud to a concave outline polygon
hoods_gpd['geometry'] = hoods_gpd['geometry'].concave_hull(ratio=0.25)
# hoods_gpd.head(2)
# Map viewport settings: center point, hard pan/zoom bounds, and the
# lat/lon where the home marker is placed.
map_center = (29.566122784955606, -95.41278501301733)
min_lat, max_lat = 29.555, 29.582
min_lon, max_lon = -95.455, -95.386
home_latlon = (29.562541, -95.398825)
# Northbound map: bounded base map, a home marker, and a choropleth of the
# per-neighborhood combined northbound drive time.
nb_map_kwargs = dict(
    max_bounds=True,
    # zoom_control=False,
    location=map_center,
    min_zoom=13,
    zoom_start=15,
    max_zoom=17,
    min_lat=min_lat,
    max_lat=max_lat,
    min_lon=min_lon,
    max_lon=max_lon,
)
m_nb = folium.Map(**nb_map_kwargs)
nb_home_marker = folium.Marker(location=home_latlon, icon=folium.Icon(icon="home"))
nb_home_marker.add_to(m_nb)
nb_choropleth = folium.Choropleth(
    geo_data=hoods_gpd,
    data=hoods_gpd,
    name="choropleth",
    columns=["hood_cd", "Combined-N|travelDurationTraffic"],
    key_on="feature.properties.hood_cd",
    fill_color="OrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Northbound - Daily travel time to/from highway (minutes)",
)
nb_choropleth.add_to(m_nb)
m_nb
# Southbound map: identical layout to the northbound map, but colored by the
# combined southbound drive time.
m_sb = folium.Map(
    max_bounds=True,
    # zoom_control=False,
    location=map_center,
    min_zoom=13,
    zoom_start=15,
    max_zoom=17,
    min_lat=min_lat,
    max_lat=max_lat,
    min_lon=min_lon,
    max_lon=max_lon,
)
folium.Marker(
    location=home_latlon,
    icon=folium.Icon(icon="home"),
).add_to(m_sb)
folium.Choropleth(
    geo_data=hoods_gpd,
    data=hoods_gpd,
    name="choropleth",
    columns=["hood_cd", "Combined-S|travelDurationTraffic"],
    key_on="feature.properties.hood_cd",
    fill_color="OrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    # Fix: legend previously read "Soutbound" (typo in the rendered map)
    legend_name="Southbound - Daily travel time to/from highway (minutes)",
).add_to(m_sb)
m_sb