Step 2: Clean & Merge Project Data

Import Packages

Code
import geopandas as gpd
import numpy as np
import pandas as pd
import requests
from datetime import datetime
from shapely.geometry import Point
from shapely.ops import nearest_points

Street Data

Read in street centerline data

Code
street = gpd.read_file("Data/Street_Centerline.geojson")

Clean up street centerline data to keep only relevant information

Code
cols_to_keep = ["R_HUNDRED", "CLASS", "STNAME","ST_CODE","SEG_ID", "Shape__Length", "geometry"]
street = street[cols_to_keep]
street.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry
0 9200 2 BARTRAM AVE 16120 100002 1137.344551 LINESTRING (-75.25362 39.88134, -75.25383 39.8...
1 9100 2 BARTRAM AVE 16120 100003 371.888030 LINESTRING (-75.25149 39.88373, -75.25337 39.8...
2 0 2 BARTRAM AVE 16120 100004 48.720227 LINESTRING (-75.25337 39.88161, -75.25362 39.8...
3 8500 5 EASTWICK PL 30570 100006 292.693856 LINESTRING (-75.24576 39.89227, -75.24724 39.8...
4 8500 5 HARLEY PL 40570 100007 292.440377 LINESTRING (-75.24648 39.89267, -75.24798 39.8...
Code
street.to_file("Data/street_clean.geojson", driver='GeoJSON')

Paving Data

2022

Read in .csv made from PDF of 2022 paving program. Keep only relevant information and rename columns to match the street centerline data. Create a new column for whether the street was paved in 2022.

1 = The street was paved in 2022. 0 = The street was not paved in 2022.

Code
pav_2022 = pd.read_csv("Data/2022_Paving.csv")

cols_to_drop = ["Unnamed: 12", "Unnamed: 11", "Unnamed: 10", 
                "PAVING PROGRAM", "ROADWAY RESURFACING RATING", 
                "EMERGENCY UTILITY WORK", "DITCHES", "POTHOLES", 
                "CITY COUNCIL DISTRICT"]

pav_2022 = pav_2022.drop(columns=cols_to_drop)

pav_2022['2022_Paved']=1

pav_2022.rename(columns={'HUNDRED': 'R_HUNDRED'}, inplace=True)
pav_2022.rename(columns={'STREET': 'STNAME'}, inplace=True)

pav_2022.head()
R_HUNDRED STNAME FROM TO 2022_Paved
0 200 38TH PARALLEL PL DOCK ST SPRUCE ST 1
1 5300 ALFRED ST W PENN ST W COULTER ST 1
2 5400 ARCHER ST EARLHAM TER W COULTER ST 1
3 2400 BAINBRIDGE ST S 24TH ST BRINLEY CT 1
4 2500 BAINBRIDGE ST BRINLEY CT S BAMBREY ST 1

Merge paving data with the street centerline by the street name and number.

Code
street_merge_2022_pav = pd.merge(street, pav_2022, how='left', on=['STNAME', 'R_HUNDRED'])

cols_to_drop = ['FROM', 'TO']
street_merge_2022_pav = street_merge_2022_pav.drop(columns = cols_to_drop)

street_merge_2022_pav=street_merge_2022_pav.drop_duplicates(subset=['R_HUNDRED', 'STNAME', 'SEG_ID'])
street_merge_2022_pav['2022_Paved'].fillna(0, inplace=True)

street_merge_2022_pav.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry 2022_Paved
0 9200 2 BARTRAM AVE 16120 100002 1137.344551 LINESTRING (-75.25362 39.88134, -75.25383 39.8... 0.0
1 9100 2 BARTRAM AVE 16120 100003 371.888030 LINESTRING (-75.25149 39.88373, -75.25337 39.8... 0.0
2 0 2 BARTRAM AVE 16120 100004 48.720227 LINESTRING (-75.25337 39.88161, -75.25362 39.8... 0.0
3 8500 5 EASTWICK PL 30570 100006 292.693856 LINESTRING (-75.24576 39.89227, -75.24724 39.8... 0.0
4 8500 5 HARLEY PL 40570 100007 292.440377 LINESTRING (-75.24648 39.89267, -75.24798 39.8... 0.0
2023

Read in .csv made from PDF of 2023 paving program. Keep only relevant information and rename columns to match the street centerline data. Create a new column for whether the street was paved in 2023.

1 = The street was paved in 2023. 0 = The street was not paved in 2023.

Code
pav_2023 = pd.read_csv("Data/2023_Paving.csv")

cols_to_keep = ["HUNDRED", "ON"]
pav_2023 = pav_2023[cols_to_keep]

pav_2023['2023_Paved']=1

pav_2023.rename(columns={'HUNDRED': 'R_HUNDRED'}, inplace=True)
pav_2023.rename(columns={'ON': 'STNAME'}, inplace=True)

pav_2023.head()
R_HUNDRED STNAME 2023_Paved
0 2100 73RD AVE 1
1 2000 73RD AVE 1
2 1900 73RD AVE 1
3 1900 73RD AVE 1
4 1800 73RD AVE 1

Merge paving data with the street centerline by the street name and number.

Code
pav_2023['R_HUNDRED'] = pd.to_numeric(pav_2023['R_HUNDRED'], errors='coerce')

street_merge_2023_pav = pd.merge(street, pav_2023, how='left', on=['STNAME', 'R_HUNDRED'])

street_merge_2023_pav=street_merge_2023_pav.drop_duplicates(subset=['R_HUNDRED', 'STNAME', 'SEG_ID'])
street_merge_2023_pav['2023_Paved'].fillna(0, inplace=True)

street_merge_2023_pav.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry 2023_Paved
0 9200 2 BARTRAM AVE 16120 100002 1137.344551 LINESTRING (-75.25362 39.88134, -75.25383 39.8... 1.0
1 9100 2 BARTRAM AVE 16120 100003 371.888030 LINESTRING (-75.25149 39.88373, -75.25337 39.8... 1.0
2 0 2 BARTRAM AVE 16120 100004 48.720227 LINESTRING (-75.25337 39.88161, -75.25362 39.8... 1.0
5 8500 5 EASTWICK PL 30570 100006 292.693856 LINESTRING (-75.24576 39.89227, -75.24724 39.8... 0.0
6 8500 5 HARLEY PL 40570 100007 292.440377 LINESTRING (-75.24648 39.89267, -75.24798 39.8... 0.0
Combine Paving
Code
street_merge_pav = pd.merge(street_merge_2022_pav, 
                            street_merge_2023_pav, 
                            how = 'left', 
                            on =['R_HUNDRED', 'CLASS', 'STNAME', 'ST_CODE', 'SEG_ID','Shape__Length','geometry'])
street_merge_pav.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry 2022_Paved 2023_Paved
0 9200 2 BARTRAM AVE 16120 100002 1137.344551 LINESTRING (-75.25362 39.88134, -75.25383 39.8... 0.0 1.0
1 9100 2 BARTRAM AVE 16120 100003 371.888030 LINESTRING (-75.25149 39.88373, -75.25337 39.8... 0.0 1.0
2 0 2 BARTRAM AVE 16120 100004 48.720227 LINESTRING (-75.25337 39.88161, -75.25362 39.8... 0.0 1.0
3 8500 5 EASTWICK PL 30570 100006 292.693856 LINESTRING (-75.24576 39.89227, -75.24724 39.8... 0.0 0.0
4 8500 5 HARLEY PL 40570 100007 292.440377 LINESTRING (-75.24648 39.89267, -75.24798 39.8... 0.0 0.0
Code
street_merge_pav.to_file("Data/street_merge_pav.geojson", driver='GeoJSON')

No Thru Trucks

Load in the “No Thru Truck” data/ Create a column for whether or not the street is a thru street for trucks. 1 = Not a thru street for trucks, 0 = A thru street for trucks.

Code
truck = gpd.read_file("Data/No_thru_Trucks.geojson")

truck['Truck']=1
truck = truck[['SEG_ID', 'ST_CODE', 'STNAME','geometry', 'Truck']]

truck.head()
SEG_ID ST_CODE STNAME geometry Truck
0 422113 65260 POPLAR ST LINESTRING (-75.18540 39.97343, -75.18699 39.9... 1
1 421057 65260 POPLAR ST LINESTRING (-75.18485 39.97336, -75.18540 39.9... 1
2 421059 65260 POPLAR ST LINESTRING (-75.18215 39.97302, -75.18377 39.9... 1
3 421058 65260 POPLAR ST LINESTRING (-75.18377 39.97323, -75.18485 39.9... 1
4 422216 16880 BENJAMIN FRANKLIN PKWY LINESTRING (-75.17509 39.96119, -75.17680 39.9... 1

Merge truck data with street data.

Code
street_merge_truck = pd.merge(street, truck, how='left', on=['STNAME', 'ST_CODE', 'SEG_ID', 'geometry'])
street_merge_truck=street_merge_truck.drop_duplicates(subset=['STNAME', 'ST_CODE', 'SEG_ID','geometry'])
street_merge_truck['Truck'].fillna(0, inplace=True)

street_merge_truck.sort_values(by='Truck', ascending=False, inplace=True)
street_merge_truck.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry Truck
30622 200 5 W ASHDALE ST 13540 741177 207.279874 LINESTRING (-75.12765 40.02743, -75.12949 40.0... 1.0
8511 0 4 CHRISTIAN ST 23060 342581 61.551433 LINESTRING (-75.14384 39.93519, -75.14436 39.9... 1.0
8463 600 5 S AMERICAN ST 12460 342528 61.160192 LINESTRING (-75.14684 39.94086, -75.14693 39.9... 1.0
17998 3000 5 N PHILIP ST 64500 540250 99.921614 LINESTRING (-75.13445 39.99606, -75.13442 39.9... 1.0
17995 3200 4 JASPER ST 45980 540246 61.954342 LINESTRING (-75.11153 39.99554, -75.11107 39.9... 1.0
Code
street_merge_truck.to_file("Data/street_merge_truck.geojson", driver='GeoJSON')

311 Calls

2022

Load in 311 data. Clean data, filter it so it’s only “Street Defect”

Code
call311_2022 = pd.read_csv("Data/2022_311.csv")
call311_2022.dropna(subset=['lat'], inplace=True)
call311_2022=call311_2022[call311_2022['service_name'] == "Street Defect"]
call311_2022.head()
objectid service_request_id subject status status_notes service_name service_code agency_responsible service_notice requested_datetime updated_datetime expected_datetime closed_datetime address zipcode media_url lat lon
8 23205540 14882965 Pothole Repair Closed NaN Street Defect SR-ST01 Streets Department 46 Business Days 2022-04-22 15:45:16 2022-07-20 00:16:58 2022-07-04 20:00:00 2022-07-19 23:51:32 246 FERNON ST 19148 NaN 39.927853 -75.150313
10 23171928 14841323 Street Defect Closed NaN Street Defect SR-ST01 Streets Department 46 Business Days 2022-04-04 16:11:58 2023-02-15 22:13:40 2022-06-14 20:00:00 2022-07-06 03:12:05 2935 CECIL B MOORE AVE 19121 NaN 39.981959 -75.183079
17 23171934 14841514 Pothole Repair Closed NaN Street Defect SR-ST01 Streets Department 46 Business Days 2022-04-04 17:08:46 2023-02-15 22:13:40 2022-06-14 20:00:00 2022-09-17 03:45:20 1122 WALNUT ST 19107 https://d17aqltn7cihbm.cloudfront.net/uploads/... 39.948809 -75.160094
22 23171938 14841518 Pothole Repair Closed NaN Street Defect SR-ST01 Streets Department 46 Business Days 2022-04-04 17:09:48 2023-02-15 22:13:56 2022-06-14 20:00:00 2022-05-11 12:21:17 932 S SAINT BERNARD ST 19143 https://d17aqltn7cihbm.cloudfront.net/uploads/... 39.944998 -75.218785
56 23205542 14882968 Pothole Repair Closed NaN Street Defect SR-ST01 Streets Department 3 Business Days 2022-04-22 15:45:50 2022-04-30 13:03:39 2022-04-27 20:00:00 2022-04-30 13:03:23 9501 GERMANTOWN AVE 19118 NaN 40.084391 -75.224705

Convert the .csv to a geodataframe

Code
call311_2022_geo= [Point(xy) for xy in zip(call311_2022['lon'], call311_2022['lat'])]
call311_2022_gdf= gpd.GeoDataFrame(call311_2022, geometry=call311_2022_geo)
call311_2022_gdf.set_crs(epsg=4326, inplace=True)
print(call311_2022_gdf.crs)

Create a function that counts the number of points within a buffer of each road segment

Code
# Define a function to count points near each segment
def count_points_near_segment(segment_geometry, points, buffer_distance=.00002): #buffer of approximately 10m
    # Buffer the segment geometry to create a buffer around it
    buffered_segment = segment_geometry.buffer(buffer_distance)

    # Check if each point is within the buffered segment
    points_near_segment = points[points.geometry.within(buffered_segment)]

    # Return the count of points near the segment
    return len(points_near_segment)

Create new file with a column for the count of points for each segment

Code
street_merge_311_2022 = street.copy()
# Create a new column in the geojson GeoDataFrame to store the point counts
street_merge_311_2022['311_2022'] = street['geometry'].apply(
    lambda x: count_points_near_segment(x, call311_2022_gdf['geometry'])
)
street_merge_311_2022.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry 311_2022
0 9200 2 BARTRAM AVE 16120 100002 1137.344551 LINESTRING (-75.25362 39.88134, -75.25383 39.8... 0
1 9100 2 BARTRAM AVE 16120 100003 371.888030 LINESTRING (-75.25149 39.88373, -75.25337 39.8... 0
2 0 2 BARTRAM AVE 16120 100004 48.720227 LINESTRING (-75.25337 39.88161, -75.25362 39.8... 0
3 8500 5 EASTWICK PL 30570 100006 292.693856 LINESTRING (-75.24576 39.89227, -75.24724 39.8... 0
4 8500 5 HARLEY PL 40570 100007 292.440377 LINESTRING (-75.24648 39.89267, -75.24798 39.8... 0
2023

Load in 311 data and remove any rows without location.

Code
call311_2023 = pd.read_csv("Data/2023_311.csv")
call311_2023.dropna(subset=['lat'], inplace=True)

Convert the .csv to a geodataframe

Code
call311_2023_geo= [Point(xy) for xy in zip(call311_2023['lon'], call311_2023['lat'])]
call311_2023_gdf= gpd.GeoDataFrame(call311_2023, geometry=call311_2023_geo)
call311_2023_gdf.set_crs(epsg=4326, inplace=True)
print(call311_2023_gdf.crs)
EPSG:4326

Create new file with a column for the count of points for each segment

Code
street_merge_311_2023 = street.copy()
# Create a new column in the geojson GeoDataFrame to store the point counts
street_merge_311_2023['311_2023'] = street['geometry'].apply(
    lambda x: count_points_near_segment(x, call311_2023_gdf['geometry'])
)
street_merge_311_2023 = street_merge_311_2023.sort_values(by='311_2023', ascending=False)
street_merge_311_2023.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry 311_2023
34690 8000 2 CRESHEIM VALLEY DR 25820 820540 880.442668 LINESTRING (-75.19639 40.06663, -75.19626 40.0... 23
20632 200 3 CECIL B MOORE AVE 22010 542988 64.073073 LINESTRING (-75.14010 39.97643, -75.14067 39.9... 20
38699 8400 3 KREWSTOWN RD 48180 960601 1116.928647 LINESTRING (-75.05586 40.07326, -75.05595 40.0... 19
6559 700 4 FITZWATER ST 34280 340399 176.111382 LINESTRING (-75.15434 39.94031, -75.15589 39.9... 18
14012 600 4 N 10TH ST 87990 440803 55.911127 LINESTRING (-75.15418 39.96323, -75.15410 39.9... 16
Combine 311 Data
Code
street_merge_311 = pd.merge(street_merge_311_2022, 
                            street_merge_311_2023, 
                            how = 'left', 
                            on =['R_HUNDRED', 'CLASS', 'STNAME', 'ST_CODE', 'SEG_ID','Shape__Length','geometry'])
street_merge_311.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry 311_2022 311_2023
0 9200 2 BARTRAM AVE 16120 100002 1137.344551 LINESTRING (-75.25362 39.88134, -75.25383 39.8... 0 0
1 9100 2 BARTRAM AVE 16120 100003 371.888030 LINESTRING (-75.25149 39.88373, -75.25337 39.8... 0 0
2 0 2 BARTRAM AVE 16120 100004 48.720227 LINESTRING (-75.25337 39.88161, -75.25362 39.8... 0 0
3 8500 5 EASTWICK PL 30570 100006 292.693856 LINESTRING (-75.24576 39.89227, -75.24724 39.8... 0 0
4 8500 5 HARLEY PL 40570 100007 292.440377 LINESTRING (-75.24648 39.89267, -75.24798 39.8... 0 0
Code
street_merge_311.to_file("Data/street_merge_311.geojson", driver='GeoJSON')

Crashes

Load in crash data:

Code
crash = gpd.read_file("Data/fatal_crashes.geojson")
crash.head()
objectid year district dc_number date_ primary_st secondary_ age sex crash_type hit_____ru veh1 veh2 arrest_yes investigat dc_key point_x point_y time_of_day__24hrs_ geometry
0 342178 2023 35 067936 2023-10-23 04:00:00+00:00 1300 Windrim Ave. NaN 32.0 M Unit #1 was eastbound on the 1300 block of Win... No Auto Fixed Object No The deceased was the cause of the crash. 202335067936 -75.144326 40.030786 NaN POINT (-75.14433 40.03079)
1 342179 2023 24 074067 2023-10-27 04:00:00+00:00 Lehigh Ave. Jasper St. 45.0 F Unit #2 was stopped in traffic eastbound on Le... No Auto Auto No The deceased was the cause of the crash. 202324074067 -75.124325 39.988805 NaN POINT (-75.12432 39.98880)
2 342180 2023 24 074067 2023-10-27 04:00:00+00:00 Lehigh Ave. Jasper St. 57.0 M Unit #2 was stopped in traffic eastbound on Le... No Auto Auto No The person who caused the crash also died in t... 202324074067 -75.124325 39.988805 NaN POINT (-75.12432 39.98880)
3 342181 2023 24 075220 2023-11-01 04:00:00+00:00 Lehigh Ave. Emerald St. 33.0 M Unit #2 was standing in the west crosswalk in ... Yes Auto Pedestrian No Active investigation 202324075220 -75.123831 39.988780 NaN POINT (-75.12383 39.98878)
4 342182 2023 14 056753 2023-10-25 04:00:00+00:00 6329 Stenton Ave. NaN 62.0 M Unit #1 was northbound on Stenton when it stru... No Auto Pedestrian No Active investigation 202314056753 -75.165143 40.058944 NaN POINT (-75.16514 40.05894)
2022

Pull out the 2022 crash data.

Code
crash['date_'] = crash['date_'].astype(str)
crash_2022 = crash[crash['date_'].str.contains('2022')]
objectid year district dc_number date_ primary_st secondary_ age sex crash_type hit_____ru veh1 veh2 arrest_yes investigat dc_key point_x point_y time_of_day__24hrs_ geometry
425 341983 2022 24 026326 2022-05-10 04:00:00+00:00 Allegheny Ave. Kensington Ave. 32.0 M Unit #1 E/B on Allegheny Ave. at high speed, f... No Auto Pedestrian No No arrest. Driver deceased. 202224026326 -75.115579 39.995489 NaN POINT (-75.11558 39.99549)
426 341984 2022 24 026326 2022-05-10 04:00:00+00:00 Allegheny Ave. Kensington Ave. 45.0 M Unit #1 E/B on Allegheny Ave. at high speed, f... No Auto Pedestrian No No arrest. Driver deceased. 202224026326 -75.115579 39.995489 NaN POINT (-75.11558 39.99549)
427 341985 2022 24 026326 2022-05-10 04:00:00+00:00 Allegheny Ave. Kensington Ave. 40.0 F Unit #1 E/B on Allegheny Ave. at high speed, f... No Auto Pedestrian No No arrest. Driver deceased. 202224026326 -75.115579 39.995489 NaN POINT (-75.11558 39.99549)
428 341986 2022 25 025840 2022-05-12 04:00:00+00:00 5th St. Hunting Park Ave. 18.0 M Unit #1, (Auto), W/B on Hunting Park Ave. when... No Auto Dirt-bike No No arrest. Driver deceased. 202225025840 -75.135545 40.015094 NaN POINT (-75.13554 40.01509)
429 341987 2022 12 026044 2022-05-12 04:00:00+00:00 Paschall Ave. 7000 Block 50.0 F Bicyclist riding W/B in E/B lanes along side p... No Auto Bicycle Pending Under Investigation. 202212026044 -75.241161 39.918834 NaN POINT (-75.24116 39.91883)

Count number of crash points near each street segment.

Code
street_merge_crash_2022 = street.copy()
# Create a new column in the geojson GeoDataFrame to store the point counts
street_merge_crash_2022['crash_2022'] = street['geometry'].apply(
    lambda x: count_points_near_segment(x, crash_2022['geometry'])
)
street_merge_crash_2022 = street_merge_crash_2022.sort_values(by='crash_2022', ascending=False)
street_merge_crash_2022.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry crash_2022
28135 5400 2 GERMANTOWN AVE 36660 720528 203.260643 LINESTRING (-75.17061 40.03314, -75.17206 40.0... 2
17755 3100 2 N BROAD ST 19140 540010 233.942251 LINESTRING (-75.15327 40.00001, -75.15293 40.0... 2
29420 0 4 W COULTER ST 25340 722191 113.689853 LINESTRING (-75.17061 40.03314, -75.17067 40.0... 2
9063 200 2 S 52ND ST 88830 400566 85.461286 LINESTRING (-75.22552 39.95696, -75.22564 39.9... 2
28136 5300 2 GERMANTOWN AVE 36660 720529 179.222127 LINESTRING (-75.16933 40.03239, -75.17061 40.0... 2
2023

Pull out crashes from 2023.

Code
crash['date_'] = crash['date_'].astype(str)
crash_2023 = crash[crash['date_'].str.contains('2023')]
objectid year district dc_number date_ primary_st secondary_ age sex crash_type hit_____ru veh1 veh2 arrest_yes investigat dc_key point_x point_y time_of_day__24hrs_ geometry
0 342178 2023 35 067936 2023-10-23 04:00:00+00:00 1300 Windrim Ave. NaN 32.0 M Unit #1 was eastbound on the 1300 block of Win... No Auto Fixed Object No The deceased was the cause of the crash. 202335067936 -75.144326 40.030786 NaN POINT (-75.14433 40.03079)
1 342179 2023 24 074067 2023-10-27 04:00:00+00:00 Lehigh Ave. Jasper St. 45.0 F Unit #2 was stopped in traffic eastbound on Le... No Auto Auto No The deceased was the cause of the crash. 202324074067 -75.124325 39.988805 NaN POINT (-75.12432 39.98880)
2 342180 2023 24 074067 2023-10-27 04:00:00+00:00 Lehigh Ave. Jasper St. 57.0 M Unit #2 was stopped in traffic eastbound on Le... No Auto Auto No The person who caused the crash also died in t... 202324074067 -75.124325 39.988805 NaN POINT (-75.12432 39.98880)
3 342181 2023 24 075220 2023-11-01 04:00:00+00:00 Lehigh Ave. Emerald St. 33.0 M Unit #2 was standing in the west crosswalk in ... Yes Auto Pedestrian No Active investigation 202324075220 -75.123831 39.988780 NaN POINT (-75.12383 39.98878)
4 342182 2023 14 056753 2023-10-25 04:00:00+00:00 6329 Stenton Ave. NaN 62.0 M Unit #1 was northbound on Stenton when it stru... No Auto Pedestrian No Active investigation 202314056753 -75.165143 40.058944 NaN POINT (-75.16514 40.05894)

Count number of crash points near each street segment.

Code
street_merge_crash_2023 = street.copy()
# Create a new column in the geojson GeoDataFrame to store the point counts
street_merge_crash_2023['crash_2023'] = street['geometry'].apply(
    lambda x: count_points_near_segment(x, crash_2023['geometry'])
)
street_merge_crash_2023 = street_merge_crash_2023.sort_values(by='crash_2023', ascending=False)
street_merge_crash_2023.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry crash_2023
18926 1800 2 E LEHIGH AVE 49980 541203 192.773312 LINESTRING (-75.12549 39.98985, -75.12438 39.9... 3
32641 6100 2 HARBISON AVE 40440 761214 117.938073 LINESTRING (-75.06280 40.02441, -75.06269 40.0... 2
19052 1900 2 E LEHIGH AVE 49980 541333 171.514536 LINESTRING (-75.12438 39.98883, -75.12339 39.9... 2
1550 2200 2 PENROSE AVE 64080 221161 551.293937 LINESTRING (-75.18637 39.91005, -75.18712 39.9... 2
511 2100 2 COBBS CREEK PKWY 24060 200317 244.834677 LINESTRING (-75.24686 39.91877, -75.24690 39.9... 2
Combine Crash Data
Code
street_merge_crash = pd.merge(street_merge_crash_2022, 
                            street_merge_crash_2023, 
                            how = 'left', 
                            on =['R_HUNDRED', 'CLASS', 'STNAME', 'ST_CODE', 'SEG_ID','Shape__Length','geometry'])
street_merge_crash.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry crash_2022 crash_2023
0 5400 2 GERMANTOWN AVE 36660 720528 203.260643 LINESTRING (-75.17061 40.03314, -75.17206 40.0... 2 0
1 3100 2 N BROAD ST 19140 540010 233.942251 LINESTRING (-75.15327 40.00001, -75.15293 40.0... 2 0
2 0 4 W COULTER ST 25340 722191 113.689853 LINESTRING (-75.17061 40.03314, -75.17067 40.0... 2 0
3 200 2 S 52ND ST 88830 400566 85.461286 LINESTRING (-75.22552 39.95696, -75.22564 39.9... 2 0
4 5300 2 GERMANTOWN AVE 36660 720529 179.222127 LINESTRING (-75.16933 40.03239, -75.17061 40.0... 2 0
Code
street_merge_crash.to_file("Data/street_merge_crash.geojson", driver='GeoJSON')

Bicycle Level of Traffic Stress

Load in Bike Level of Traffic Stress data and city limits to trim the Bike LTS data to just Philadelphia.

Code
bike_LTS = gpd.read_file("Data/Bike_LTS.geojson")
bike_LTS = bike_LTS[['gid', 'length', 'lts_score', 'geometry']]

# Download the Philadelphia city limits
url = "https://opendata.arcgis.com/datasets/405ec3da942d4e20869d4e1449a2be48_0.geojson"
city_limits = gpd.read_file(url).to_crs(epsg=4326)

print(bike_LTS.crs)
print(city_limits.crs)
EPSG:4326
EPSG:4326

Iterate over each street segment to pull the LTS score from the nearest bike_LTS segment.

Code
street_merge_bike = street.copy()

# Iterate over each row in the 'street' GeoDataFrame
for index, street_row in street.iterrows():
    # Extract the geometry of the street line segment
    street_geometry = street_row['geometry']
    
    # Create a Point geometry from the centroid of the street line segment
    street_centroid = Point(street_geometry.centroid)
    
    # Find the index of the nearest 'bike_LTS_clip' line segment
    nearest_bike_index = bike_LTS_clip.geometry.distance(street_centroid).idxmin()
    
    # Extract the 'lts_score' from the nearest 'bike_LTS_clip' line segment
    lts_score = bike_LTS_clip.loc[nearest_bike_index, 'lts_score']
    
    # Add a new column to the result GeoDataFrame with the 'lts_score' value
    street_merge_bike.at[index, 'nearest_lts_score'] = lts_score
    
street_merge_bike.head()
C:\Users\epark\AppData\Local\Temp\ipykernel_25588\2918275708.py:12: UserWarning: Geometry is in a geographic CRS. Results from 'distance' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.

  nearest_bike_index = bike_LTS_clip.geometry.distance(street_centroid).idxmin()
C:\Users\epark\AppData\Local\Temp\ipykernel_25588\2918275708.py:12: UserWarning: Geometry is in a geographic CRS. Results from 'distance' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.

  nearest_bike_index = bike_LTS_clip.geometry.distance(street_centroid).idxmin()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry nearest_lts_score
0 9200 2 BARTRAM AVE 16120 100002 1137.344551 LINESTRING (-75.25362 39.88134, -75.25383 39.8... 3
1 9100 2 BARTRAM AVE 16120 100003 371.888030 LINESTRING (-75.25149 39.88373, -75.25337 39.8... 3
2 0 2 BARTRAM AVE 16120 100004 48.720227 LINESTRING (-75.25337 39.88161, -75.25362 39.8... 4
3 8500 5 EASTWICK PL 30570 100006 292.693856 LINESTRING (-75.24576 39.89227, -75.24724 39.8... 1
4 8500 5 HARLEY PL 40570 100007 292.440377 LINESTRING (-75.24648 39.89267, -75.24798 39.8... 1
Code
street_merge_bike.to_file("Data/street_merge_bike.geojson", driver='GeoJSON')

Combine All Data

Code
street_merge_crash=gpd.read_file("Data/street_merge_crash.geojson")
street_merge_truck=gpd.read_file("Data/street_merge_truck.geojson")
street_merge_pav=gpd.read_file("Data/street_merge_pav.geojson")
street_merge_311=gpd.read_file("Data/street_merge_311.geojson")
street_merge_bike=gpd.read_file("Data/street_merge_bike.geojson")


street_merge = pd.merge(street, street_merge_crash, how='left', on=['R_HUNDRED', 'CLASS', 'STNAME', 'ST_CODE', 'SEG_ID', 'Shape__Length', 'geometry'])

street_merge = pd.merge(street_merge, street_merge_truck, how='left', on=['R_HUNDRED', 'CLASS', 'STNAME', 'ST_CODE', 'SEG_ID', 'Shape__Length', 'geometry'])

street_merge = pd.merge(street_merge, street_merge_pav, how='left', on=['R_HUNDRED', 'CLASS', 'STNAME', 'ST_CODE', 'SEG_ID', 'Shape__Length', 'geometry'])

street_merge = pd.merge(street_merge, street_merge_311, how='left', on=['R_HUNDRED', 'CLASS', 'STNAME', 'ST_CODE', 'SEG_ID', 'Shape__Length', 'geometry'])

street_merge = pd.merge(street_merge, street_merge_bike, how='left', on=['R_HUNDRED', 'CLASS', 'STNAME', 'ST_CODE', 'SEG_ID', 'Shape__Length', 'geometry'])

street_merge.head()
R_HUNDRED CLASS STNAME ST_CODE SEG_ID Shape__Length geometry crash_2022 crash_2023 Truck 2022_Paved 2023_Paved 311_2022 311_2023 nearest_lts_score
0 9200 2 BARTRAM AVE 16120 100002 1137.344551 LINESTRING (-75.25362 39.88134, -75.25383 39.8... 0 0 0.0 0.0 1.0 0 0 3
1 9100 2 BARTRAM AVE 16120 100003 371.888030 LINESTRING (-75.25149 39.88373, -75.25337 39.8... 0 0 0.0 0.0 1.0 0 0 3
2 0 2 BARTRAM AVE 16120 100004 48.720227 LINESTRING (-75.25337 39.88161, -75.25362 39.8... 0 0 0.0 0.0 1.0 0 0 4
3 8500 5 EASTWICK PL 30570 100006 292.693856 LINESTRING (-75.24576 39.89227, -75.24724 39.8... 0 0 0.0 0.0 0.0 0 0 1
4 8500 5 HARLEY PL 40570 100007 292.440377 LINESTRING (-75.24648 39.89267, -75.24798 39.8... 0 0 0.0 0.0 0.0 0 0 1
Code
street_merge.to_file("Data/street_merge.geojson", driver='GeoJSON')