-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMapping
29 lines (23 loc) · 1.16 KB
/
Mapping
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/bin/env python3
#Census and Blockgroup equivalence
import pandas as pd
#Take a plain text file with equivalences for Texas congressional districts and census blocks, read as a dataframe
def _load_block_equivalence(path):
    """Read the block-to-district equivalence file into a DataFrame.

    The previous version let pandas treat the first line of the file as a
    header, so the first block record ("484199502001056", "01") became the
    column names and was dropped from the data. The file is read here as
    header-less, with explicit column names.

    Args:
        path: Path of the comma-separated equivalence file.

    Returns:
        DataFrame with a string column ``GEOID`` and a ``District`` column.
    """
    # NOTE(review): assumes column 0 is the block GEOID and column 1 the
    # district number, as implied by the original rename - confirm against
    # the file.
    return pd.read_csv(
        path,
        header=None,
        usecols=[0, 1],
        names=['GEOID', 'District'],
        # Keep GEOIDs as text so no digit (e.g. a leading zero) is lost.
        dtype={'GEOID': str},
    )


def _district_block_group_ids(blocks, district):
    """Return the distinct block-group IDs of the blocks in one district.

    Args:
        blocks: DataFrame with ``GEOID`` and ``District`` columns.
        district: District number to select.

    Returns:
        List of unique 12-character IDs, in first-seen order.
    """
    #Select only those rows that are for the requested district
    # Compare numerically so "25" and 25 are both matched.
    in_district = pd.to_numeric(blocks['District'], errors='coerce') == district
    #The census block group number is the first 12 characters of the block GEOID
    group_ids = blocks.loc[in_district, 'GEOID'].astype(str).str[0:12]
    #Eliminate duplicates: many blocks share one block group
    return group_ids.drop_duplicates().tolist()


def _filter_census_rows(census, block_group_ids):
    """Keep the census rows whose block group is in ``block_group_ids``.

    Args:
        census: DataFrame with a string ``GEO_ID`` column.
        block_group_ids: Iterable of block-group ID strings to keep.

    Returns:
        New DataFrame with an added ``ID`` column (``GEO_ID`` from the 10th
        character to the end), restricted to the matching rows. The input
        frame is not modified.
    """
    #create new column that takes as its values the GEO_ID and goes from the 10th character until the end
    with_id = census.assign(ID=census['GEO_ID'].str[9:])
    #only keep those rows where the ID is in the list of IDs for the district
    return with_id.loc[with_id['ID'].isin(block_group_ids)]


def main(blocks_path='tx_cong.txt', census_path='data.csv',
         out_path='Bell_25.csv', district=25):
    """Extract the census rows that fall inside one congressional district.

    With the default arguments this reads ``tx_cong.txt`` and ``data.csv``
    and writes ``Bell_25.csv`` for the 25th district, as the script always
    did.

    Args:
        blocks_path: Block-to-district equivalence file (no header row).
        census_path: Census data CSV with a ``GEO_ID`` column.
        out_path: Destination CSV file.
        district: Congressional district number to keep.

    Returns:
        The filtered DataFrame that was written to ``out_path``.
    """
    blocks = _load_block_equivalence(blocks_path)
    block_group_ids = _district_block_group_ids(blocks, district)
    #Read the census data as a dataframe, it has a column with the complete GEO_ID
    census = pd.read_csv(census_path)
    selected = _filter_census_rows(census, block_group_ids)
    #Save the data frame as a .csv
    selected.to_csv(out_path)
    return selected


if __name__ == '__main__':
    main()