# county_parser.py
# Forked from openelections/openelections-data-ga.
# Python 2 scraper: relies on requests, BeautifulSoup 3, and unicodecsv.
import string
import requests
from BeautifulSoup import BeautifulSoup
import unicodecsv

OFFICES = ['United States Senator, Isakson', 'Governor', 'Lieutenant Governor', 'Secretary Of State', 'Attorney General',
           'State School Superintendent', 'Commissioner Of Insurance', 'Commissioner Of Agriculture', 'Commissioner Of Labor',
           'U.S. Representative, District 1', 'U.S. Representative, District 2', 'U.S. Representative, District 3', 'U.S. Representative, District 4',
           'U.S. Representative, District 5', 'U.S. Representative, District 6', 'U.S. Representative, District 7', 'U.S. Representative, District 8',
           'U.S. Representative, District 9', 'U.S. Representative, District 10', 'U.S. Representative, District 11',
           'U.S. Representative, District 12', 'U.S. Representative, District 13']

def parse_statewide_url(url):
    # Note: unfinished -- this finds the office headings on the statewide summary
    # page that match OFFICES, but never populates or returns `contests`.
    contests = []
    r = requests.get(url)
    soup = BeautifulSoup(r.text)
    offices = soup.findAll('strong')[1:]
    for office in offices:
        if office.text in OFFICES:
            o = office.text

def get_candidates(candidates_row):
    # Build a list of candidate dicts from the header row of a results table,
    # skipping blank spacer cells and the trailing 'Totals' column.
    candidates = []
    for cell in candidates_row.findAll('td'):
        if cell.text == ' ':
            continue
        elif cell.findAll('br')[0].previous.strip() == 'Totals':
            continue
        else:
            candidates.append({'name': cell.findAll('br')[0].previous.strip(),
                               'party': cell.findAll('br')[1].next.strip().replace('(', '').replace(')', ''),
                               'total_votes': cell.findAll('br')[3].previous.strip().replace(',', ''),
                               'counties': []})
    return candidates
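
# Illustrative only: once parse_county_results() has filled in the per-county
# rows, each candidate dict produced by get_candidates() looks roughly like the
# example below (the name and numbers are made-up placeholders, not real results):
#   {'name': 'Jane Doe', 'party': 'Dem', 'total_votes': '12345',
#    'counties': [{'county': 'Appling', 'votes': '678'}, ...]}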

def parse_county_results(url):
    # Parse one per-office results page. The candidate header table sits inside
    # the first row of the third table on the page; the next three rows are skipped.
    r = requests.get(url)
    soup = BeautifulSoup(r.text)
    table = soup.findAll('table')[2]
    rows = table.findAll('tr')
    candidates = get_candidates(rows[0].find('table').find('tr'))
    for row in rows[4:]:
        county_name = row.find('td').text.strip()
        # Per-candidate vote cells; the first two cells and the last cell of each
        # row are not candidate votes and are skipped.
        for idx, cell in enumerate(row.findAll('td')[2:-1]):
            candidate = candidates[idx]
            candidate['counties'].append({'county': county_name, 'votes': cell.text.replace(',', '')})
    return candidates

def get_county_results(url, file_name, office, district):
    results = parse_county_results(url)
    with open(file_name, 'wb') as csvfile:
        w = unicodecsv.writer(csvfile, encoding='utf-8')
        w.writerow(['county', 'office', 'district', 'party', 'candidate', 'votes'])
        for result in results:
            for county in result['counties']:
                w.writerow([county['county'], office, district, result['party'], result['name'], county['votes']])

def get_state_senate(base_url, districts):
    # Fetch each State Senate district page (base_url plus a zero-padded two-digit
    # district number) and write county-level rows to state_senate.csv.
    # `districts` is an exclusive upper bound, per range().
    with open('state_senate.csv', 'wb') as csvfile:
        w = unicodecsv.writer(csvfile, encoding='utf-8')
        w.writerow(['county', 'office', 'district', 'party', 'candidate', 'votes'])
        for district in range(1, districts):
            url = base_url + string.zfill(str(district), 2) + '.htm'
            print url
            results = parse_county_results(url)
            for result in results:
                for county in result['counties']:
                    w.writerow([county['county'], 'State Senate', district, result['party'], result['name'], county['votes']])

def get_state_house(base_url, districts):
    # State House page numbers are offset by 500 on the results site, so the page
    # number minus 500 is the actual district written to state_house.csv.
    with open('state_house.csv', 'wb') as csvfile:
        w = unicodecsv.writer(csvfile, encoding='utf-8')
        w.writerow(['county', 'office', 'district', 'party', 'candidate', 'votes'])
        for district in range(501, districts):
            url = base_url + str(district) + '.htm'
            print url
            d = district - 500
            results = parse_county_results(url)
            for result in results:
                for county in result['counties']:
                    w.writerow([county['county'], 'State House', d, result['party'], result['name'], county['votes']])
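

# A minimal usage sketch. The URLs below are hypothetical placeholders for the
# election-results pages this scraper was written against; swap in the real base
# URLs, filenames, and district counts before running.
if __name__ == '__main__':
    # One statewide office -> one CSV.
    get_county_results('http://example.com/results/governor.htm', 'governor.csv', 'Governor', '')

    # Legislative chambers: the helpers above build each district URL from
    # base_url plus a district-number suffix. The upper bounds here are
    # illustrative and exclusive, per range().
    get_state_senate('http://example.com/results/senate_', 57)
    get_state_house('http://example.com/results/house_', 681)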