-
Notifications
You must be signed in to change notification settings - Fork 0
/
aggregate.py
109 lines (80 loc) · 3.11 KB
/
aggregate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import asyncio
import json
import logging
from datetime import datetime
import aiohttp
from fimparser import parse_chair_overview, ChairParser
# URL of the faculty overview page; get_chair_data() downloads it and hands
# the HTML to parse_chair_overview() to discover the individual chairs.
CHAIR_OVERVIEW_URL = "https://www.fim.uni-passau.de/forschung-und-professuren/lehrstuehle-professuren-und-fachgebiete"
class UniversityChair:
    """A chair and its team, with the "Stef"-like members tracked separately.

    Every member added through ``add_team_member`` is stored in the team
    list; members whose name contains ``"stef"`` or ``"steph"`` (compared
    case-insensitively) are additionally stored in a second list.
    """

    # Lower-case substrings that mark a member as a "Stef".
    _STEF_MARKERS = ("stef", "steph")

    def __init__(self, name: str) -> None:
        """Create an empty chair called *name*."""
        self.name = name
        self._team = []  # every member, in insertion order
        self._stef = []  # subset of _team matching one of _STEF_MARKERS

    def add_team_member(self, member: str) -> None:
        """Add *member* to the team and, if the name matches, to the Stef list."""
        self._team.append(member)
        # Lower-case once and use ``in``: only membership matters here, not
        # the position that ``str.find`` would report.
        lowered = member.lower()
        if any(marker in lowered for marker in self._STEF_MARKERS):
            self._stef.append(member)

    def __str__(self) -> str:
        """Return a one-line, human-readable summary of the chair."""
        return (
            f"{self.name} has {len(self._team)} team members, "
            f"including {len(self._stef)} Stef(s): {self._stef}"
        )

    def __repr__(self) -> str:
        """Return the same text as ``__str__``."""
        return self.__str__()

    def stef_stats(self) -> tuple:
        """Return ``(chair name, number of Stef members)``."""
        return (self.name, len(self._stef))

    def stef_list(self) -> list:
        """Return a copy of the Stef list, so callers cannot mutate the original."""
        return self._stef.copy()
async def get_chair_data():
    """Download the chair overview page and scrape every chair it lists.

    The overview HTML is passed to ``parse_chair_overview``; each item it
    yields is handed to ``get_individual_chair``. All of those requests
    share one ``aiohttp.ClientSession`` and are awaited as they complete, so
    the result order follows completion order.

    Returns:
        A list with the result of every ``get_individual_chair`` call that
        succeeded. Chairs whose scraping raised are logged as warnings and
        left out.

    Raises:
        Exception: if the overview page does not answer with HTTP 200.
    """
    logging.debug("getting chair overview page")
    chair_data = []

    async with aiohttp.ClientSession() as session:
        # Use the response as a context manager so its connection is
        # released even when the status check below raises.
        async with session.get(CHAIR_OVERVIEW_URL) as resp:
            if resp.status != 200:
                raise Exception(
                    f"request to overview page was not successful: {resp.status}"
                )
            chair_page_text = await resp.text()

        chair_info = parse_chair_overview(chair_page_text)
        tasks = [
            get_individual_chair(session, chair_link_tuple)
            for chair_link_tuple in chair_info
        ]

        for future in asyncio.as_completed(tasks):
            try:
                chair = await future
            except Exception as e:
                # Best effort: one broken chair page must not abort the run.
                logging.warning("failed to parse chair: %s", e)
            else:
                chair_data.append(chair)

    return chair_data
async def get_individual_chair(session, chair_link_tuple):
    """Scrape one chair: landing page first, then its team page.

    Args:
        session: the HTTP session used for both requests; its ``get`` result
            is used as an async context manager exposing ``status`` and
            ``text()``.
        chair_link_tuple: indexable pair whose item 0 is the chair name and
            item 1 is the URL of the chair's landing page.

    Returns:
        A ``UniversityChair`` named after the chair and filled with every
        member returned by ``ChairParser.parse_team_page``.

    Raises:
        Exception: if either request does not answer with HTTP 200, or if
            reading/parsing the team page fails (re-raised with the chair
            name appended and the original error chained as the cause).
    """
    chair_name = chair_link_tuple[0]
    chair_link = chair_link_tuple[1]
    logging.debug("getting chair: %s", chair_name)

    chair_parser = ChairParser(chair_name)

    # Responses are used as context managers so their connections are
    # released on the error paths as well.
    async with session.get(chair_link) as resp:
        if resp.status != 200:
            # Report the chair *name* first and the link last, matching the
            # team-page message below.
            raise Exception(
                f"request for chair {chair_name} was not successful: {resp.status}. link: {chair_link}"
            )
        chair_team_link = chair_parser.parse_landing_page(
            chair_link, await resp.text()
        )

    async with session.get(chair_team_link) as resp:
        if resp.status != 200:
            raise Exception(
                f"request for teampage of chair {chair_name} was not successful: {resp.status}. link: {chair_team_link}"
            )
        # add context of chair to exception
        try:
            chair_team = chair_parser.parse_team_page(await resp.text())
        except Exception as e:
            raise Exception(f"{e} for chair {chair_name}") from e

    chair = UniversityChair(chair_name)
    for member in chair_team:
        chair.add_team_member(member)
    return chair
def chair_data_to_json(chair_data):
    """Serialize the Stef lists of *chair_data* into a JSON document.

    Args:
        chair_data: iterable of chair objects exposing ``name`` and
            ``stef_list()``.

    Returns:
        A JSON object as a string with two keys: ``"timestamp"`` (the current
        POSIX timestamp, encoded as a string) and ``"data"`` (a list of
        ``{"chair_name": ..., "stef_list": [...]}`` objects, indented by 2).
    """
    # Go through the public accessor instead of reaching into ``_stef``.
    data = [
        {"chair_name": chair.name, "stef_list": chair.stef_list()}
        for chair in chair_data
    ]
    # The timestamp stays a JSON *string*, as before; json.dumps does the
    # quoting rather than hand-written quote characters.
    timestamp_json = json.dumps(str(datetime.now().timestamp()))
    data_json = json.dumps(data, indent=2)
    return f'{{"timestamp": {timestamp_json}, "data": {data_json}}}'