From 5aa40404d19a861d7175d5b9dd85afa83a7feda6 Mon Sep 17 00:00:00 2001 From: Holger Bruch Date: Fri, 5 May 2023 15:18:29 +0200 Subject: [PATCH] Stuttgart: add generic DatexScraperMixin and scraper/geojson for stuttgart --- new/stuttgart.geojson | 642 ++++++++++++++++++++++++++++++++++++++++++ new/stuttgart.py | 26 ++ util/__init__.py | 1 + util/datex.py | 119 ++++++++ 4 files changed, 788 insertions(+) create mode 100644 new/stuttgart.geojson create mode 100644 new/stuttgart.py create mode 100644 util/datex.py diff --git a/new/stuttgart.geojson b/new/stuttgart.geojson new file mode 100644 index 0000000..cf24d43 --- /dev/null +++ b/new/stuttgart.geojson @@ -0,0 +1,642 @@ +{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { + "id": "stuttgart12717223", + "name": "Haus der Geschichte", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 46, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717226", + "name": "Bohnenviertel", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 160, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717229", + "name": "Breuninger", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 580, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717244", + "name": "Galeria Kaufhof (Rathausviertel)", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 111, + "has_live_capacity": true + } + }, + { + "type": "Feature", + 
"properties": { + "id": "stuttgart12717250", + "name": "Parkhaus Hofdiener", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 143, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717253", + "name": "Galeria Kaufhof (Unt. Königstr.)", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 427, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717256", + "name": "Schloßstraße", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 175, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717262", + "name": "City-Garage", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 140, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717289", + "name": "Österreichischer Platz", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 564, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717312", + "name": "Im Zeppelin Carré", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 265, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717315", + "name": "Züblin", + "type": "unknown", + "public_url": null, + "source_url": 
"https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 530, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717345", + "name": "Friedrichsbau", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 71, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717363", + "name": "Liederhalle / Bosch-Areal", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 110, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717366", + "name": "Holzgartenstraße", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 194, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717372", + "name": "Königsbau Passagen", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 312, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717375", + "name": "Kronprinzstraße", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 564, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717384", + "name": "Landtag", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 5, + "has_live_capacity": true + } + }, + { + "type": 
"Feature", + "properties": { + "id": "stuttgart12717408", + "name": "Rotebühlplatz/Kulturzentrum", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 270, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717411", + "name": "Schillerplatz", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 138, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717414", + "name": "Schloßgarten (verbunden mit Marstallstraße)", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 385, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717417", + "name": "Schwabenzentrum", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 350, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717426", + "name": "Sophiengarage", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 181, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717429", + "name": "Staatsgalerie", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 251, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717432", + "name": "Stadtgarten (Katharinenhospital)", + "type": "unknown", + 
"public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 135, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717435", + "name": "Stephangarage", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 265, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717441", + "name": "LBBW (Hbf.)", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 870, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717447", + "name": "Tivoli", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 125, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12717450", + "name": "Tübinger Carre", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 0, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12718752", + "name": "Das Gerber", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 573, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart12718764", + "name": "Milaneo", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 1174, + "has_live_capacity": true + } + }, + { + 
"type": "Feature", + "properties": { + "id": "stuttgart12718779", + "name": "Rotebühlhof/Finanzamt", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 133, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78241073", + "name": "Rathaus", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 0, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78241093", + "name": "Neue Brücke", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 150, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78257172", + "name": "Keplerstraße", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 93, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78278431", + "name": "Landesbibliothek", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 121, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78278545", + "name": "Kriegsberg", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 235, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78278577", + "name": "P+R Österfeld", + "type": "unknown", + "public_url": null, + "source_url": 
"https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 284, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78278879", + "name": "Bülow Carre", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 108, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78279135", + "name": "Dorotheen-Quartier", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 270, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78280073", + "name": "P+R Albstraße", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 100, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78318611", + "name": "Haus der Wirtschaft", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 52, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78319371", + "name": "Marquardtbau", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 0, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78319401", + "name": "Stadtmitte", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 0, + "has_live_capacity": true + } + }, + { + "type": "Feature", + 
"properties": { + "id": "stuttgart78319431", + "name": "Am Bollwerk", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 0, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78319463", + "name": "Am Schloßplatz", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 0, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78319492", + "name": "Wilhelma", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 0, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78357190", + "name": "BW-Bank/Kunstmuseum", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 0, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78357200", + "name": "Olgahospital", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 0, + "has_live_capacity": true + } + }, + { + "type": "Feature", + "properties": { + "id": "stuttgart78357206", + "name": "Le Méridian", + "type": "unknown", + "public_url": null, + "source_url": "https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml", + "address": null, + "capacity": 0, + "has_live_capacity": true + } + } + ] +} \ No newline at end of file diff --git a/new/stuttgart.py b/new/stuttgart.py new file mode 100644 index 0000000..941b2ea --- /dev/null +++ b/new/stuttgart.py @@ -0,0 +1,26 @@ +from typing import List + +from 
util import *
+
+
+class Stuttgart(DatexScraperMixin, ScraperBase):
+    """
+    This Scraper scrapes the re-published datex publications of the city of Stuttgart, to avoid
+    the currently still necessary certificates for MDM data portal access.
+
+    Note: the static publication currently doesn't publish some important information like coordinates,
+    type, address. I (Holger Bruch) contacted the data owner and asked for additional information in the
+    publication.
+
+    In case this information will not be provided soon, the geojson will need to be enhanced manually :-/
+    """
+    POOL = PoolInfo(
+        id="stuttgart",
+        name="Stuttgart",
+        public_url="https://service.mdm-portal.de/mdm-portal-application/publDetail.do?publicationId=3059002",
+        source_url="https://data.mfdz.de/DATEXII_Parkdaten_dynamisch_Stuttgart/body.xml",
+        attribution_contributor="Landeshauptstadt Stuttgart, Tiefbauamt, mirrored by MFDZ",
+        attribution_license="dl-de/by-2-0",
+    )
+
+    STATIC_LOTS_URL = "https://data.mfdz.de/DATEXII_Parkdaten_statisch_Stuttgart/body.xml"
diff --git a/util/__init__.py b/util/__init__.py
index a90c594..f35e70e 100644
--- a/util/__init__.py
+++ b/util/__init__.py
@@ -11,3 +11,4 @@
 from .scraper import ScraperBase
 from .soup import get_soup_text
 from .structs import PoolInfo, LotInfo, LotData
+from .datex import DatexScraperMixin
diff --git a/util/datex.py b/util/datex.py
new file mode 100644
index 0000000..26edbce
--- /dev/null
+++ b/util/datex.py
@@ -0,0 +1,142 @@
+from typing import Dict, List, Optional
+
+from .structs import LotInfo, LotData
+from .strings import name_to_legacy_id, guess_lot_type, parse_geojson
+
+
+class DatexScraperMixin:
+    """
+    This Mixin provides an implementation of get_lot_data and
+    get_lot_infos that should work with most DATEXII ParkingFacility publications.
+
+    Subclasses should provide the dynamic parking facility publication url as POOL.source_url
+    and the static parking facility publication as class variable:
+
+        STATIC_LOTS_URL: str
+
+    The mixin calls self.now(), self.request_soup() and self.to_utc_datetime(), so it
+    has to be combined with a class providing them (stuttgart.py uses ScraperBase).
+
+    See stuttgart.py for an example.
+    """
+
+    def get_lot_data(self) -> List[LotData]:
+        """
+        Fetch the dynamic publication from POOL.source_url and return one LotData
+        per parkingfacilitystatus entry.
+
+        Counts that are missing or not numeric are reported as None. A missing or
+        unparseable per-facility status time falls back to the publication time.
+        """
+        now = self.now()
+        soup = self.request_soup(self.POOL.source_url, encoding='UTF-8')
+
+        last_updated = self.to_utc_datetime(soup.find("publicationtime").text)
+
+        lots = []
+
+        for facility in soup.select("parkingfacilitytablestatuspublication > parkingfacilitystatus"):
+            lot_id = facility.find("parkingfacilityreference")["id"]
+
+            status_time = facility.find("parkingfacilitystatustime")
+            try:
+                lot_timestamp = self.to_utc_datetime(status_time.text) if status_time else last_updated
+            except Exception:
+                # Best effort: a broken per-facility timestamp must not abort the whole
+                # scrape. `except Exception` lets KeyboardInterrupt/SystemExit propagate.
+                lot_timestamp = last_updated
+
+            # The status value is looked up in a nested element of the same name.
+            status_tag = facility.find("parkingfacilitystatus")
+
+            if status_tag and status_tag.text in [LotData.Status.open, LotData.Status.closed]:
+                status = status_tag.text
+            elif status_tag and status_tag.text == "spacesAvailable":
+                status = LotData.Status.open
+            else:
+                status = LotData.Status.nodata
+
+            # TODO: Need to find out the difference between
+            # totalNumberOfOccupiedParkingSpaces and totalNumberOfVacantParkingSpaces
+            # e.g. first goes to zero or might disappear when closed while second remains
+            lots.append(
+                LotData(
+                    id=name_to_legacy_id(self.POOL.id, lot_id),
+                    timestamp=now,
+                    lot_timestamp=lot_timestamp,
+                    status=status,
+                    num_occupied=self._get_int(facility, "totalnumberofoccupiedparkingspaces"),
+                    num_free=self._get_int(facility, "totalnumberofvacantparkingspaces"),
+                    capacity=self._get_int(facility, "totalparkingcapacityshorttermoverride"),
+                )
+            )
+
+        return lots
+
+    def get_lot_infos(self) -> List[LotInfo]:
+        """
+        Fetch the static publication from STATIC_LOTS_URL and return one LotInfo
+        per parkingfacility element.
+
+        latitude/longitude are None for facilities without pointcoordinates.
+        """
+        soup = self.request_soup(self.STATIC_LOTS_URL, encoding='UTF-8')
+
+        lots = []
+        for facility in soup.find_all("parkingfacility"):
+            coord = self._get_facility_coords(facility)
+            lots.append(
+                LotInfo(
+                    id=name_to_legacy_id(self.POOL.id, facility["id"]),
+                    name=self._get_facility_name(facility),
+                    type=LotInfo.Types.unknown,  # there's no data
+                    source_url=self.POOL.source_url,
+                    latitude=coord["latitude"] if coord else None,
+                    longitude=coord["longitude"] if coord else None,
+                    capacity=int(facility.find("totalparkingcapacity").text),
+                    has_live_capacity=True,
+                )
+            )
+
+        return lots
+
+    def _get_facility_name(self, facility) -> Optional[str]:
+        """Return the facility's name, its description as fallback, or None."""
+        # Try name (used by Stuttgart)
+        # NOTE(review): camel-case selector, unlike the lower-case tag names used
+        # everywhere else - presumably matched case-insensitively, confirm.
+        pfname = facility.select_one("parkingFacilityName > values > value")
+        if pfname:
+            return pfname.text
+
+        # If not, try parkingfacilitydescription (as (misused?) by Frankfurt)
+        dfdesc = facility.find("parkingfacilitydescription")
+        if dfdesc:
+            return dfdesc.text
+
+        return None
+
+    def _get_facility_coords(self, facility) -> Optional[Dict[str, float]]:
+        """Return {"latitude": ..., "longitude": ...} as floats, or None without pointcoordinates."""
+        point = facility.find("pointcoordinates")
+
+        if not point:
+            # e.g. Stuttgart currently has no coord /o\
+            return None
+
+        return {
+            "latitude": float(point.find("latitude").text),
+            "longitude": float(point.find("longitude").text),
+        }
+
+    @staticmethod
+    def _get_int(element, tag_name: str) -> Optional[int]:
+        """Return the integer text of the first tag_name element found in element, or None."""
+        tag = element.find(tag_name)
+        if tag is None:
+            return None
+        try:
+            return int(tag.text)
+        except (TypeError, ValueError):
+            # Value is present but not numeric: treat it like a missing value.
+            return None