"""Collect monitoring metrics from GTFS-RT feeds and push them to Azure Monitor.

For each feed URL listed in the GTFSRT_URLS environment variable (comma
separated), this script reports two custom metrics:
  - "Entity Count":  number of entities in the feed message
  - "Timestamp Age": seconds since the feed header's timestamp (staleness)
"""

from google.transit import gtfs_realtime_pb2
import requests
import time
import json
from datetime import datetime
import os
from dotenv import load_dotenv
from send_data_to_azure_monitor import send_custom_metrics_request

load_dotenv()

# When IS_DEBUG=True the metric JSON is printed instead of sent to Azure Monitor.
IS_DEBUG = os.getenv('IS_DEBUG') == "True"

# Maximum time to wait for a feed response (seconds). Without an explicit
# timeout, requests.get would wait indefinitely on a stalled feed and hang
# the whole collector run.
REQUEST_TIMEOUT_SECS = 30


def get_stats(url):
    """Fetch the GTFS-RT feed at *url* and return (entity_count, header_age_secs).

    header_age_secs is the difference between the current wall clock and the
    feed header's timestamp, i.e. how stale the feed currently is.

    Raises requests.RequestException (incl. Timeout) on network errors and
    requests.HTTPError on non-2xx responses, so a broken feed fails loudly
    instead of being parsed into bogus metric values.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECS)
    response.raise_for_status()  # do not ParseFromString an error page

    feed = gtfs_realtime_pb2.FeedMessage()
    feed.ParseFromString(response.content)

    num_entities = len(feed.entity)
    time_diff = round(time.time()) - feed.header.timestamp

    return (num_entities, time_diff)


def send_data_to_azure_monitor(time, metric, url, value):
    """Send one custom metric data point to Azure Monitor.

    :param time: datetime at which the metric was measured
                 (NOTE: parameter shadows the stdlib ``time`` module; name
                 kept for backward compatibility with existing callers)
    :param metric: metric name, e.g. "Entity Count"
    :param url: feed URL, used as the metric's single dimension value
    :param value: measured metric value
    """
    time_str = time.strftime("%Y-%m-%dT%H:%M:%S")

    custom_metric_object = {
        # Time (timestamp): Date and time at which the metric is measured or collected
        "time": time_str,
        "data": {
            "baseData": {
                # Metric (name): name of the metric
                "metric": metric,
                # Namespace: Categorize or group similar metrics together
                "namespace": "GTFSRT",
                # Dimension (dimNames): Metric has a single dimension
                "dimNames": [
                    "URL"
                ],
                # Series: data for each monitored topic
                "series": [
                    {
                        "dimValues": [
                            url
                        ],
                        "sum": value,
                        "count": 1
                    }
                ]
            }
        }
    }

    custom_metric_json = json.dumps(custom_metric_object)

    if IS_DEBUG:
        print(custom_metric_json)
    else:
        send_custom_metrics_request(custom_metric_json, 3)


def main():
    """Poll each configured GTFS-RT feed once and report its metrics."""
    urls_env = os.getenv("GTFSRT_URLS")
    if not urls_env:
        # Fail with an actionable message instead of AttributeError on None.split.
        raise RuntimeError("Environment variable GTFSRT_URLS is not set")

    for url in urls_env.split(","):
        (entity_count, last_published_ago_secs) = get_stats(url)

        # Naive UTC timestamp; the strftime format above carries no tz info
        # and Azure Monitor interprets it as UTC. Renamed from `time` to
        # avoid shadowing the stdlib time module.
        measured_at = datetime.utcnow()

        send_data_to_azure_monitor(measured_at, "Entity Count", url, entity_count)
        send_data_to_azure_monitor(measured_at, "Timestamp Age", url,
                                   last_published_ago_secs)


if __name__ == '__main__':
    main()