Merge pull request #3 from HSLdevcom/gtfsrt_monitor
Create script for monitoring GTFS-RT feeds
mjaakko authored Dec 8, 2022
2 parents faa1e4e + 63a0d74 commit adca7ff
Showing 3 changed files with 81 additions and 0 deletions.
8 changes: 8 additions & 0 deletions README.md
@@ -35,6 +35,14 @@ Now you can run:
python3 mqtt_data_collector.py
```

### Run GTFS-RT data collector

Add a comma-separated list of GTFS-RT URLs to an environment variable named `GTFSRT_URLS` and run:

```bash
python3 gtfsrt_data_collector.py
```
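
For example, two feeds can be configured like this (the URLs below are placeholders, not real endpoints):

```bash
export GTFSRT_URLS="https://example.com/gtfs-rt/vehicle-positions,https://example.com/gtfs-rt/trip-updates"
python3 gtfsrt_data_collector.py
```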

## Deployment

Deployment is done with Ansible on the Pulsar proxy server. To update this app, create a new release in GitHub (https://github.com/HSLdevcom/transitdata-monitor-data-collector/releases/new) and then run the Pulsar proxy playbook.
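
A rough sketch of that last step; the playbook and inventory names here are hypothetical, since the actual Ansible setup lives outside this repository:

```bash
# Hypothetical playbook/inventory names -- substitute the real Pulsar proxy playbook
ansible-playbook -i inventory/production pulsar_proxy.yml
```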
72 changes: 72 additions & 0 deletions gtfsrt_data_collector.py
@@ -0,0 +1,72 @@
from google.transit import gtfs_realtime_pb2
import requests
import time
import json
from datetime import datetime
import os
from dotenv import load_dotenv
from send_data_to_azure_monitor import send_custom_metrics_request

load_dotenv()

IS_DEBUG = os.getenv('IS_DEBUG') == "True"

def get_stats(url):
    # Download the GTFS-RT feed and parse its Protocol Buffers representation
    feed = gtfs_realtime_pb2.FeedMessage()
    response = requests.get(url)
    feed.ParseFromString(response.content)

    # Number of entities in the feed and age of the feed header timestamp in seconds
    num_entities = len(feed.entity)
    time_diff = round(time.time()) - feed.header.timestamp

    return (num_entities, time_diff)

def send_data_to_azure_monitor(time, metric, url, value):
    time_str = time.strftime("%Y-%m-%dT%H:%M:%S")

    custom_metric_object = {
        # Time (timestamp): Date and time at which the metric is measured or collected
        "time": time_str,
        "data": {
            "baseData": {
                # Metric (name): name of the metric
                "metric": metric,
                # Namespace: Categorize or group similar metrics together
                "namespace": "GTFSRT",
                # Dimension (dimNames): Metric has a single dimension
                "dimNames": [
                    "URL"
                ],
                # Series: data for each monitored feed URL
                "series": [
                    {
                        "dimValues": [
                            url
                        ],
                        "sum": value,
                        "count": 1
                    }
                ]
            }
        }
    }

    custom_metric_json = json.dumps(custom_metric_object)

    # In debug mode only print the metric JSON; otherwise send it via the existing Azure Monitor helper
    if IS_DEBUG:
        print(custom_metric_json)
    else:
        send_custom_metrics_request(custom_metric_json, 3)

def main():
    # GTFSRT_URLS contains a comma-separated list of GTFS-RT feed URLs to monitor
    urls = os.getenv("GTFSRT_URLS").split(",")
    for url in urls:
        (entity_count, last_published_ago_secs) = get_stats(url)

        time = datetime.utcnow()

        send_data_to_azure_monitor(time, "Entity Count", url, entity_count)
        send_data_to_azure_monitor(time, "Timestamp Age", url, last_published_ago_secs)

if __name__ == '__main__':
    main()
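
With `IS_DEBUG=True` the script prints the custom metric JSON instead of sending it to Azure Monitor, which is handy for checking a feed locally (placeholder URL shown):

```bash
IS_DEBUG=True GTFSRT_URLS="https://example.com/gtfs-rt/vehicle-positions" python3 gtfsrt_data_collector.py
```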
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,2 +1,3 @@
paho-mqtt==1.6.1
python-dotenv==0.20.0
gtfs-realtime-bindings==0.0.7
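
The new `gtfs-realtime-bindings` dependency can be installed alongside the existing ones with pip (assuming the requirements.txt-based setup shown above):

```bash
pip3 install -r requirements.txt
```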
