-
Notifications
You must be signed in to change notification settings - Fork 7
/
csv-prep.py
78 lines (63 loc) · 3.26 KB
/
csv-prep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import csv
import json
import sys
# Read the managed-provider price list. Each record in managed-providers.json
# is expected to carry 'provider', 'model', 'ingest' and 'output' keys.
with open('managed-providers.json', 'r') as providers_file:
    provider_records = json.load(providers_file)

# csv_data maps a row label (here: the provider name) to that row's four
# price cells. Cells start out blank and are filled in per model below.
csv_data = {}
for record in provider_records:
    name, model_name = record['provider'], record['model']
    ingest_price, output_price = record['ingest'], record['output']

    # First sighting of a provider creates its row with every cell blank.
    row = csv_data.setdefault(name, {
        'LLama 3.1 8b instruct ingest price': '',
        'LLama 3.1 8b instruct output price': '',
        'LLama 3.1 70b instruct ingest price': '',
        'LLama 3.1 70b instruct output price': '',
    })

    # Only the two known model names populate cells; any other model is ignored.
    if model_name == 'LLama 3.1 70b instruct':
        row['LLama 3.1 70b instruct ingest price'] = ingest_price
        row['LLama 3.1 70b instruct output price'] = output_price
    elif model_name == 'LLama 3.1 8b instruct':
        row['LLama 3.1 8b instruct ingest price'] = ingest_price
        row['LLama 3.1 8b instruct output price'] = output_price
def _price_per_million(cost_per_hour, throughput):
    """Return the cost of one million units of work at the given hourly rate.

    The hourly cost is divided by 3600 to get a per-second cost, divided by
    ``throughput`` (units processed per second -- presumably tokens; confirm
    against gpu-benchmarks.json) and scaled up to one million units.
    """
    return ((cost_per_hour / 3600) / throughput) * 1000000


# Derive prices for self-hosted GPUs from the benchmark results. Each record
# is expected to carry 'gpu', 'cost_per_hour', 'model_id', 'input_throughput'
# and 'output_throughput' keys.
with open('gpu-benchmarks.json') as json_file:
    data = json.load(json_file)

for item in data:
    gpu = item['gpu']
    cost_per_hour = item['cost_per_hour']
    model_id = item['model_id']
    input_throughput = item['input_throughput']
    output_throughput = item['output_throughput']

    # A zero (or null) throughput cannot be turned into a price: dividing by
    # it would abort the whole export. Skip just this record and say so.
    if not input_throughput or not output_throughput:
        print(
            f"Skipping {gpu} / {model_id}: throughput is zero or missing",
            file=sys.stderr,
        )
        continue

    # Format prices with 2 decimal places and add "$" in front
    ingest_price_formatted = f"${_price_per_million(cost_per_hour, input_throughput):.2f}"
    output_price_formatted = f"${_price_per_million(cost_per_hour, output_throughput):.2f}"

    # NOTE(review): rows are keyed by GPU name only, so two records for the
    # same GPU and model size overwrite each other (last one wins) -- confirm
    # that is intended.
    if gpu not in csv_data:
        csv_data[gpu] = {'LLama 3.1 8b instruct ingest price': '',
                         'LLama 3.1 8b instruct output price': '',
                         'LLama 3.1 70b instruct ingest price': '',
                         'LLama 3.1 70b instruct output price': ''}

    # The model size is detected by substring match on the lower-cased id.
    # NOTE(review): an id such as "...-70b-...-8bit" also contains "8b" and
    # would be filed under the 8b columns -- verify the ids in the input file.
    model_key = model_id.lower()
    if '8b' in model_key:
        csv_data[gpu]['LLama 3.1 8b instruct ingest price'] = ingest_price_formatted
        csv_data[gpu]['LLama 3.1 8b instruct output price'] = output_price_formatted
    elif '70b' in model_key:
        csv_data[gpu]['LLama 3.1 70b instruct ingest price'] = ingest_price_formatted
        csv_data[gpu]['LLama 3.1 70b instruct output price'] = output_price_formatted
# Sort csv data by LLama 3.1 8b instruct ingest price (from lowest to highest).
def _ingest_price_sort_key(entry):
    """Return the sort key for one ``(row label, price cells)`` pair.

    Rows with an 8b ingest price sort first, ordered by that price parsed as
    a number (the cell is a string with a "$" in it, e.g. "$0.25"). Rows
    whose 8b ingest cell is blank sort after all priced rows; because the
    sort is stable they keep their original relative order.
    """
    price = entry[1]['LLama 3.1 8b instruct ingest price']
    if not price:
        # Blank cell: nothing to parse, so push the row to the end instead of
        # dropping it -- its 70b prices still belong in the comparison.
        return (1, 0.0)
    return (0, float(price.replace('$', '')))


sorted_csv_data = dict(sorted(csv_data.items(), key=_ingest_price_sort_key))
# Emit the comparison table: one header line, then one line per entry of
# sorted_csv_data in its existing order. The entry's key goes into the
# 'Inference providers' column and its price cells fill the remaining columns.
column_names = [
    'Inference providers',
    'LLama 3.1 8b instruct ingest price',
    'LLama 3.1 8b instruct output price',
    'LLama 3.1 70b instruct ingest price',
    'LLama 3.1 70b instruct output price',
]
with open('LLM-Provider-comparison.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=column_names)
    writer.writeheader()
    writer.writerows(
        {'Inference providers': label, **prices}
        for label, prices in sorted_csv_data.items()
    )