From 1edf486d3049cfdf78fe04d47db47b3242cb5d3e Mon Sep 17 00:00:00 2001 From: capjamesg Date: Wed, 27 Dec 2023 01:23:44 +0000 Subject: [PATCH] Update results --- index.html | 40 ++++++++---------- results/2023-12-27.json | 90 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 23 deletions(-) create mode 100644 results/2023-12-27.json diff --git a/index.html b/index.html index 22bd5fd..9349f38 100644 --- a/index.html +++ b/index.html @@ -40,7 +40,7 @@

How's GPT-4 with Vision Doing?

You can contribute your own tests, too! See the GitHub README for contributing instructions.

-

Tests are run every day at 1am PT. Last updated December 26, 2023.

+

Tests are run every day at 1am PT. Last updated December 27, 2023.

Made with ❤️ by the team at Roboflow.

@@ -58,12 +58,12 @@

How's GPT-4 with Vision Doing?

Response Time

-

Today, the average response time to receive results from our tests was 5.58 seconds per request.

+

Today, the average response time to receive results from our tests was 5.52 seconds per request.

This number only accounts for requests made by this application.

-

5.58 s

+

5.52 s

@@ -176,7 +176,7 @@

Prompt

Image

Image of the input into GPT-4

Result

-
{'x': 0.64, 'y': 0.35, 'width': 0.16, 'height': 0.3}
+
{'x': 0.34, 'y': 0.23, 'width': 0.16, 'height': 0.46}

Test submitted by Roboflow

@@ -216,7 +216,7 @@

Graph Understanding

Of the last 7 tests, conducted daily, this test has passed 0% of the time.

-

Today's request cost $0.011

+

Today's request cost $0.01

@@ -232,22 +232,10 @@

Image

Result

```json
 {
-  "A": {
-    "quantity": 15,
-    "price": 20
-  },
-  "B": {
-    "quantity": 30,
-    "price": 25
-  },
-  "C": {
-    "quantity": 40,
-    "price": 35
-  },
-  "D": {
-    "quantity": 45,
-    "price": 42
-  }
+  "A": {"quantity": 10, "price": 15},
+  "B": {"quantity": 20, "price": 25},
+  "C": {"quantity": 30, "price": 35},
+  "D": {"quantity": 40, "price": 45}
 }
 ```

Test submitted by Roboflow

@@ -303,7 +291,13 @@

Prompt

Image

Image of the input into GPT-4

Result

-
Failed to produce a valid JSON output: I'm sorry, I am not able to provide RGB color codes for elements in images. If you have any other questions or need assistance with something else, feel free to ask!
+
```json
+{
+  "R": 128,
+  "G": 0,
+  "B": 128
+}
+```

Test submitted by Roboflow

@@ -685,7 +679,7 @@

Math OCR

Of the last 7 tests, conducted daily, this test has passed 100% of the time.

-

Today's request cost $0.018

+

Today's request cost $0.015

diff --git a/results/2023-12-27.json b/results/2023-12-27.json new file mode 100644 index 0000000..992178d --- /dev/null +++ b/results/2023-12-27.json @@ -0,0 +1,90 @@ +{ + "zero_shot_classification": { + "score": 1, + "success": true, + "price": 0.00481, + "pass_fail": "Pass", + "response_time": 1.502013921737671, + "result": "Toyota Camry" + }, + "count_fruit": { + "score": 0, + "success": false, + "price": 0.007870000000000002, + "pass_fail": "Fail", + "response_time": 2.9652791023254395, + "result": "9" + }, + "document_ocr": { + "score": 1, + "success": true, + "price": 0.00857, + "pass_fail": "Pass", + "response_time": 4.451231241226196, + "result": "I was thinking earlier today that I have gone through, to use the lingo, eras of listening to each of Swift's Eras. Meta indeed. I started listening to Ms. Swift's music after hearing the Midnights album. A few weeks after hearing the album for the first time, I found myself playing various songs on repeat. I listened to the album in order multiple times." + }, + "handwriting_ocr": { + "score": 1, + "success": true, + "price": 0.008730000000000002, + "pass_fail": "Pass", + "response_time": 10.580918788909912, + "result": "The words of songs on the album have been echoing in my head all week. \"Fades into the grey of my day old tea.\"" + }, + "extraction_ocr": { + "score": 1.0, + "success": true, + "price": 0.00725, + "pass_fail": "Pass", + "response_time": 11.72177791595459, + "result": "[{'name': 'MARY THOMAS', 'time_per_day': 1, 'medication': 'ATENOLOL', 'dosage': 100, 'rx_number': '1234567-12345'}]" + }, + "math_ocr": { + "score": 1.0, + "success": true, + "price": 0.01528, + "pass_fail": "Pass", + "response_time": 2.357414960861206, + "result": "3x^2-6x+2" + }, + "object_detection": { + "score": 0.17692307692307696, + "success": false, + "price": 0.009490000000000002, + "pass_fail": "Fail", + "response_time": 2.6905484199523926, + "result": "{'x': 0.34, 'y': 0.23, 'width': 0.16, 'height': 0.46}" + }, + "graph_understanding": { + "score": 0.79, + "success": false, + "price": 0.01019, + "pass_fail": "Fail", + "response_time": 3.8739795684814453, + "result": "```json\n{\n \"A\": {\"quantity\": 10, \"price\": 15},\n \"B\": {\"quantity\": 20, \"price\": 25},\n \"C\": {\"quantity\": 30, \"price\": 35},\n \"D\": {\"quantity\": 40, \"price\": 45}\n}\n```" + }, + "color_recognition": { + "score": 0.8941176470588236, + "success": false, + "price": 0.008870000000000001, + "pass_fail": "Fail", + "response_time": 3.0247037410736084, + "result": "```json\n{\n \"R\": 128,\n \"G\": 0,\n \"B\": 128\n}\n```" + }, + "annotation_qa": { + "score": 0.33333333333333337, + "success": false, + "price": 0.015300000000000001, + "pass_fail": "Fail", + "response_time": 2.1660642623901367, + "result": "```json\n{\n \"missing\": 1\n}\n```" + }, + "measurement": { + "score": 0.8571428571428572, + "success": false, + "price": 0.00877, + "pass_fail": "Fail", + "response_time": 5.117038726806641, + "result": "```json\n{\n \"length\": 3.0,\n \"width\": 3.0\n}\n```" + } +} \ No newline at end of file