-
Notifications
You must be signed in to change notification settings - Fork 0
/
ocr_v3.py
65 lines (51 loc) · 2.39 KB
/
ocr_v3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import requests
def ocr_space_file(filename, overlay=False, api_key='get_it_from_"ocr.space"', language='eng', timeout=60):
    '''
    Purpose: To read text from a local image file by uploading it to the
             OCR.space API.
    Parameters:
        1. filename : path of the image file to upload. It is opened in
                      binary mode and is also used as the multipart form
                      field name.
        2. overlay  : sent as isOverlayRequired - do we need bounding boxes?
                      default- False
        3. api_key  : sent as apikey - get from https://ocr.space
                      ~MUST~ (the default is only a placeholder)
        4. language : sent as language - short form of language of text to
                      be recognized
                      default- 'eng'
        5. timeout  : seconds handed to requests.post so that a stalled
                      connection cannot block the caller forever.
                      Pass None to wait indefinitely.
                      default- 60
    Variables Used:
        1. payload : It is the form data sent to the api url to get a
                     response. Besides the three values taken from the
                     parameters above, this function always sends-
                     1. detectOrientation - True (image could be rotated)
                     2. scale - True (let the service enhance the image)
                     3. OCREngine - 2
                            1. fast but less accuracy
                            2. slow but good accuracy
                     (MORE DETAILS- https://ocr.space/ocrapi )
    Return : the response body decoded to a string (expected to be json
             data; it is NOT parsed here)
    Raises : OSError if the file cannot be opened.
             requests.exceptions.RequestException (including Timeout) if
             the HTTP request fails.
    '''
    # making a dict of the data we need for our request
    payload = {
        'isOverlayRequired': overlay,
        'apikey': api_key,
        'language': language,
        'detectOrientation': True,
        'scale': True,
        'OCREngine': 2,
    }
    # Making request with data assigned above; the file handle has to stay
    # open until the upload is finished, so the request lives inside `with`
    with open(filename, 'rb') as f:
        r = requests.post(
            'https://api.ocr.space/parse/image',
            files={filename: f},
            data=payload,
            # without a timeout requests waits forever on a stalled server
            timeout=timeout,
        )
    # returning data received from the request
    # Format- json data as string
    return r.content.decode()
if __name__ == "__main__":
    # json is only needed when this file is run as a script
    import json

    # Initializing image path to temporary variables
    file_path = 'Sample_Images/test.png'

    # Calling Functions - the raw response body comes back as a string
    test_file = ocr_space_file(filename=file_path)

    # Loading Json data; a body that is not JSON at all is reported with the
    # raw text instead of a bare decoder traceback
    try:
        data_test_file = json.loads(test_file)
    except ValueError:
        raise SystemExit('OCR response is not valid JSON: ' + test_file)

    # A failed request may decode to something other than a JSON object, or
    # to an object without usable results; show the raw response in that
    # case rather than failing on the indexing below
    if isinstance(data_test_file, dict):
        parsed_results = data_test_file.get('ParsedResults')
    else:
        parsed_results = None
    if not parsed_results:
        raise SystemExit('OCR response has no parsed results: ' + test_file)

    # Printing Results
    print('\n----------------TEST FILE---------------\n')
    print(parsed_results[0]['ParsedText'])