-
Notifications
You must be signed in to change notification settings - Fork 0
/
fetcher.py
72 lines (62 loc) · 2.05 KB
/
fetcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import requests
import urllib.parse
import MySQLdb
import sys
from bs4 import BeautifulSoup
# Fail fast with a usage message instead of a bare IndexError when an argument
# is missing. This runs before the database connection is opened so that a bad
# invocation does not leave a connection behind.
if len(sys.argv) < 4:
    sys.exit("usage: fetcher.py COLLECTION_ID COLLECTION_NAME CATEGORY")

# NOTE(review): the database credentials are hard-coded; consider loading them
# from the environment or a config file. Nothing in the visible part of this
# script uses `db` or `cursor` -- presumably they are meant for persisting the
# fetched records; confirm.
db = MySQLdb.connect("localhost", "zach", "zach", "nft_world_architecture", charset='utf8')
cursor = db.cursor()

DOMAIN = "https://3dwarehouse.sketchup.com"

# Command-line arguments: collection id, collection name (last path segment
# of the collection page URL) and the category stored with every record.
COLLECTION_ID = sys.argv[1]
COLLECTION_NAME = sys.argv[2]
CATEGORY = sys.argv[3]

# Collection page that is scraped for the entity count.
PAGE_URL = DOMAIN + "/collection/" + COLLECTION_ID + "/" + COLLECTION_NAME
# Endpoint that lists the entities.
BASE_URL = DOMAIN + '/warehouse/v1.0/entities'

# Maps entity id -> record dict; filled while iterating over the API entries.
DATADICT = {}
# Web scrape the collection page to get the entity count.
# A timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get(PAGE_URL, timeout=30)
# Surface HTTP errors (404, 5xx, ...) here instead of parsing an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")

# find() returns None when no element matches; report that explicitly rather
# than failing with an AttributeError on None.
count_tag = soup.find(class_="tab-count")
if count_tag is None:
    raise RuntimeError(
        "could not find the entity count ('tab-count' element) on " + PAGE_URL
    )

# The element text wraps the count in parentheses (presumably like "(42)");
# strip them so only the number remains.
count = count_tag.get_text().replace("(", "").replace(")", "")
# API call to retrieve every entity of the collection in one request:
# "count" is the total scraped from the collection page and "offset" is 0.
params = {
    "recordEvent": "false",
    "showBinaryMetadata": "true",
    "showAttributes": "true",
    "contentType": "3dw",
    "fq": "parentIds==" + COLLECTION_ID + ";(subtype=exists=false,subtype=='')",
    "count": count,
    "offset": 0,
    "sortBy": "createTime DESC",
}
url = BASE_URL + '?' + urllib.parse.urlencode(params)

# NOTE(review): this cookie value is hard-coded; confirm whether the API
# actually requires it and whether it can expire.
headers = {
    'Cookie': 'whp_unique=19ceaeb8-2d7d-434c-bd77-d1a8e61d2080',
}

# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of trying to decode an error body.
response.raise_for_status()

# Fall back to an empty list so later iteration never receives None when the
# response has no 'entries' key.
entries = response.json().get('entries') or []
# Build one record per API entry, keyed by entity id.
# Every value is computed fresh for each entry, so an entry that lacks a
# thumbnail or a downloadable binary gets None instead of raising NameError
# or silently inheriting the URLs of the previous entry.
for entry in entries or []:
    entity_id = entry.get('id')
    # Tolerate entries without creator/binaries information.
    creator = entry.get('creator') or {}
    binaries = entry.get('binaries') or {}

    thumbnail = binaries.get('bot_lt') or {}
    # Prefer the 's20' binary and fall back to 's19' when it is absent.
    model = binaries.get('s20') or binaries.get('s19') or {}

    DATADICT[entity_id] = {
        "id": entity_id,
        "title": entry.get('title'),
        "author": creator.get('displayName'),
        "author_id": creator.get('id'),
        "thumbnail_url": thumbnail.get('contentUrl'),
        "download_url": model.get('url'),
        "category": CATEGORY,
    }

# Emit the collected records once all entries have been processed.
print(DATADICT)