-
Notifications
You must be signed in to change notification settings - Fork 0
/
jobsnepal.py
96 lines (88 loc) · 3.58 KB
/
jobsnepal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from bs4 import BeautifulSoup
import requests
import json
def jobsnepal(json_path='C:/Projects/itjobseeker/public/jsondata/jobsnepal.json'):
    """Scrape IT job listings from jobsnepal.com and merge them into a JSON file.

    Previously stored jobs are loaded from *json_path*; every listing whose
    ``Page_URL`` is not already stored is fetched, parsed, and appended.
    The merged list is then written back to the same file.

    Args:
        json_path: Path of the JSON file used as the persistent job store.
            Defaults to the original hard-coded project path.
    """
    count = 0
    # Load previously stored jobs. Start from an empty store when the file
    # is missing (first run), is not valid JSON, or an entry lacks Page_URL.
    try:
        with open(json_path, 'r') as readfile:
            data = json.load(readfile)
        stored_links = [entry['Page_URL'] for entry in data]
    except (FileNotFoundError, ValueError, KeyError):
        data = []
        stored_links = []

    # Collect the category landing page plus every pagination link on it.
    base_url = 'https://www.jobsnepal.com/category/information-technology-jobs'
    hlink = [base_url]
    hyper_source = requests.get(base_url, timeout=30).text
    soup = BeautifulSoup(hyper_source, 'lxml')
    for hyperlink in soup.find_all('a', class_='page-link'):
        hlink.append(hyperlink['href'])
    # De-duplicate while preserving order.
    hlink = list(dict.fromkeys(hlink))

    # Gather individual job-posting URLs from each listing page.
    links = []
    for slink in hlink:
        source = requests.get(slink, timeout=30).text
        soup = BeautifulSoup(source, 'lxml')
        for anchor in soup.find_all('a', class_='text-base'):
            links.append(anchor['href'])

    for link in links:
        if link in stored_links:
            print("Already in database")
            continue
        stored_links.append(link)
        count = count + 1
        print("[" + str(count) + "]", "New job found !", link)

        source = requests.get(link, timeout=30).text
        soup = BeautifulSoup(source, 'lxml')
        company = soup.find('a', class_='text-white').get_text(strip=True)
        name = soup.find('h1', class_='job-title').get_text(strip=True)
        table_data = soup.find('table', class_='table-striped')

        # Default every detail field so listings missing a table row
        # cannot raise NameError when the record is assembled below.
        vacancy = ""
        deadline = ""
        address = ""
        education = ""
        experience = ""
        salary = ""
        level = ""
        time = ""
        for row in table_data.find_all('tr'):
            cells = row.find_all('td')
            # Skip malformed rows that do not have a label/value pair.
            if len(cells) < 2:
                continue
            index = cells[0].get_text(strip=True)
            value = cells[1].get_text(strip=True)
            if index == "Openings":
                vacancy = value
            elif index == "Salary":
                salary = value
            elif index == "Position Type":
                time = value
            elif index == "Position Level":
                level = value
            elif index == "Experience":
                experience = value
            elif index == "Education":
                education = value
            elif index == "Apply Before":
                deadline = value
            elif index == "City":
                address = value

        desct = soup.find('div', class_='col-lg-8').get_text(strip=True)
        print(link)
        data.append({
            'name': name,
            'company': company,
            'vacancy': vacancy,
            'time': time,
            'address': address,
            'deadline': deadline,
            'education': education,
            'experience': experience,
            'level': level,
            'salary': salary,
            'desct': desct,
            'Page_URL': link,
            'websitename': 'jobsnepal.com'
        })

    # Persist the merged (old + newly scraped) job list.
    with open(json_path, 'w') as outfile:
        json.dump(data, outfile)
    print("jobsnepal done")
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    jobsnepal()