forked from candy-kk/FofaSpider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
FofaSpider.py
175 lines (147 loc) · 8.68 KB
/
FofaSpider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# !/usr/bin/env python
# coding: utf-8
import sys
import re
import time
import xlwt
import base64
import random
import optparse
import requests
from datetime import datetime
from urllib.parse import quote,unquote
from lxml import etree
# 生成一个txt的输出结果
def banner():
print("""\033[36m
_____ __ ____ _ _
| ___|__ / _| __ _/ ___| _ __ (_) __| | ___ _ __
| |_ / _ \| |_ / _` \___ \| '_ \| |/ _` |/ _ \ '__|
| _| (_) | _| (_| |___) | |_) | | (_| | __/ |
|_| \___/|_| \__,_|____/| .__/|_|\__,_|\___|_|
|_| \033[0m
# coded by KpLi0rn website www.wjlshare.com
""")
def Cmd():
parser = optparse.OptionParser()
parser.add_option('-q', '--query', dest='query', type=str, help='Your Fofa Query')
parser.add_option('-r',dest='source',help='Your txt Path') # 批量搜索文件
parser.add_option('-p',dest='startpage',default=1,type=int,help='StartPage')
parser.add_option('-s',dest='spidernum',default=0,type=int,help='Spider number')
(options, args) = parser.parse_args()
return options,args
class FofaSpider(object):
def __init__(self,Cookie,Query,StartPage,SpiderNum):
self.q = quote(Query)
self.qbase64 = quote(str(base64.b64encode(Query.encode()),encoding='utf-8'))
self.UserAgent = ["Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0","Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36","Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11","Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16","Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36","Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)","Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)","Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50","Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0","Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11","Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)"]
# 这里的cookie 后面改成input 用户自己输入
self.cookie = Cookie
self.page = 1
self.startpage = StartPage
self.spidernum = SpiderNum
self.dt = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
self.name = ("".join((self.dt.split(" ")[0]).split('-'))) + "_" + "".join((self.dt.split(" ")[1]).split(':'))
def spider(self):
global i
i=0
compile = re.compile('HTTP/1.1 (\d+) ')
filename = xlwt.Workbook()
sheet = filename.add_sheet('result')
header = {
"User-Agent": random.choice(self.UserAgent),
"cookie": self.cookie
}
url = 'https://fofa.so/result?q={}&qbase64={}&full=true'.format(self.q, self.qbase64)
html = requests.get(url=url, headers=header).text
# pages = re.findall('<span clas="el-pagination__total">(\d*)</span>', html)
try:
nums =int("".join(re.findall('<span class="el-pagination__total">共 (.*) 条</span>',html)))
except ValueError as e:
nums = 1
if nums < 10:
page = 1
else:
page = nums//10+1
print("\033[31m总共有{}页\033[0m".format(page))
print("\033[31m查询语句为{}\033[0m".format(unquote(self.q,'utf-8')))
try:
pagenum = int(page) + 1
for n in range(self.startpage,pagenum):
if self.spidernum != 0 and (n == (self.startpage + self.spidernum +1)):
break
target = 'https://fofa.so/result?page={}&page_size=10&qbase64={}&full=true'.format(n, self.qbase64)
res = requests.get(url=target, headers=header).text
if "0</span> 条匹配结果" in res:
sys.stdout.write("\033[31m0条匹配结果,请检查查询语句是否正确\n\033[0m")
sys.exit(0)
if "error 500程序出错了" in res:
sys.stdout.write("\033[31m500程序出错了,请检查查询语句是否正确\n\033[0m")
sys.exit(0)
if "游客使用高级语法只能显示第一页" in res:
sys.stdout.write('\033[31m游客使用高级语法只能显示第一页\n\033[0m')
sys.stdout.write(
'\033[31m搜集结果为{}.csv、\033[0m\033[31m\t{}.txt\n\n\033[0m'.format(spider.name, spider.name))
sys.exit(0)
selector = etree.HTML(res)
# //*[@id="__layout"]/div/div/div/div/div/div/div/div/div/div/div/div/div/span/text()
# codes = "".join(selector.xpath('//*[@id="__layout"]/div/div/div/div/div/div/div/div/div/div/div/div/div/text()')) # 爬取状态码
codes = "".join(selector.xpath('//*[@id="__layout"]/div/div/div/div/div/div/div/div/div/div/div/div/div/span/text()')) # 爬取状态码
domain = selector.xpath('//*[@id="__layout"]/div/div/div/div/div/div/div/div/div/span/a/text()') # 爬取域名或ip
# domain_compile = re.compile("<a target=\"_blank\" href=\"(.*?)\"")
# domain = domain_compile.findall(html)
domain = [value.strip('\n').strip(' ') for value in domain if len(value.strip('\n').strip(' ')) != 0]
nums = compile.findall(codes) # 状态码列表
# rdp等协议类查询
if len(domain)==0:
scheme = selector.xpath('//*[@id="ajax_content"]/div/div[1]/div[1]/text()')
scheme = [value.strip(' ').strip('\n').strip(' ') for value in scheme]
print("\033[31m第%s页\033[0m" % str(n))
with open("{}.txt".format(self.name),"a+") as file:
for value in scheme:
print(value)
file.writelines(value)
file.writelines("\n")
sheet.write(i, 0, value)
i += 1
time.sleep(random.randint(5, 8))
else:
# 域名和ip聚合成字典
res = zip(domain,nums)
print("\033[31m第%s页\033[0m" % str(n))
with open("{}.txt".format(self.name),"a+") as file:
for value in res:
file.writelines(value[0])
file.writelines("\n")
print(value[0] + ": " + value[1])
sheet.write(i, 0, value[0])
sheet.write(i, 1, value[1])
i += 1
time.sleep(random.randint(5,8))
filename.save('./{}.csv'.format(self.name))
sys.stdout.write('\033[31m搜集结果为{}.csv、\033[0m\033[31m\t{}.txt\n\n\033[0m'.format(spider.name, spider.name))
except Exception as e:
print("'\033[31m[!]异常退出!\033[0m'")
print(e)
def run(self):
self.spider()
if __name__ == '__main__':
banner()
options, args = Cmd()
cookie = "".join(args)
try:
if options.source is not None:
with open(options.source,'r+',encoding='utf-8') as file:
for value in file.readlines():
value = value.strip('\n')
try:
spider = FofaSpider(cookie, value, options.startpage, options.spidernum)
spider.run()
except:
pass
else:
spider = FofaSpider(cookie,options.query, options.startpage, options.spidernum)
spider.run()
except KeyboardInterrupt:
print("\n\033[31m[!]用户退出\033[0m\n")
sys.stdout.write('\033[31m搜集结果为{}.csv、\033[0m\033[31m\t{}.txt\n\n\033[0m'.format(spider.name,spider.name))