Feature | Update several features; see the CHANGELOG for details
Cl0udG0d committed Mar 28, 2023
1 parent 24fd914 commit 5e0f39e
Showing 10 changed files with 193 additions and 61 deletions.
9 changes: 4 additions & 5 deletions README.md
@@ -38,11 +38,11 @@ Fofa-hack>python fofa.py -h
|__| \____/|__| /__/\__\
_ _ ____ ____ __ __
| |_| | / () \ / (__`| |/ /
|_| |_|/__/\__\\____)|__|\__\ V2.0.6
|_| |_|/__/\__\\____)|__|\__\ V2.1.0
usage: fofa.py [-h] [--timesleep TIMESLEEP] [--timeout TIMEOUT] --keyword KEYWORD [--endcount ENDCOUNT] [--level LEVEL] [--output OUTPUT] [--fuzz]
Fofa-hack v2.0.6 使用说明
Fofa-hack v2.1.0 使用说明
optional arguments:
-h, --help show this help message and exit
@@ -58,7 +58,6 @@ optional arguments:
crawl level: 1-3; higher values return more detail (default: 1)
--output OUTPUT, -o OUTPUT
output format: txt or json (default: txt)
--fuzz, -f            keyword fuzz option; increases the granularity of data retrieval
```

Results are written to a file named `md5(<search keyword>)_<run timestamp>.txt`
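Aside (an editor's illustration, not part of this commit): the file name can be reproduced with the standard library, assuming `tookit.unit.md5` is a plain hex MD5 digest, as its use in `fofa.py` suggests:

```python
import hashlib
import time

def output_filename(keyword: str, ext: str = "txt") -> str:
    # Mirrors fofa.py: md5(search keyword) + "_" + unix timestamp + "." + output format
    digest = hashlib.md5(keyword.encode("utf-8")).hexdigest()
    return "{}_{}.{}".format(digest, int(time.time()), ext)

print(output_filename('"thinkphp"'))  # <32 hex chars>_<timestamp>.txt
```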
@@ -156,6 +155,6 @@ optional arguments:
I have set up a WeChat security chat group; feel free to add me on WeChat with the note `进群` (join group) and come chat. There is also an official account that posts security-related content; you are welcome to follow it :)

<div>
<img alt="GIF" src="https://springbird.oss-cn-beijing.aliyuncs.com/img/mmqrcode1632325540724.png" width="280px" />
<img alt="GIF" src="https://springbird.oss-cn-beijing.aliyuncs.com/img/qrcode_for_gh_cead8e1080d6_344.jpg" width="280px" />
<img alt="JPG" src="https://springbird3.oss-cn-chengdu.aliyuncs.com/lianxiang/1a1f7894a170bec207e61bf86a01592.jpg" width="280px" />
<img alt="JPG" src="https://springbird3.oss-cn-chengdu.aliyuncs.com/lianxiang/qrcode_for_gh_cead8e1080d6_430.jpg" width="280px" />
</div>
2 changes: 1 addition & 1 deletion config.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
# @Github: https://github.com/Cl0udG0d

# Fofa-hack version number
VERSION_NUM="2.0.6"
VERSION_NUM="2.1.0"
# Maximum number of login retries
MAX_LOGIN_RETRY_NUM=3
# Maximum number of retries when fetching page URLs
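Aside (a sketch, not part of this commit): `MAX_MATCH_RETRY_NUM` above drives the retry loop in `fofa_spider_page` further down. Reduced to its skeleton, the pattern is roughly the following; the constant's value is assumed here, and `fetch` stands in for the page request:

```python
MAX_MATCH_RETRY_NUM = 3  # assumed value; the real constant lives in config.py

def fetch_with_retry(fetch, max_retries=MAX_MATCH_RETRY_NUM):
    # Retry wrapper in the spirit of fofa_spider_page's while-loop.
    attempt = 0
    while attempt < max_retries:
        try:
            return fetch()
        except Exception as e:
            attempt += 1
            print("[-] error:{}".format(e))
            print("[-] Retry {}: fetching page URLs".format(attempt))
    raise SystemExit("[-] FOFA fetch retries exceeded the maximum; exiting")
```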
7 changes: 7 additions & 0 deletions docs/CHANGELOG.md
@@ -1,6 +1,13 @@
# CHANGELOG (code change record)

### 2.1.0

+ Remove redundant images
+ Update the README images
+ Add automated country fuzzing

### 2.0.6

+ Update the README and other files
+ Merge code - optimize the timestamp logic

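Aside: the "automated country fuzzing" entry corresponds to the new `bypassCountry` path in `fofa.py` below. When the query carries no `country=` filter, the crawler re-issues it once per country found on the page, which works around the cap on results for any single query. A minimal sketch of the sub-query construction, mirroring the code in this commit (the country codes are illustrative):

```python
import base64
from urllib.parse import quote_plus

def country_fuzz_urls(search_key, countries):
    # One FOFA result URL per country, mirroring how fofa.py appends
    # '&& country="..."' and re-encodes the qbase64 parameter.
    urls = []
    for country in countries:
        sub_key = '{} && country="{}"'.format(search_key, country)
        qbase64 = quote_plus(base64.b64encode(sub_key.encode("utf-8")))
        urls.append("https://fofa.info/result?qbase64=" + qbase64)
    return urls

for url in country_fuzz_urls('"thinkphp"', ["CN", "US"]):
    print(url)
```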
163 changes: 113 additions & 50 deletions fofa.py
@@ -12,6 +12,8 @@
import config
from tookit import unit, fofa_useragent
import argparse

from tookit.bypass import ByPass
from tookit.levelData import LevelData
from tookit.outputData import OutputData
import re, requests
@@ -20,14 +22,26 @@


class Fofa:
'''
FUZZ rules (XPath selectors used for country fuzzing)
'''
LEFT_LIST_RULE = '//div[@class="hsxa-meta-data-list-main-left hsxa-fl"]'
CITY_RULE = 'p[3]/a/@href'

CITY_SET = set()


def __init__(self):
self.bypass = None
self.headers_use = ""
self.level = 0
self.host_set = set()
self.timestamp_set = set()
self.timestamp_list=[set()]
self.oldLength = -1
self.endcount=0
self.filename = ""
self.countryList=[]
self.timestampIndex=0

print('''
____ ____ ____ ____
@@ -51,9 +65,8 @@ def logoutInitMsg(self):
[*] Crawl delay: {}s
[*] Search keyword: {}
[*] Stop count: {}
[*] FUZZ enabled: {}
[*] Output format: {}
[*] Output file: {}'''.format(self.level,self.timeSleep,self.searchKey,self.endcount,self.fuzz,self.output,self.filename)
[*] Output file: {}'''.format(self.level,self.timeSleep,self.searchKey,self.endcount,self.output,self.filename)
)
return

@@ -71,7 +84,7 @@ def init(self):
parser.add_argument('--endcount', '-e', help='number of results at which to stop crawling')
parser.add_argument('--level', '-l', help='crawl level: 1-3; higher values return more detail (default: 1)')
parser.add_argument('--output', '-o', help='output format: txt or json (default: txt)')
parser.add_argument('--fuzz', '-f', help='keyword fuzz option; increases the granularity of data retrieval',action='store_true')
# parser.add_argument('--fuzz', '-f', help='keyword fuzz option; increases the granularity of data retrieval',action='store_true')
args = parser.parse_args()
self.timeSleep= int(args.timesleep)
self.timeout = int(args.timeout)
Expand All @@ -82,7 +95,7 @@ def init(self):
self.endcount=100
self.level=args.level if args.level else "1"
self.levelData=LevelData(self.level)
self.fuzz=args.fuzz
# self.fuzz=args.fuzz
self.output = args.output if args.output else "txt"
self.filename = "{}_{}.{}".format(unit.md5(self.searchKey), int(time.time()),self.output)
self.outputData = OutputData(self.filename, pattern=self.output)
@@ -123,80 +136,130 @@ def getTimeList(self, text):
timelist.append(temp.replace("<span>", "").replace("</span>", "").strip())
return timelist

def fofa_spider_page(self, searchbs64):
def bypassCountry(self,context):
tree = etree.HTML(context)
leftList = tree.xpath(self.LEFT_LIST_RULE)
countryList=list()
for i in range(len(leftList)):
if len(leftList[i].xpath(self.CITY_RULE))>0:
cityURL = leftList[i].xpath(self.CITY_RULE)[0].strip()
city=self.filterKeyword(cityURL,"country")
if city is not None and city not in self.CITY_SET:
self.CITY_SET.add(city)
countryList.append(city)
# print(countryList)
return countryList

def filterKeyword(self,keyURL,key):
# print(keyURL)
searchbs64=keyURL.split("qbase64=")[1]
search_key=base64.b64decode(searchbs64).decode()
if key in search_key:
pattern = r'{}="([^"]+)"'.format(key)
match = re.search(pattern, search_key)
city = match.group(1) if match else None  # guard: the regex may not match
return city
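Aside (not part of the diff): `bypassCountry` and `filterKeyword` together pull the `country` value back out of each result link. The round trip looks like this; the sample link is fabricated, and its payload decodes to `"thinkphp" && country="CN"`:

```python
import base64
import re
from urllib.parse import unquote_plus

sample_href = '/result?qbase64=InRoaW5rcGhwIiAmJiBjb3VudHJ5PSJDTiI%3D'
qbase64 = unquote_plus(sample_href.split("qbase64=")[1])
search_key = base64.b64decode(qbase64).decode()
print(search_key)                                            # "thinkphp" && country="CN"
print(re.search(r'country="([^"]+)"', search_key).group(1))  # CN
```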

def fofa_spider_page(self, searchbs64,timestampIndex=0):
"""
Fetch one page of results
:rtype: object
"""
searchbs64=searchbs64.replace("%3D","=")
init_search_key = base64.b64decode(searchbs64).decode()
print(init_search_key)
TEMP_RETRY_NUM=0

while TEMP_RETRY_NUM < config.MAX_MATCH_RETRY_NUM:
try:
# try:
request_url = 'https://fofa.info/result?qbase64=' + searchbs64 + "&full=false&page_size=10"
# print(f'request_url:{request_url}')
rep = requests.get(request_url, headers=self.headers_use, timeout=self.timeout)
self.levelData.startSpider(rep)

# tree = etree.HTML(rep.text)
# urllist = tree.xpath('//span[@class="hsxa-host"]/a/@href')
timelist = self.getTimeList(rep.text)
print("[*] Results crawled [{}]: ".format(len(self.host_set))+str(self.levelData.formatData))
# print(timelist)
for temptime in timelist:
self.timestamp_list[self.timestampIndex].add(temptime)

for i in self.levelData.formatData:
with open(self.filename, 'a+', encoding="utf-8") as f:
f.write(str(i) + "\n")
self.saveData(rep)
for url in self.levelData.formatData:
self.host_set.add(url)
for temptime in timelist:
self.timestamp_set.add(temptime)

time.sleep(self.timeSleep)
'''
fuzz section
'''
# self.bypass = ByPass(rep.text)
if "country" not in init_search_key:
countryList=self.bypassCountry(rep.text)
for country in countryList:
search_key = init_search_key+ ' && country="' + str(country) + '"'
# print(search_key)
searchbs64_modify = quote_plus(base64.b64encode(search_key.encode("utf-8")))
self.timestampIndex+=1
self.timestamp_list.append(set())
self.fofa_common_spider(search_key,searchbs64_modify,self.timestampIndex)
return
except Exception as e:
print("[-] error:{}".format(e))
TEMP_RETRY_NUM+=1
print('[-] Retry {}: fetching page URLs'.format(TEMP_RETRY_NUM))
pass
# except Exception as e:
# print("[-] error:{}".format(e))
# TEMP_RETRY_NUM+=1
# print('[-] Retry {}: fetching page URLs'.format(TEMP_RETRY_NUM))
# pass


print('[-] FOFA fetch retries exceeded the maximum; exiting')
exit(0)

def saveData(self,rep):
"""
Save the crawled data to the output file
@param rep:
"""
self.levelData.startSpider(rep)
# tree = etree.HTML(rep.text)
# urllist = tree.xpath('//span[@class="hsxa-host"]/a/@href')
print("[*] Results crawled [{}]: ".format(len(self.host_set)) + str(self.levelData.formatData))

for i in self.levelData.formatData:
with open(self.filename, 'a+', encoding="utf-8") as f:
f.write(str(i) + "\n")

def fofa_common_spider(self, search_key, searchbs64):
def fofa_common_spider(self, search_key, searchbs64,index):
while len(self.host_set) < self.endcount and self.oldLength !=len(self.host_set):
self.oldLength=len(self.host_set)
self.timestamp_set.clear()
self.timestamp_list[index].clear()
self.fofa_spider_page(searchbs64)
search_key_modify= self.modify_search_time_url(search_key)
search_key_modify= self.modify_search_time_url(search_key,index)
# print(search_key_modify)
searchbs64_modify = quote_plus(base64.b64encode(search_key_modify.encode()))
search_key = search_key_modify
searchbs64 = searchbs64_modify
if len(self.host_set) >= self.endcount:
print("[*] Crawling finished")
# print("[*] Node {}: crawling finished".format(index))
return
if self.oldLength == len(self.host_set):
print("[-] No new data; stopping crawl")
print("[-] Node {}: no new data; this node is exhausted".format(index))
return

def fofa_fuzz_spider(self, search_key, searchbs64):
while len(self.host_set) < self.endcount and self.oldLength !=len(self.host_set):
self.oldLength=len(self.host_set)
self.timestamp_set.clear()
self.fofa_spider_page(searchbs64)
search_key_modify = self.modify_search_time_url(search_key)
# def fofa_fuzz_spider(self, search_key, searchbs64):
# """
# 递归调用 fofa_common_spider 方法不断 fuzz
# @param search_key:
# @param searchbs64:
# @return:
# """
# FUZZ_LIST=["country","port","server","protocol","title"]
# for key in FUZZ_LIST:
# if key not in search_key:
# results=self.bypass.switchBypass(key)
# for result in results:
# search_key = search_key + ' && ' + key +'="' + str(result) + '"'
# searchbs64_modify = quote_plus(base64.b64encode(search_key.encode()))
# self.fofa_common_spider(search_key,searchbs64_modify)

searchbs64_modify = quote_plus(base64.b64encode(search_key_modify.encode()))
search_key = search_key_modify
searchbs64 = searchbs64_modify
if len(self.host_set) >= self.endcount:
print("[*] 数据爬取结束")
return
if self.oldLength == len(self.host_set):
print("[-] 数据无新增,退出爬取")
return
# def fuzzCountry(self):
# return

def modify_search_time_url(self, search_key):
def modify_search_time_url(self, search_key,index):
"""
Revise the search key based on the observed timestamps
:param search_key:
@@ -211,10 +274,12 @@ def modify_search_time_url(self, search_key):
match = re.search(pattern, search_key)
before_time_in_search_key = match.group(1)
time_before_time_in_search_key = datetime.strptime(before_time_in_search_key, "%Y-%m-%d").date()

# print(self.timestamp_list)
# print(index)
# treat the day after the earliest timestamp as the optimized time_before
timestamp_list=list(self.timestamp_set)
timestamp_list=list(self.timestamp_list[index])
timestamp_list.sort()
# print(timestamp_list)

time_first = timestamp_list[0].split(' ')[0].strip('\n').strip()
time_first_time = datetime.strptime(time_first, "%Y-%m-%d").date()
@@ -227,7 +292,7 @@
#print(time_before)

if 'before' in search_key:
print(search_key)
# print(search_key)
search_key = search_key.split('&& before')[0]
search_key = search_key.strip(' ')
search_key = search_key + ' && ' + 'before="' + str(time_before) + '"'
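Aside: rather than paging by page number (deep pages are not freely accessible), the crawler shrinks the time window each round, re-anchoring `before=` near the earliest timestamp seen on the page. A sketch of that rewrite; the exact one-day offset sits in a collapsed hunk, so it is an assumption here:

```python
from datetime import datetime, timedelta

def next_search_key(search_key, page_timestamps):
    # Re-anchor before= at (earliest timestamp on the page) + 1 day.
    earliest = min(datetime.strptime(t.split(" ")[0], "%Y-%m-%d").date()
                   for t in page_timestamps)
    base = search_key.split('&& before')[0].strip()
    return '{} && before="{}"'.format(base, earliest + timedelta(days=1))

print(next_search_key('"thinkphp" && before="2023-03-20"',
                      ["2023-03-14 08:00", "2023-03-12 10:30"]))
# "thinkphp" && before="2023-03-13"
```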
@@ -241,10 +306,8 @@

def run(self):
searchbs64 = self.get_count_num(self.searchKey)
if not self.fuzz:
self.fofa_common_spider(self.searchKey, searchbs64)
else:
self.fofa_fuzz_spider(self.searchKey, searchbs64)
self.fofa_common_spider(self.searchKey, searchbs64,0)

print('[*] Crawl finished; fetched ' + str(len(self.host_set)) + ' results in total\n')

def main(self):
Binary file removed images/2.png
Binary file not shown.
Binary file removed images/fofa.jpg
Binary file not shown.
Binary file removed images/result.jpg
Binary file not shown.
Binary file added images/trick.png
10 changes: 7 additions & 3 deletions test.py
@@ -11,7 +11,11 @@
import requests
from lxml import etree

data=[1,2,3,4,5,6,7,8,9]
result=[]
from tookit.bypass import ByPass

key='"thinkphp" && before="2023-03-14"'
request_url = 'https://fofa.info/result?qbase64=InRoaW5rcGhwIg%3D%3D'
# print(f'request_url:{request_url}')
rep = requests.get(request_url, timeout=5)
# print(rep.text)
bypass = ByPass(rep.text)
bypass.bypassCountry()
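Aside: the commit does not show `tookit/bypass.py`. Judging only from its usage here and in `fofa.py` (constructed from page HTML, `bypassCountry()` returning countries, plus a commented-out `switchBypass(key)`), a stand-in consistent with that surface might look like the following; this is an inference, not the module's real implementation:

```python
import base64
import re
from lxml import etree

class ByPass:
    # Hypothetical stand-in for tookit.bypass.ByPass; API inferred from usage.
    LEFT_LIST_RULE = '//div[@class="hsxa-meta-data-list-main-left hsxa-fl"]'
    CITY_RULE = 'p[3]/a/@href'

    def __init__(self, context):
        self.tree = etree.HTML(context)

    def bypassCountry(self):
        countries = []
        for left in self.tree.xpath(self.LEFT_LIST_RULE):
            hrefs = left.xpath(self.CITY_RULE)
            if not hrefs or "qbase64=" not in hrefs[0]:
                continue
            qbase64 = hrefs[0].split("qbase64=")[1].replace("%3D", "=")
            search_key = base64.b64decode(qbase64).decode()
            match = re.search(r'country="([^"]+)"', search_key)
            if match and match.group(1) not in countries:
                countries.append(match.group(1))
        return countries
```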