Feature | Update several features; see the CHANGELOG for details
Cl0udG0d committed Mar 28, 2023
1 parent 24fd914 commit 5e0f39e
Showing 10 changed files with 193 additions and 61 deletions.
9 changes: 4 additions & 5 deletions README.md
@@ -38,11 +38,11 @@ Fofa-hack>python fofa.py -h
|__| \____/|__| /__/\__\
_ _ ____ ____ __ __
| |_| | / () \ / (__`| |/ /
|_| |_|/__/\__\\____)|__|\__\ V2.0.6
|_| |_|/__/\__\\____)|__|\__\ V2.1.0
usage: fofa.py [-h] [--timesleep TIMESLEEP] [--timeout TIMEOUT] --keyword KEYWORD [--endcount ENDCOUNT] [--level LEVEL] [--output OUTPUT] [--fuzz]
Fofa-hack v2.0.6 使用说明
Fofa-hack v2.1.0 使用说明
optional arguments:
-h, --help show this help message and exit
@@ -58,7 +58,6 @@ optional arguments:
crawl level: 1-3; higher values return more detail (default: 1)
--output OUTPUT, -o OUTPUT
output format: txt or json (default: txt)
--fuzz, -f            keyword fuzz option; increases the granularity of data retrieval
```

Results are written to a file named `md5(<search keyword>)_<run timestamp>.txt`
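Aside (an editor's illustration, not part of this commit): the file name can be reproduced with the standard library, assuming `tookit.unit.md5` is a plain hex MD5 digest, as its use in `fofa.py` suggests:

```python
import hashlib
import time

def output_filename(keyword: str, ext: str = "txt") -> str:
    # Mirrors fofa.py: md5(search keyword) + "_" + unix timestamp + "." + output format
    digest = hashlib.md5(keyword.encode("utf-8")).hexdigest()
    return "{}_{}.{}".format(digest, int(time.time()), ext)

print(output_filename('"thinkphp"'))  # <32 hex chars>_<timestamp>.txt
```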
@@ -156,6 +155,6 @@ optional arguments:
I have set up a WeChat security chat group; feel free to add me on WeChat with the note `进群` (join group) and come chat. There is also an official account that posts security-related content; you are welcome to follow it :)

<div>
<img alt="GIF" src="https://springbird.oss-cn-beijing.aliyuncs.com/img/mmqrcode1632325540724.png" width="280px" />
<img alt="GIF" src="https://springbird.oss-cn-beijing.aliyuncs.com/img/qrcode_for_gh_cead8e1080d6_344.jpg" width="280px" />
<img alt="JPG" src="https://springbird3.oss-cn-chengdu.aliyuncs.com/lianxiang/1a1f7894a170bec207e61bf86a01592.jpg" width="280px" />
<img alt="JPG" src="https://springbird3.oss-cn-chengdu.aliyuncs.com/lianxiang/qrcode_for_gh_cead8e1080d6_430.jpg" width="280px" />
</div>
2 changes: 1 addition & 1 deletion config.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
# @Github: https://github.com/Cl0udG0d

# Fofa-hack version number
VERSION_NUM="2.0.6"
VERSION_NUM="2.1.0"
# Maximum number of login retries
MAX_LOGIN_RETRY_NUM=3
# Maximum number of retries when fetching page URLs
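Aside (a sketch, not part of this commit): `MAX_MATCH_RETRY_NUM` above drives the retry loop in `fofa_spider_page` further down. Reduced to its skeleton, the pattern is roughly the following; the constant's value is assumed here, and `fetch` stands in for the page request:

```python
MAX_MATCH_RETRY_NUM = 3  # assumed value; the real constant lives in config.py

def fetch_with_retry(fetch, max_retries=MAX_MATCH_RETRY_NUM):
    # Retry wrapper in the spirit of fofa_spider_page's while-loop.
    attempt = 0
    while attempt < max_retries:
        try:
            return fetch()
        except Exception as e:
            attempt += 1
            print("[-] error:{}".format(e))
            print("[-] Retry {}: fetching page URLs".format(attempt))
    raise SystemExit("[-] FOFA fetch retries exceeded the maximum; exiting")
```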
7 changes: 7 additions & 0 deletions docs/CHANGELOG.md
@@ -1,6 +1,13 @@
# CHANGELOG (code change record)

### 2.1.0

+ Remove redundant images
+ Update the README images
+ Add automated country fuzzing

### 2.0.6

+ Update the README and other files
+ Merge code - optimize the timestamp logic

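Aside: the "automated country fuzzing" entry corresponds to the new `bypassCountry` path in `fofa.py` below. When the query carries no `country=` filter, the crawler re-issues it once per country found on the page, which works around the cap on results for any single query. A minimal sketch of the sub-query construction, mirroring the code in this commit (the country codes are illustrative):

```python
import base64
from urllib.parse import quote_plus

def country_fuzz_urls(search_key, countries):
    # One FOFA result URL per country, mirroring how fofa.py appends
    # '&& country="..."' and re-encodes the qbase64 parameter.
    urls = []
    for country in countries:
        sub_key = '{} && country="{}"'.format(search_key, country)
        qbase64 = quote_plus(base64.b64encode(sub_key.encode("utf-8")))
        urls.append("https://fofa.info/result?qbase64=" + qbase64)
    return urls

for url in country_fuzz_urls('"thinkphp"', ["CN", "US"]):
    print(url)
```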
163 changes: 113 additions & 50 deletions fofa.py
@@ -12,6 +12,8 @@
import config
from tookit import unit, fofa_useragent
import argparse

from tookit.bypass import ByPass
from tookit.levelData import LevelData
from tookit.outputData import OutputData
import re, requests
@@ -20,14 +22,26 @@


class Fofa:
'''
FUZZ rules (XPath selectors used for country fuzzing)
'''
LEFT_LIST_RULE = '//div[@class="hsxa-meta-data-list-main-left hsxa-fl"]'
CITY_RULE = 'p[3]/a/@href'

CITY_SET = set()


def __init__(self):
self.bypass = None
self.headers_use = ""
self.level = 0
self.host_set = set()
self.timestamp_set = set()
self.timestamp_list=[set()]
self.oldLength = -1
self.endcount=0
self.filename = ""
self.countryList=[]
self.timestampIndex=0

print('''
____ ____ ____ ____
@@ -51,9 +65,8 @@ def logoutInitMsg(self):
[*] Crawl delay: {}s
[*] Search keyword: {}
[*] Stop count: {}
[*] FUZZ enabled: {}
[*] Output format: {}
[*] Output file: {}'''.format(self.level,self.timeSleep,self.searchKey,self.endcount,self.fuzz,self.output,self.filename)
[*] Output file: {}'''.format(self.level,self.timeSleep,self.searchKey,self.endcount,self.output,self.filename)
)
return

@@ -71,7 +84,7 @@ def init(self):
parser.add_argument('--endcount', '-e', help='number of results at which to stop crawling')
parser.add_argument('--level', '-l', help='crawl level: 1-3; higher values return more detail (default: 1)')
parser.add_argument('--output', '-o', help='output format: txt or json (default: txt)')
parser.add_argument('--fuzz', '-f', help='keyword fuzz option; increases the granularity of data retrieval',action='store_true')
# parser.add_argument('--fuzz', '-f', help='keyword fuzz option; increases the granularity of data retrieval',action='store_true')
args = parser.parse_args()
self.timeSleep= int(args.timesleep)
self.timeout = int(args.timeout)
Expand All @@ -82,7 +95,7 @@ def init(self):
self.endcount=100
self.level=args.level if args.level else "1"
self.levelData=LevelData(self.level)
self.fuzz=args.fuzz
# self.fuzz=args.fuzz
self.output = args.output if args.output else "txt"
self.filename = "{}_{}.{}".format(unit.md5(self.searchKey), int(time.time()),self.output)
self.outputData = OutputData(self.filename, pattern=self.output)
@@ -123,80 +136,130 @@ def getTimeList(self, text):
timelist.append(temp.replace("<span>", "").replace("</span>", "").strip())
return timelist

def fofa_spider_page(self, searchbs64):
def bypassCountry(self,context):
tree = etree.HTML(context)
leftList = tree.xpath(self.LEFT_LIST_RULE)
countryList=list()
for i in range(len(leftList)):
if len(leftList[i].xpath(self.CITY_RULE))>0:
cityURL = leftList[i].xpath(self.CITY_RULE)[0].strip()
city=self.filterKeyword(cityURL,"country")
if city is not None and city not in self.CITY_SET:
self.CITY_SET.add(city)
countryList.append(city)
# print(countryList)
return countryList

def filterKeyword(self,keyURL,key):
# print(keyURL)
searchbs64=keyURL.split("qbase64=")[1]
search_key=base64.b64decode(searchbs64).decode()
if key in search_key:
pattern = r'{}="([^"]+)"'.format(key)
match = re.search(pattern, search_key)
city = match.group(1) if match else None  # guard: the regex may not match
return city
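Aside (not part of the diff): `bypassCountry` and `filterKeyword` together pull the `country` value back out of each result link. The round trip looks like this; the sample link is fabricated, and its payload decodes to `"thinkphp" && country="CN"`:

```python
import base64
import re
from urllib.parse import unquote_plus

sample_href = '/result?qbase64=InRoaW5rcGhwIiAmJiBjb3VudHJ5PSJDTiI%3D'
qbase64 = unquote_plus(sample_href.split("qbase64=")[1])
search_key = base64.b64decode(qbase64).decode()
print(search_key)                                            # "thinkphp" && country="CN"
print(re.search(r'country="([^"]+)"', search_key).group(1))  # CN
```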

def fofa_spider_page(self, searchbs64,timestampIndex=0):
"""
Fetch one page of results
:rtype: object
"""
searchbs64=searchbs64.replace("%3D","=")
init_search_key = base64.b64decode(searchbs64).decode()
print(init_search_key)
TEMP_RETRY_NUM=0

while TEMP_RETRY_NUM < config.MAX_MATCH_RETRY_NUM:
try:
# try:
request_url = 'https://fofa.info/result?qbase64=' + searchbs64 + "&full=false&page_size=10"
# print(f'request_url:{request_url}')
rep = requests.get(request_url, headers=self.headers_use, timeout=self.timeout)
self.levelData.startSpider(rep)

# tree = etree.HTML(rep.text)
# urllist = tree.xpath('//span[@class="hsxa-host"]/a/@href')
timelist = self.getTimeList(rep.text)
print("[*] Results crawled [{}]: ".format(len(self.host_set))+str(self.levelData.formatData))
# print(timelist)
for temptime in timelist:
self.timestamp_list[self.timestampIndex].add(temptime)

for i in self.levelData.formatData:
with open(self.filename, 'a+', encoding="utf-8") as f:
f.write(str(i) + "\n")
self.saveData(rep)
for url in self.levelData.formatData:
self.host_set.add(url)
for temptime in timelist:
self.timestamp_set.add(temptime)

time.sleep(self.timeSleep)
'''
fuzz section
'''
# self.bypass = ByPass(rep.text)
if "country" not in init_search_key:
countryList=self.bypassCountry(rep.text)
for country in countryList:
search_key = init_search_key+ ' && country="' + str(country) + '"'
# print(search_key)
searchbs64_modify = quote_plus(base64.b64encode(search_key.encode("utf-8")))
self.timestampIndex+=1
self.timestamp_list.append(set())
self.fofa_common_spider(search_key,searchbs64_modify,self.timestampIndex)
return
except Exception as e:
print("[-] error:{}".format(e))
TEMP_RETRY_NUM+=1
print('[-] Retry {}: fetching page URLs'.format(TEMP_RETRY_NUM))
pass
# except Exception as e:
# print("[-] error:{}".format(e))
# TEMP_RETRY_NUM+=1
# print('[-] Retry {}: fetching page URLs'.format(TEMP_RETRY_NUM))
# pass


print('[-] FOFA fetch retries exceeded the maximum; exiting')
exit(0)

def saveData(self,rep):
"""
Save the crawled data to the output file
@param rep:
"""
self.levelData.startSpider(rep)
# tree = etree.HTML(rep.text)
# urllist = tree.xpath('//span[@class="hsxa-host"]/a/@href')
print("[*] Results crawled [{}]: ".format(len(self.host_set)) + str(self.levelData.formatData))

for i in self.levelData.formatData:
with open(self.filename, 'a+', encoding="utf-8") as f:
f.write(str(i) + "\n")

def fofa_common_spider(self, search_key, searchbs64):
def fofa_common_spider(self, search_key, searchbs64,index):
while len(self.host_set) < self.endcount and self.oldLength !=len(self.host_set):
self.oldLength=len(self.host_set)
self.timestamp_set.clear()
self.timestamp_list[index].clear()
self.fofa_spider_page(searchbs64)
search_key_modify= self.modify_search_time_url(search_key)
search_key_modify= self.modify_search_time_url(search_key,index)
# print(search_key_modify)
searchbs64_modify = quote_plus(base64.b64encode(search_key_modify.encode()))
search_key = search_key_modify
searchbs64 = searchbs64_modify
if len(self.host_set) >= self.endcount:
print("[*] Crawling finished")
# print("[*] Node {}: crawling finished".format(index))
return
if self.oldLength == len(self.host_set):
print("[-] No new data; stopping crawl")
print("[-] Node {}: no new data; this node is exhausted".format(index))
return

def fofa_fuzz_spider(self, search_key, searchbs64):
while len(self.host_set) < self.endcount and self.oldLength !=len(self.host_set):
self.oldLength=len(self.host_set)
self.timestamp_set.clear()
self.fofa_spider_page(searchbs64)
search_key_modify = self.modify_search_time_url(search_key)
# def fofa_fuzz_spider(self, search_key, searchbs64):
# """
# 递归调用 fofa_common_spider 方法不断 fuzz
# @param search_key:
# @param searchbs64:
# @return:
# """
# FUZZ_LIST=["country","port","server","protocol","title"]
# for key in FUZZ_LIST:
# if key not in search_key:
# results=self.bypass.switchBypass(key)
# for result in results:
# search_key = search_key + ' && ' + key +'="' + str(result) + '"'
# searchbs64_modify = quote_plus(base64.b64encode(search_key.encode()))
# self.fofa_common_spider(search_key,searchbs64_modify)

searchbs64_modify = quote_plus(base64.b64encode(search_key_modify.encode()))
search_key = search_key_modify
searchbs64 = searchbs64_modify
if len(self.host_set) >= self.endcount:
print("[*] 数据爬取结束")
return
if self.oldLength == len(self.host_set):
print("[-] 数据无新增,退出爬取")
return
# def fuzzCountry(self):
# return

def modify_search_time_url(self, search_key):
def modify_search_time_url(self, search_key,index):
"""
Revise the search key based on the observed timestamps
:param search_key:
@@ -211,10 +274,12 @@ def modify_search_time_url(self, search_key):
match = re.search(pattern, search_key)
before_time_in_search_key = match.group(1)
time_before_time_in_search_key = datetime.strptime(before_time_in_search_key, "%Y-%m-%d").date()

# print(self.timestamp_list)
# print(index)
# treat the day after the earliest timestamp as the optimized time_before
timestamp_list=list(self.timestamp_set)
timestamp_list=list(self.timestamp_list[index])
timestamp_list.sort()
# print(timestamp_list)

time_first = timestamp_list[0].split(' ')[0].strip('\n').strip()
time_first_time = datetime.strptime(time_first, "%Y-%m-%d").date()
@@ -227,7 +292,7 @@
#print(time_before)

if 'before' in search_key:
print(search_key)
# print(search_key)
search_key = search_key.split('&& before')[0]
search_key = search_key.strip(' ')
search_key = search_key + ' && ' + 'before="' + str(time_before) + '"'
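Aside: rather than paging by page number (deep pages are not freely accessible), the crawler shrinks the time window each round, re-anchoring `before=` near the earliest timestamp seen on the page. A sketch of that rewrite; the exact one-day offset sits in a collapsed hunk, so it is an assumption here:

```python
from datetime import datetime, timedelta

def next_search_key(search_key, page_timestamps):
    # Re-anchor before= at (earliest timestamp on the page) + 1 day.
    earliest = min(datetime.strptime(t.split(" ")[0], "%Y-%m-%d").date()
                   for t in page_timestamps)
    base = search_key.split('&& before')[0].strip()
    return '{} && before="{}"'.format(base, earliest + timedelta(days=1))

print(next_search_key('"thinkphp" && before="2023-03-20"',
                      ["2023-03-14 08:00", "2023-03-12 10:30"]))
# "thinkphp" && before="2023-03-13"
```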
@@ -241,10 +306,8 @@

def run(self):
searchbs64 = self.get_count_num(self.searchKey)
if not self.fuzz:
self.fofa_common_spider(self.searchKey, searchbs64)
else:
self.fofa_fuzz_spider(self.searchKey, searchbs64)
self.fofa_common_spider(self.searchKey, searchbs64,0)

print('[*] Crawl finished; fetched ' + str(len(self.host_set)) + ' results in total\n')

def main(self):
Binary file removed images/2.png
Binary file not shown.
Binary file removed images/fofa.jpg
Binary file not shown.
Binary file removed images/result.jpg
Binary file not shown.
Binary file added images/trick.png
10 changes: 7 additions & 3 deletions test.py
@@ -11,7 +11,11 @@
import requests
from lxml import etree

data=[1,2,3,4,5,6,7,8,9]
result=[]
from tookit.bypass import ByPass

key='"thinkphp" && before="2023-03-14"'
request_url = 'https://fofa.info/result?qbase64=InRoaW5rcGhwIg%3D%3D'
# print(f'request_url:{request_url}')
rep = requests.get(request_url, timeout=5)
# print(rep.text)
bypass = ByPass(rep.text)
bypass.bypassCountry()
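Aside: the commit does not show `tookit/bypass.py`. Judging only from its usage here and in `fofa.py` (constructed from page HTML, `bypassCountry()` returning countries, plus a commented-out `switchBypass(key)`), a stand-in consistent with that surface might look like the following; this is an inference, not the module's real implementation:

```python
import base64
import re
from lxml import etree

class ByPass:
    # Hypothetical stand-in for tookit.bypass.ByPass; API inferred from usage.
    LEFT_LIST_RULE = '//div[@class="hsxa-meta-data-list-main-left hsxa-fl"]'
    CITY_RULE = 'p[3]/a/@href'

    def __init__(self, context):
        self.tree = etree.HTML(context)

    def bypassCountry(self):
        countries = []
        for left in self.tree.xpath(self.LEFT_LIST_RULE):
            hrefs = left.xpath(self.CITY_RULE)
            if not hrefs or "qbase64=" not in hrefs[0]:
                continue
            qbase64 = hrefs[0].split("qbase64=")[1].replace("%3D", "=")
            search_key = base64.b64decode(qbase64).decode()
            match = re.search(r'country="([^"]+)"', search_key)
            if match and match.group(1) not in countries:
                countries.append(match.group(1))
        return countries
```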