From 0e1e2effc0bb0a850c837467872d89153968d283 Mon Sep 17 00:00:00 2001 From: maldevel Date: Wed, 20 Apr 2016 17:55:26 +0300 Subject: [PATCH] useragent string and proxy_server define user agent string and use proxy server --- EmailHarvester.py | 83 +++++++++++++++++++++++++++++++++-------------- README.md | 24 +++++++++++--- 2 files changed, 77 insertions(+), 30 deletions(-) diff --git a/EmailHarvester.py b/EmailHarvester.py index 27ea059..dd2b490 100644 --- a/EmailHarvester.py +++ b/EmailHarvester.py @@ -42,6 +42,7 @@ from termcolor import colored from argparse import RawTextHelpFormatter from sys import platform as _platform +from urllib.parse import urlparse ################################ @@ -96,15 +97,16 @@ def unique(self): ################################################################### class SearchEngine: - def __init__(self, urlPattern, word, limit, counterInit, counterStep): + def __init__(self, urlPattern, word, limit, counterInit, counterStep, userAgent, proxy): self.results = "" self.totalresults = "" - self.userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1" + self.userAgent = userAgent self.limit = int(limit) self.counter = int(counterInit) self.urlPattern = urlPattern self.step = int(counterStep) self.word = word + self.proxy = proxy def do_search(self): try: @@ -112,9 +114,19 @@ def do_search(self): headers = { 'User-Agent': self.userAgent, } - r=requests.get(urly, headers=headers) + + if(self.proxy): + proxies = { + self.proxy.scheme: "http://" + self.proxy.netloc + } + r=requests.get(urly, headers=headers, proxies=proxies) + else: + r=requests.get(urly, headers=headers) + except Exception as e: print(e) + sys.exit(4) + self.results = r.content.decode(r.encoding) self.totalresults += self.results @@ -122,8 +134,8 @@ def process(self): while (self.counter < self.limit): self.do_search() time.sleep(1) - print(green("\tSearching " + str(self.counter) + " results...")) self.counter += self.step + print(green("\tSearching " + str(self.counter) + " results...")) def get_emails(self): rawres = myparser(self.totalresults, self.word) @@ -149,7 +161,13 @@ def unique(data): if x not in unique: unique.append(x) return unique - + +def checkProxyUrl(url): + url_checked = urlparse(url) + if ((url_checked.scheme != 'http') & (url_checked.scheme != 'https')) | (url_checked.netloc == ''): + raise argparse.ArgumentTypeError('Invalid {} Proxy URL (example: http://127.0.0.1:8080).'.format(url)) + return url_checked + ################################################################### def limit_type(x): @@ -184,6 +202,8 @@ def engine_type(x): parser.add_argument("-s", '--save', metavar='FILE', dest='filename', type=str, help="Save the results into a TXT and XML file.") parser.add_argument("-e", '--engine', metavar='ENGINE', dest='engine', default="all", type=engine_type, help="Select search engine(google, bing, yahoo, ask, all).") parser.add_argument("-l", '--limit', metavar='LIMIT', dest='limit', type=limit_type, default=100, help="Limit the number of results.") + parser.add_argument('-u', '--user-agent', metavar='USER-AGENT', dest='uagent', type=str, help="Set the User-Agent request header.") + parser.add_argument('-x', '--proxy', metavar='PROXY', dest='proxy', type=checkProxyUrl, help='Setup proxy server (example: http://127.0.0.1:8080)') if len(sys.argv) is 1: parser.print_help() @@ -195,58 +215,71 @@ def engine_type(x): if(args.domain): domain = args.domain else: - print('[{}] {}'.format(red('ERROR'), "Please specify a domain name to search.")) + print(red("[-] Please specify a domain name to search.")) sys.exit(2) + userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1" + if(args.uagent): + userAgent = args.uagent + + print("User-Agent in use: {}".format(yellow(userAgent))) + + if(args.proxy): + print("Proxy server in use: {}".format(yellow(args.proxy.scheme + "://" + args.proxy.netloc))) + filename = "" if(args.filename): filename = args.filename limit = args.limit - engine = args.engine - + engine = args.engine + googleUrl = "http://www.google.com/search?num=100&start={counter}&hl=en&q=%40\"{word}\"" + bingUrl = "http://www.bing.com/search?q=%40{word}&count=50&first={counter}" + askUrl = "http://www.ask.com/web?q=%40{word}" + yahooUrl = "http://search.yahoo.com/search?p=%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}" + if engine == "google": - print(green("[-] Searching in Google..\n")) - search = SearchEngine("http://www.google.com/search?num=100&start={counter}&hl=en&q=%40\"{word}\"", domain, limit, 0, 100) + print(green("[+] Searching in Google..\n")) + search = SearchEngine(googleUrl, domain, limit, 0, 100, userAgent, args.proxy) search.process() all_emails = search.get_emails() elif engine == "bing": - print(green("[-] Searching in Bing..\n")) - search = SearchEngine("http://www.bing.com/search?q=%40{word}&count=50&first={counter}", domain, limit, 0, 50) + print(green("[+] Searching in Bing..\n")) + search = SearchEngine(bingUrl, domain, limit, 0, 50, userAgent, args.proxy) search.process() all_emails = search.get_emails() elif engine == "ask": - print(green("[-] Searching in ASK..\n")) - search = SearchEngine("http://www.ask.com/web?q=%40{word}", domain, limit, 0, 100) + print(green("[+] Searching in ASK..\n")) + search = SearchEngine(askUrl, domain, limit, 0, 100, userAgent, args.proxy) search.process() all_emails = search.get_emails() elif engine == "yahoo": - print(green("[-] Searching in Yahoo..\n")) - search = SearchEngine("http://search.yahoo.com/search?p=%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}", domain, limit, 1, 100) + print(green("[+] Searching in Yahoo..\n")) + search = SearchEngine(yahooUrl, domain, limit, 1, 100, userAgent, args.proxy) search.process() all_emails = search.get_emails() elif engine == "all": - print(green("[-] Searching everywhere..\n")) + print(green("[+] Searching everywhere..\n")) all_emails = [] - print(green("[-] Searching in Google..\n")) - search = SearchEngine("http://www.google.com/search?num=100&start={counter}&hl=en&q=%40\"{word}\"", domain, limit, 0, 100) + print(green("[+] Searching in Google..\n")) + search = SearchEngine(googleUrl, domain, limit, 0, 100, userAgent, args.proxy) search.process() all_emails.extend(search.get_emails()) - print(green("\n[-] Searching in Bing..\n")) - search = SearchEngine("http://www.bing.com/search?q=%40{word}&count=50&first={counter}", domain, limit, 0, 50) + print(green("\n[+] Searching in Bing..\n")) + search = SearchEngine(bingUrl, domain, limit, 0, 50, userAgent, args.proxy) search.process() all_emails.extend(search.get_emails()) - print(green("\n[-] Searching in ASK..\n")) - search = SearchEngine("http://www.ask.com/web?q=%40{word}", domain, limit, 0, 100) + print(green("\n[+] Searching in ASK..\n")) + search = SearchEngine(askUrl, domain, limit, 0, 100, userAgent, args.proxy) search.process() all_emails.extend(search.get_emails()) - print(green("\n[-] Searching in Yahoo..\n")) - search = SearchEngine("http://search.yahoo.com/search?p=%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}", domain, limit, 1, 100) + print(green("\n[+] Searching in Yahoo..\n")) + search = SearchEngine(yahooUrl, domain, limit, 1, 100, userAgent, args.proxy) search.process() all_emails.extend(search.get_emails()) all_emails = unique(all_emails) diff --git a/README.md b/README.md index 2dae67d..806e4b2 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,12 @@ Requirements Features ===== -* Retrieve Domain email addresses from Search Engines -* Google -* Bing -* Yahoo -* ASK +* Retrieve Domain email addresses from Search Engines (Google, Bing, Yahoo, ASK). +* Export results to txt and xml files. +* Limit search results. +* Define your own User-Agent string. +* Use proxy server. + Download/Installation ==== @@ -33,6 +34,7 @@ Usage ===== ``` usage: EmailHarvester.py [-h] [-d DOMAIN] [-s FILE] [-e ENGINE] [-l LIMIT] + [-u USER-AGENT] [-x PROXY] _____ _ _ _ _ _ | ___| (_)| | | | | | | | @@ -53,6 +55,10 @@ optional arguments: Select search engine(google, bing, yahoo, ask, all). -l LIMIT, --limit LIMIT Limit the number of results. + -u USER-AGENT, --user-agent USER-AGENT + Set the User-Agent request header. + -x PROXY, --proxy PROXY + Setup proxy server (example: http://127.0.0.1:8080) ``` @@ -67,3 +73,11 @@ Examples **Limit results** * ./EmailHarvester.py -d example.com -e all -l 200 +**Export emails** +* ./EmailHarvester.py -d example.com -e all -l 200 -s emails.txt + +**Custom User-Agent string** +* ./EmailHarvester.py -d example.com -e all -u "MyUserAgentString 1.0" + +**Proxy Server** +* ./EmailHarvester.py -d example.com -e all -x http://127.0.0.1:8080