diff --git a/main.py b/main.py index 8d2d5af..64b3bf6 100755 --- a/main.py +++ b/main.py @@ -28,6 +28,8 @@ dest='pages', default=True) parser.add_option('-d', '--drop-bodies', action='store_true', dest='drop_bodies', default=False) +parser.add_option('-k', '--keep-unfulfilled-requests', action='store_true', + dest='keep_unfulfilled', default=False) parser.add_option('-r', '--resource-usage', action='store_true', dest='resource_usage', default=False) parser.add_option('--pad_missing_tcp_data', action='store_true', @@ -40,6 +42,7 @@ # copy options to settings module settings.process_pages = options.pages settings.drop_bodies = options.drop_bodies +settings.keep_unfulfilled_requests = options.keep_unfulfilled settings.pad_missing_tcp_data = options.pad_missing_tcp_data settings.strict_http_parse_body = options.strict_http_parsing diff --git a/pcap2har/http/flow.py b/pcap2har/http/flow.py index 510989e..74744d0 100644 --- a/pcap2har/http/flow.py +++ b/pcap2har/http/flow.py @@ -39,6 +39,8 @@ def __init__(self, tcpflow): # first request is the benchmark; responses before that # are irrelevant for now self.pairs = [] + # determine a list of responses that we can match up with requests, + # padding the list with None where necessary. try: # find the first response to a request we know about, # that is, the first response after the first request @@ -46,6 +48,10 @@ def __init__(self, tcpflow): lambda response: response.ts_start > requests[0].ts_start, responses ) + except LookupError: + # no responses at all + pairable_responses = [None for i in requests] + else: # these are responses that match up with our requests pairable_responses = responses[first_response_index:] # if there are more requests than responses... @@ -54,24 +60,20 @@ def __init__(self, tcpflow): pairable_responses.extend( [None for i in range(len(requests) - len(pairable_responses))] ) - # if there are more responses, we would just ignore them anyway, - # which zip does for us - # create MessagePair's - connected = False # if conn. timing has been added to a request yet - for req, resp in zip(requests, responses): - if not req: - logging.warning('Request is missing.') - continue - if not connected and tcpflow.handshake: - req.ts_connect = tcpflow.handshake[0].ts - connected = True - else: - req.ts_connect = req.ts_start - self.pairs.append(MessagePair(req, resp)) - except LookupError: - # there were no responses after the first request - # there's nothing we can do - logging.warning('Request has no response.') + # if there are more responses, we would just ignore them anyway, + # which zip does for us + # create MessagePair's + connected = False # if conn. timing has been added to a request yet + for req, resp in zip(requests, pairable_responses): + if not req: + logging.warning('Request is missing.') + continue + if not connected and tcpflow.handshake: + req.ts_connect = tcpflow.handshake[0].ts + connected = True + else: + req.ts_connect = req.ts_start + self.pairs.append(MessagePair(req, resp)) class MessagePair(object): diff --git a/pcap2har/httpsession.py b/pcap2har/httpsession.py index 9e327ca..c96a337 100644 --- a/pcap2har/httpsession.py +++ b/pcap2har/httpsession.py @@ -37,10 +37,6 @@ def __init__(self, request, response): self.pageref = None self.ts_start = int(request.ts_connect*1000) self.startedDateTime = datetime.utcfromtimestamp(request.ts_connect) - endedDateTime = datetime.utcfromtimestamp(response.ts_end) - self.total_time = ms_from_timedelta( - endedDateTime - self.startedDateTime # plus connection time, someday - ) # calculate other timings self.time_blocked = -1 self.time_dnsing = -1 @@ -48,14 +44,20 @@ def __init__(self, request, response): ms_from_dpkt_time(request.ts_start - request.ts_connect)) self.time_sending = ( ms_from_dpkt_time(request.ts_end - request.ts_start)) - self.time_waiting = ( - ms_from_dpkt_time(response.ts_start - request.ts_end)) - self.time_receiving = ( - ms_from_dpkt_time(response.ts_end - response.ts_start)) - # check if timing calculations are consistent - if (self.time_sending + self.time_waiting + self.time_receiving != - self.total_time): - pass + if response is not None: + self.time_waiting = ( + ms_from_dpkt_time(response.ts_start - request.ts_end)) + self.time_receiving = ( + ms_from_dpkt_time(response.ts_end - response.ts_start)) + endedDateTime = datetime.utcfromtimestamp(response.ts_end) + self.total_time = ms_from_timedelta( + endedDateTime - self.startedDateTime + ) + else: + # this can happen if the request never gets a response + self.time_waiting = -1 + self.time_receiving = -1 + self.total_time = -1 def json_repr(self): ''' @@ -169,8 +171,9 @@ def __init__(self, packetdispatcher): # if msg.request has a referer, keep track of that, too if self.page_tracker: entry.pageref = self.page_tracker.getref(entry) - # add it to the list - self.entries.append(entry) + # add it to the list, if we're supposed to keep it. + if entry.response or settings.keep_unfulfilled_requests: + self.entries.append(entry) self.user_agent = self.user_agents.dominant_user_agent() # handle DNS AFTER sorting # this algo depends on first appearance of a name diff --git a/pcap2har/pagetracker.py b/pcap2har/pagetracker.py index 814f4ba..d1f29a6 100644 --- a/pcap2har/pagetracker.py +++ b/pcap2har/pagetracker.py @@ -68,11 +68,13 @@ def is_root_document(entry): guesses whether the entry is from the root document of a web page ''' # guess based on media type - mt = entry.response.mediaType - if mt.type == 'text': - if mt.subtype in ['html', 'xhtml', 'xml']: - # probably... - return True + if entry.response: # might be None + mt = entry.response.mediaType + if mt.type == 'text': + if mt.subtype in ['html', 'xhtml', 'xml']: + # probably... + return True + # else, guess by request url? return False diff --git a/pcap2har/settings.py b/pcap2har/settings.py index 7cf1d45..55ae1f1 100644 --- a/pcap2har/settings.py +++ b/pcap2har/settings.py @@ -7,3 +7,7 @@ # Whether to pad missing data in TCP flows with 0 bytes pad_missing_tcp_data = False + +# Whether to keep requests with missing responses. Could break consumers +# that assume every request has a response. +keep_unfulfilled_requests = False diff --git a/tests/README.txt b/tests/README.txt index 6ba6588..0c33540 100644 --- a/tests/README.txt +++ b/tests/README.txt @@ -35,3 +35,9 @@ pcapr.net.pcap A pageload of pcapr.net, an online pcap repository. Includes a redirect from pcapr.net to pcapr.net/home +missing_response.pcap: +A flow from fhs.pcap with one of the responses missing, to test -k functionality + +request_only.pcap: +Flow with a request and nothing else, to handle a different failure case of -k + diff --git a/tests/missing_response.pcap b/tests/missing_response.pcap new file mode 100644 index 0000000..517acbf Binary files /dev/null and b/tests/missing_response.pcap differ diff --git a/tests/missing_response.pcap.har b/tests/missing_response.pcap.har new file mode 100644 index 0000000..fa658bb --- /dev/null +++ b/tests/missing_response.pcap.har @@ -0,0 +1,215 @@ +{ + "log": { + "browser": { + "name": "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 ( .NET CLR 3.5.30729; .NET4.0E)", + "version": "mumble" + }, + "creator": { + "name": "pcap2har", + "version": "0.1" + }, + "entries": [ + { + "cache": {}, + "pageref": "page_0", + "request": { + "bodySize": 0, + "cookies": [], + "headers": [ + { + "name": "accept-language", + "value": "en-us,en;q=0.5" + }, + { + "name": "connection", + "value": "keep-alive" + }, + { + "name": "keep-alive", + "value": "300" + }, + { + "name": "accept", + "value": "text/xml,application/xml,application/xhtml+xml,*/*;q=0.1" + }, + { + "name": "user-agent", + "value": "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 ( .NET CLR 3.5.30729; .NET4.0E)" + }, + { + "name": "accept-charset", + "value": "ISO-8859-1,utf-8;q=0.7,*;q=0.7" + }, + { + "name": "host", + "value": "andrewfleenor.users.sourceforge.net" + }, + { + "name": "referer", + "value": "http://andrewfleenor.users.sourceforge.net/fhs/fhs.xml" + }, + { + "name": "cookie", + "value": "__utma=191645736.1924309581.1277516327.1278893750.1278979018.10; __utmz=191645736.1278979018.10.3.utmcsr=wiki.wireshark.org|utmccn=(referral)|utmcmd=referral|utmcct=/Development/LibpcapFileFormat; __qca=P0-1746884488-1277513743262" + } + ], + "headersSize": -1, + "httpVersion": "HTTP/1.1", + "method": "GET", + "queryString": [], + "url": "http://andrewfleenor.users.sourceforge.net/fhs/fhs.xsl" + }, + "response": { + "bodySize": 6511, + "content": { + "compression": 0, + "mimeType": "text/xml", + "size": 6511, + "text": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n /\r\n \r\n
\r\n \r\n \r\n \r\n \r\n \r\n \r\n toggleDirExpansion('')\r\n \r\n \r\n \r\n toggleFileExpansion('')\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n dir-closed.png\r\n \r\n dir-empty.png\r\n \r\n file-closed.png\r\n \r\n file-empty.png\r\n \r\n \r\n :img\r\n \r\n \r\n \r\n (): \r\n \r\n \r\n
\r\n \r\n \r\n :inner \r\n \r\n \r\n
\r\n \r\n \r\n \r\n \r\n \r\n \r\n

\r\n
\r\n
\r\n
\r\n
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n
\r\n
\r\n
\r\n\r\n\r\n\r\n \r\n \r\n Interactive Linux Filesystem Explanation\r\n \r\n \r\n \r\n \r\n \r\n \r\n
\r\n
Interactive Linux Filesystem Explanation
\r\n
Introduction
\r\n \r\n
\r\n \r\n \r\n
\r\n
Explanation
\r\n \r\n \r\n \r\n \r\n \r\n
\r\n \r\n \r\n
\r\n
Works Cited
\r\n
    \r\n \r\n
  • \r\n
    \r\n
\r\n
\r\n \r\n \r\n
\r\n \u00a9Andrew Fleenor, 2010\r\n
\r\n \r\n \r\n
\r\n\r\n
" + }, + "cookies": [], + "headers": [ + { + "name": "content-length", + "value": "6511" + }, + { + "name": "accept-ranges", + "value": "bytes" + }, + { + "name": "expires", + "value": "Fri, 30 Jul 2010 03:31:55 GMT" + }, + { + "name": "server", + "value": "nginx/0.7.63" + }, + { + "name": "last-modified", + "value": "Tue, 15 Jun 2010 21:32:28 GMT" + }, + { + "name": "connection", + "value": "keep-alive" + }, + { + "name": "etag", + "value": "\"196f-48918578e330a\"" + }, + { + "name": "cache-control", + "value": "max-age=172800" + }, + { + "name": "date", + "value": "Wed, 28 Jul 2010 03:31:55 GMT" + }, + { + "name": "content-type", + "value": "text/xml" + } + ], + "headersSize": -1, + "httpVersion": "1.1", + "redirectURL": "", + "status": 200, + "statusText": "OK" + }, + "startedDateTime": "2010-07-28T03:31:53.275921Z", + "time": 4065, + "timings": { + "blocked": -1, + "connect": 104, + "dns": -1, + "receive": 3738, + "send": 0, + "wait": 222 + } + }, + { + "cache": {}, + "pageref": "page_1", + "request": { + "bodySize": 0, + "cookies": [], + "headers": [ + { + "name": "accept-language", + "value": "en-us,en;q=0.5" + }, + { + "name": "connection", + "value": "keep-alive" + }, + { + "name": "keep-alive", + "value": "300" + }, + { + "name": "accept", + "value": "*/*" + }, + { + "name": "user-agent", + "value": "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 ( .NET CLR 3.5.30729; .NET4.0E)" + }, + { + "name": "accept-charset", + "value": "ISO-8859-1,utf-8;q=0.7,*;q=0.7" + }, + { + "name": "host", + "value": "andrewfleenor.users.sourceforge.net" + }, + { + "name": "referer", + "value": "http://andrewfleenor.users.sourceforge.net/fhs/fhs.xml" + }, + { + "name": "cookie", + "value": "__utma=191645736.1924309581.1277516327.1278893750.1278979018.10; __utmz=191645736.1278979018.10.3.utmcsr=wiki.wireshark.org|utmccn=(referral)|utmcmd=referral|utmcct=/Development/LibpcapFileFormat; __qca=P0-1746884488-1277513743262" + } + ], + "headersSize": -1, + "httpVersion": "HTTP/1.1", + "method": "GET", + "queryString": [], + "url": "http://andrewfleenor.users.sourceforge.net/fhs/fhs.js" + }, + "response": null, + "startedDateTime": "2010-07-28T03:31:57.359355Z", + "time": -1, + "timings": { + "blocked": -1, + "connect": 0, + "dns": -1, + "receive": -1, + "send": 0, + "wait": -1 + } + } + ], + "pages": [ + { + "id": "page_0", + "pageTimings": { + "onContentLoad": -1, + "onLoad": -1 + }, + "startedDateTime": "2010-07-28T03:31:53.275921Z", + "title": "http://andrewfleenor.users.sourceforge.net/fhs/fhs.xsl" + }, + { + "id": "page_1", + "pageTimings": { + "onContentLoad": -1, + "onLoad": -1 + }, + "startedDateTime": "2010-07-28T03:31:57.359355Z", + "title": "unknown title" + } + ], + "version": "1.1" + } +} \ No newline at end of file diff --git a/tests/out-of-order.pcap.har b/tests/out-of-order.pcap.har index ede6e32..9eb7083 100644 --- a/tests/out-of-order.pcap.har +++ b/tests/out-of-order.pcap.har @@ -33666,6 +33666,72 @@ "send": 0, "wait": 2128 } + }, + { + "cache": {}, + "pageref": "page_1", + "request": { + "bodySize": 0, + "cookies": [], + "headers": [ + { + "name": "accept-language", + "value": "en-GB, en-US" + }, + { + "name": "accept-encoding", + "value": "gzip" + }, + { + "name": "accept", + "value": "text/xml, text/html, applicati" + }, + { + "name": "user-agent", + "value": "Mozilla/5.0 (Linux; U; Android 2.3.3; en-gb; Desire HD Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1" + }, + { + "name": "accept-charset", + "value": "utf-8, iso-8859-1, utf-16, *;q=0.7" + }, + { + "name": "host", + "value": "www.ynet.co.il" + }, + { + "name": "cookie", + "value": "_chartbeat2=c9ghqpffqo6o3p1n; __utma=1.389587665.1303995315.1304333797.1304342324.6; __utmb=1.1.10.1304342324; __utmc=1; __utmz=1.1303995315.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)" + }, + { + "name": "if-modified-since", + "value": "Mon, 02 May 2011 08:37:05 GMT" + }, + { + "name": "referer", + "value": "http://www.ynet.co.il/home/0,7340,L-8,00.html" + }, + { + "name": "if-none-match", + "value": "\"7323b91da48cc1:dd3\"" + } + ], + "headersSize": -1, + "httpVersion": "HTTP/1.1", + "method": "GET", + "queryString": [], + "url": "http://www.ynet.co.il/PicServer2/24012010/3189349/REUTERS0NYK707DR_BINLADEN-_0502_11871457_ot.jpg" + }, + "response": null, + "startedDateTime": "2011-05-02T13:19:31.572082Z", + "time": -1, + "timings": { + "blocked": -1, + "connect": 0, + "dns": -1, + "receive": -1, + "send": 0, + "wait": -1 + } } ], "pages": [ diff --git a/tests/request_only.pcap b/tests/request_only.pcap new file mode 100644 index 0000000..741014e Binary files /dev/null and b/tests/request_only.pcap differ diff --git a/tests/request_only.pcap.har b/tests/request_only.pcap.har new file mode 100644 index 0000000..956c4f2 --- /dev/null +++ b/tests/request_only.pcap.har @@ -0,0 +1,84 @@ +{ + "log": { + "browser": { + "name": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.0 Safari/537.4", + "version": "mumble" + }, + "creator": { + "name": "pcap2har", + "version": "0.1" + }, + "entries": [ + { + "cache": {}, + "pageref": "page_0", + "request": { + "bodySize": 0, + "cookies": [], + "headers": [ + { + "name": "accept-language", + "value": "en-US,en;q=0.8" + }, + { + "name": "accept-encoding", + "value": "gzip,deflate,sdch" + }, + { + "name": "connection", + "value": "keep-alive" + }, + { + "name": "accept", + "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + }, + { + "name": "user-agent", + "value": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.0 Safari/537.4" + }, + { + "name": "accept-charset", + "value": "ISO-8859-1,utf-8;q=0.7,*;q=0.3" + }, + { + "name": "host", + "value": "localhost:8000" + }, + { + "name": "cache-control", + "value": "max-age=0" + } + ], + "headersSize": -1, + "httpVersion": "HTTP/1.1", + "method": "GET", + "queryString": [], + "url": "http://localhost:8000/non/existent.html" + }, + "response": null, + "startedDateTime": "2012-08-17T15:14:02.607882Z", + "time": -1, + "timings": { + "blocked": -1, + "connect": 0, + "dns": -1, + "receive": -1, + "send": 0, + "wait": -1 + } + } + ], + "pages": [ + { + "id": "page_0", + "pageTimings": { + "onContentLoad": -1, + "onLoad": -1 + }, + "startedDateTime": "2012-08-17T15:14:02.607882Z", + "title": "unknown title" + } + ], + "version": "1.1" + } +} \ No newline at end of file diff --git a/tests/run_tests.sh b/tests/run_tests.sh index d0f1ed1..39d0dd5 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -8,8 +8,8 @@ for pcap in `ls *.pcap` do echo $pcap - # check normal running - if ../main.py $pcap $pcap.new.har + # check normal running (with -k, that's what current test hars use) + if ../main.py -k $pcap $pcap.new.har then if [ -a $pcap.har ] then