Skip to content

Commit

Permalink
Correctly handle requests with no response.
Browse files Browse the repository at this point in the history
Fix ancient variable misspelling, which revealed a bunch of other
bugs when response is None. Add test cases.
  • Loading branch information
Andrew Fleenor committed Aug 21, 2012
1 parent 4120b4f commit 938787e
Show file tree
Hide file tree
Showing 12 changed files with 424 additions and 39 deletions.
3 changes: 3 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
dest='pages', default=True)
parser.add_option('-d', '--drop-bodies', action='store_true',
dest='drop_bodies', default=False)
parser.add_option('-k', '--keep-unfulfilled-requests', action='store_true',
dest='keep_unfulfilled', default=False)
parser.add_option('-r', '--resource-usage', action='store_true',
dest='resource_usage', default=False)
parser.add_option('--pad_missing_tcp_data', action='store_true',
Expand All @@ -40,6 +42,7 @@
# copy options to settings module
settings.process_pages = options.pages
settings.drop_bodies = options.drop_bodies
settings.keep_unfulfilled_requests = options.keep_unfulfilled
settings.pad_missing_tcp_data = options.pad_missing_tcp_data
settings.strict_http_parse_body = options.strict_http_parsing

Expand Down
38 changes: 20 additions & 18 deletions pcap2har/http/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,19 @@ def __init__(self, tcpflow):
# first request is the benchmark; responses before that
# are irrelevant for now
self.pairs = []
# determine a list of responses that we can match up with requests,
# padding the list with None where necessary.
try:
# find the first response to a request we know about,
# that is, the first response after the first request
first_response_index = find_index(
lambda response: response.ts_start > requests[0].ts_start,
responses
)
except LookupError:
# no responses at all
pairable_responses = [None for i in requests]
else:
# these are responses that match up with our requests
pairable_responses = responses[first_response_index:]
# if there are more requests than responses...
Expand All @@ -54,24 +60,20 @@ def __init__(self, tcpflow):
pairable_responses.extend(
[None for i in range(len(requests) - len(pairable_responses))]
)
# if there are more responses, we would just ignore them anyway,
# which zip does for us
# create MessagePair's
connected = False # if conn. timing has been added to a request yet
for req, resp in zip(requests, responses):
if not req:
logging.warning('Request is missing.')
continue
if not connected and tcpflow.handshake:
req.ts_connect = tcpflow.handshake[0].ts
connected = True
else:
req.ts_connect = req.ts_start
self.pairs.append(MessagePair(req, resp))
except LookupError:
# there were no responses after the first request
# there's nothing we can do
logging.warning('Request has no response.')
# if there are more responses, we would just ignore them anyway,
# which zip does for us
# create MessagePair's
connected = False # if conn. timing has been added to a request yet
for req, resp in zip(requests, pairable_responses):
if not req:
logging.warning('Request is missing.')
continue
if not connected and tcpflow.handshake:
req.ts_connect = tcpflow.handshake[0].ts
connected = True
else:
req.ts_connect = req.ts_start
self.pairs.append(MessagePair(req, resp))


class MessagePair(object):
Expand Down
31 changes: 17 additions & 14 deletions pcap2har/httpsession.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,27 @@ def __init__(self, request, response):
self.pageref = None
self.ts_start = int(request.ts_connect*1000)
self.startedDateTime = datetime.utcfromtimestamp(request.ts_connect)
endedDateTime = datetime.utcfromtimestamp(response.ts_end)
self.total_time = ms_from_timedelta(
endedDateTime - self.startedDateTime # plus connection time, someday
)
# calculate other timings
self.time_blocked = -1
self.time_dnsing = -1
self.time_connecting = (
ms_from_dpkt_time(request.ts_start - request.ts_connect))
self.time_sending = (
ms_from_dpkt_time(request.ts_end - request.ts_start))
self.time_waiting = (
ms_from_dpkt_time(response.ts_start - request.ts_end))
self.time_receiving = (
ms_from_dpkt_time(response.ts_end - response.ts_start))
# check if timing calculations are consistent
if (self.time_sending + self.time_waiting + self.time_receiving !=
self.total_time):
pass
if response is not None:
self.time_waiting = (
ms_from_dpkt_time(response.ts_start - request.ts_end))
self.time_receiving = (
ms_from_dpkt_time(response.ts_end - response.ts_start))
endedDateTime = datetime.utcfromtimestamp(response.ts_end)
self.total_time = ms_from_timedelta(
endedDateTime - self.startedDateTime
)
else:
# this can happen if the request never gets a response
self.time_waiting = -1
self.time_receiving = -1
self.total_time = -1

def json_repr(self):
'''
Expand Down Expand Up @@ -169,8 +171,9 @@ def __init__(self, packetdispatcher):
# if msg.request has a referer, keep track of that, too
if self.page_tracker:
entry.pageref = self.page_tracker.getref(entry)
# add it to the list
self.entries.append(entry)
# add it to the list, if we're supposed to keep it.
if entry.response or settings.keep_unfulfilled_requests:
self.entries.append(entry)
self.user_agent = self.user_agents.dominant_user_agent()
# handle DNS AFTER sorting
# this algo depends on first appearance of a name
Expand Down
12 changes: 7 additions & 5 deletions pcap2har/pagetracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,13 @@ def is_root_document(entry):
guesses whether the entry is from the root document of a web page
'''
# guess based on media type
mt = entry.response.mediaType
if mt.type == 'text':
if mt.subtype in ['html', 'xhtml', 'xml']:
# probably...
return True
if entry.response: # might be None
mt = entry.response.mediaType
if mt.type == 'text':
if mt.subtype in ['html', 'xhtml', 'xml']:
# probably...
return True
# else, guess by request url?
return False


Expand Down
4 changes: 4 additions & 0 deletions pcap2har/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@

# Whether to pad missing data in TCP flows with 0 bytes
pad_missing_tcp_data = False

# Whether to keep requests with missing responses. Could break consumers
# that assume every request has a response.
keep_unfulfilled_requests = False
6 changes: 6 additions & 0 deletions tests/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,9 @@ pcapr.net.pcap
A pageload of pcapr.net, an online pcap repository. Includes a redirect
from pcapr.net to pcapr.net/home

missing_response.pcap:
A flow from fhs.pcap with one of the responses missing, to test the -k functionality.

request_only.pcap:
A flow with a request and nothing else, to test a different failure case of the -k option.

Binary file added tests/missing_response.pcap
Binary file not shown.
Loading

0 comments on commit 938787e

Please sign in to comment.