Skip to content

Commit

Permalink
Merge remote branch 'origin/master'
Browse files Browse the repository at this point in the history
Update test cases to handle new sorting.
  • Loading branch information
Andrew Fleenor committed Dec 14, 2012
2 parents 2825ec3 + 938787e commit 219d002
Show file tree
Hide file tree
Showing 48 changed files with 2,720 additions and 908 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
pcap2har.log
tests/*.har
tests/*.log
*.sw[op]
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) 2009 Andrew Fleenor, Ryan C. Witt and Jake Holland
Copyright (c) 2009 Andrew Fleenor, Ryan C. Witt, Jake Holland, and Google, Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
Expand Down
48 changes: 34 additions & 14 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,50 @@
Main program that converts pcaps to HARs.
'''

import pcap
import os
import optparse
import logging
import sys
import http
import httpsession
import har
import json
import tcp
import settings
from packetdispatcher import PacketDispatcher

from pcap2har import pcap
from pcap2har import http
from pcap2har import httpsession
from pcap2har import har
from pcap2har import tcp
from pcap2har import settings
from pcap2har.packetdispatcher import PacketDispatcher
from pcap2har.pcaputil import print_rusage


# get cmdline args/options
parser = optparse.OptionParser(
usage='usage: %prog inputfile outputfile'
)
parser.add_option('--no-pages', action="store_false", dest="pages", default=True)
parser.add_option('--no-pages', action='store_false',
dest='pages', default=True)
parser.add_option('-d', '--drop-bodies', action='store_true',
dest='drop_bodies', default=False)
parser.add_option('-k', '--keep-unfulfilled-requests', action='store_true',
dest='keep_unfulfilled', default=False)
parser.add_option('-r', '--resource-usage', action='store_true',
dest='resource_usage', default=False)
parser.add_option('--pad_missing_tcp_data', action='store_true',
dest='pad_missing_tcp_data', default=False)
parser.add_option('--strict-http-parsing', action='store_true',
dest='strict_http_parsing', default=False)
parser.add_option('-l', '--log', dest='logfile', default='pcap2har.log')
options, args = parser.parse_args()

# copy options to settings module
settings.process_pages = options.pages
settings.drop_bodies = options.drop_bodies
settings.keep_unfulfilled_requests = options.keep_unfulfilled
settings.pad_missing_tcp_data = options.pad_missing_tcp_data
settings.strict_http_parse_body = options.strict_http_parsing

# setup logs
logging.basicConfig(filename='pcap2har.log', level=logging.INFO)
logging.basicConfig(filename=options.logfile, level=logging.INFO)

# get filenames, or bail out with usage error
if len(args) == 2:
Expand All @@ -40,18 +59,19 @@
parser.print_help()
sys.exit()

logging.info("Processing %s", inputfile)
logging.info('Processing %s', inputfile)

# parse pcap file
dispatcher = PacketDispatcher()
pcap.ParsePcap(dispatcher, filename=inputfile)
dispatcher.finish()
dispatcher = pcap.EasyParsePcap(filename=inputfile)

# parse HAR stuff
session = httpsession.HttpSession(dispatcher)

logging.info("Flows=%d. HTTP pairs=%d" % (len(session.flows),len(session.entries)))
logging.info('Flows=%d. HTTP pairs=%d' % (len(session.flows), len(session.entries)))

#write the HAR file
with open(outputfile, 'w') as f:
json.dump(session, f, cls=har.JsonReprEncoder, indent=2, encoding='utf8', sort_keys=True)

if options.resource_usage:
print_rusage()
160 changes: 0 additions & 160 deletions pcap2har.psproj

This file was deleted.

File renamed without changes.
Empty file added pcap2har/__init__.py
Empty file.
25 changes: 19 additions & 6 deletions dns.py → pcap2har/dns.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging as log
import logging

class Packet:

class Packet(object):
'''
A DNS packet, wrapped for convenience and with the pcap timestamp
Expand All @@ -12,6 +13,7 @@ class Packet:
names = list of names asked about
dns = dpkt.dns.DNS
'''

def __init__(self, ts, pkt):
'''
ts = pcap timestamp
Expand All @@ -22,11 +24,13 @@ def __init__(self, ts, pkt):
self.txid = pkt.id
self.names = [q.name for q in pkt.qd]
if len(self.names) > 1:
log.warning('DNS packet with multiple questions')
logging.warning('DNS packet with multiple questions')

def name(self):
    '''
    Returns the first of the names asked about in this packet.

    Raises IndexError if the packet has no names at all.
    '''
    first_name = self.names[0]
    return first_name

class Query:

class Query(object):
'''
A DNS question/answer conversation with a single ID
Expand All @@ -37,6 +41,7 @@ class Query:
name = domain name being discussed
resolved = Bool, whether the question has been answered
'''

def __init__(self, initial_packet):
'''
initial_packet = dns.Packet, simply the first one on the wire with
Expand All @@ -47,19 +52,22 @@ def __init__(self, initial_packet):
self.last_ts = initial_packet.ts
self.resolved = False
self.name = initial_packet.name()

def add(self, pkt):
    '''
    Folds another packet into this conversation.

    pkt = dns.Packet, expected to carry the same txid as this Query

    Moves last_ts forward when pkt is later than anything seen so far
    (it never moves backward), and marks the query as resolved as soon
    as a packet with a non-empty answer section (pkt.dns.an) shows up.
    Once set, resolved is never cleared again.

    Raises AssertionError if pkt.txid differs from self.txid.
    '''
    assert pkt.txid == self.txid
    self.last_ts = max(pkt.ts, self.last_ts)
    # see if this resolves the query: any answer record counts
    if pkt.dns.an:
        self.resolved = True

def duration(self):
    '''
    Returns how far last_ts lies past started_time, i.e. the span
    between the two timestamps recorded on this Query.
    '''
    end = self.last_ts
    begin = self.started_time
    return end - begin

class Processor:

class Processor(object):
'''
Processes and interprets DNS packets.
Expand All @@ -69,9 +77,11 @@ class Processor:
queries = {txid: Query}
by_hostname = {string: [Query]}
'''

def __init__(self):
    '''
    Starts out with nothing recorded: both lookup tables are empty.
    '''
    # two separate dicts, one per index (by txid and by hostname)
    self.queries, self.by_hostname = {}, {}

def add(self, pkt):
'''
adds the packet to a Query object by id, and makes sure that Queries
Expand All @@ -86,12 +96,14 @@ def add(self, pkt):
new_query = Query(pkt)
self.queries[pkt.txid] = new_query
self.add_by_name(new_query)

def add_by_name(self, query):
    '''
    Files the query under its hostname in by_hostname.

    query = Query, indexed by its name attribute

    Queries that share a name accumulate in a single list, in the order
    they were added; a name seen for the first time gets a new list.
    '''
    # setdefault creates the list on first sight of a name, so there is
    # no separate membership test and no second lookup
    self.by_hostname.setdefault(query.name, []).append(query)

def get_resolution_time(self, hostname):
'''
Returns the last time it took to resolve the hostname.
Expand All @@ -104,6 +116,7 @@ def get_resolution_time(self, hostname):
return self.by_hostname[hostname][-1].duration()
except KeyError:
return None

def num_queries(self, hostname):
'''
Returns the number of DNS requests for that name
Expand Down
Loading

0 comments on commit 219d002

Please sign in to comment.