-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchat_analysis.py
85 lines (67 loc) · 2.32 KB
/
chat_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import string
# Path of the message log read by the analysis functions in this module.
# The readers split each line on tabs and use the fields as:
#   0: month (parsed with int()), 1: conversation partner,
#   2: message text, 3: status (compared against "received").
# NOTE(review): the path is relative, so it presumably resolves against
# the current working directory -- confirm how the script is launched.
DATA_FILENAME = "message_logs.txt"
def add_one_to_count(counts, counted, month):
    """
    Record one more occurrence of `counted` during `month`.

    `counts` is a nested dict shaped like {counted: {month: tally}}.
    It is updated in place and also returned, so callers may write
    `counts = add_one_to_count(counts, ...)`.
    """
    # fetch the per-month tallies for this item, creating them on first sight
    monthly_tallies = counts.setdefault(counted, {})
    # a month that has not been seen yet starts from zero
    monthly_tallies[month] = monthly_tallies.get(month, 0) + 1
    return counts
def get_counts_per_person(filename=None):
    """
    Return a dict from friend's names to dicts from months to
    message counts

    filename -- path of the tab-separated message log to read;
                when omitted, DATA_FILENAME is used.

    Every non-blank line must start with an integer month followed by
    the conversation partner's name. Blank lines are skipped. A
    malformed line raises ValueError (month is not an integer) or
    IndexError (the name field is missing).
    """
    if filename is None:
        filename = DATA_FILENAME
    counts = {}  # initialize a counts dict
    with open(filename, 'r') as f:
        for line in f:
            if not line.strip():
                # tolerate empty lines, e.g. a stray newline at end of file
                continue
            # drop the line terminator first so that the last field on
            # the line never ends in "\n"
            line_parts = line.rstrip('\n').split('\t')
            month = int(line_parts[0])
            conversation_partner = line_parts[1]
            counts = add_one_to_count(counts, conversation_partner, month)
    return counts
def get_chat_word_counts(filename=None):
    """
    Return a dict from words to dicts from months to
    counts of word usage per month

    Only messages that were sent are counted; lines whose status field
    is "received" are ignored. Words are lower-cased and stripped of
    punctuation before counting.

    filename -- path of the tab-separated message log to read;
                when omitted, DATA_FILENAME is used.

    Blank lines are skipped. A malformed line raises ValueError (month
    is not an integer) or IndexError (fewer than four fields).
    """
    if filename is None:
        filename = DATA_FILENAME
    counts = {}  # initialize a counts dict
    with open(filename, 'r') as f:
        for line in f:
            if not line.strip():
                # tolerate empty lines, e.g. a stray newline at end of file
                continue
            # Iterating a file yields lines that still end in "\n".
            # Remove it before splitting: otherwise the last field
            # would be "received\n" and the status check below could
            # never match.
            line_parts = line.rstrip('\n').split('\t')
            month = int(line_parts[0])
            status = line_parts[3].strip()
            if status == "received":
                # only considering messages that I sent
                # so if we find one that I received, move on
                continue
            # normalise the message text only for lines we actually count
            content = without_punctuation(line_parts[2].lower())
            # split the message into individual words
            for word in content.split():
                counts = add_one_to_count(counts, word, month)
    return counts
def get_last_chat_month(filename=None):
    """
    return the largest month in the file, which we'll
    use as a proxy for the age of your facebook account
    (assumes you've used facebook in the last month.)

    filename -- path of the tab-separated message log to read;
                when omitted, DATA_FILENAME is used.

    Blank lines are skipped. Raises ValueError when the file holds no
    messages at all, or when a line does not start with an integer
    month.
    """
    if filename is None:
        filename = DATA_FILENAME
    with open(filename, 'r') as f:
        # only the first tab-separated field (the month) is needed, so
        # keep just that instead of every line and its split parts
        months = [
            int(line.split('\t', 1)[0])
            for line in f
            if line.strip()
        ]
    if not months:
        # max() on an empty list would raise an opaque ValueError;
        # keep the exception type but say what went wrong
        raise ValueError("no messages found in %r" % (filename,))
    return max(months)
# Translation table that deletes every character in string.punctuation.
# Built once at import time so each call is a single pass over the text.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def without_punctuation(s):
    """
    Strip punctuation from a given string

    Only the ASCII punctuation characters listed in string.punctuation
    are removed; every other character (letters, digits, whitespace,
    non-ASCII symbols) is kept unchanged.
    """
    return s.translate(_PUNCTUATION_TABLE)