diff --git a/google_takeout_to_sqlite/utils.py b/google_takeout_to_sqlite/utils.py index 0fa8010..d9ac4e5 100644 --- a/google_takeout_to_sqlite/utils.py +++ b/google_takeout_to_sqlite/utils.py @@ -62,9 +62,9 @@ def id_for_location_history(row): def get_mbox(mbox_file): num_errors = 0 - print('Preparing to process emails...') + print("Preparing to process emails...") mbox = mailbox.mbox(mbox_file) - print('Processing {} emails'.format(len(mbox))) + print("Processing {} emails".format(len(mbox))) # These are all the Gmail email fields available # ['X-GM-THRID', 'X-Gmail-Labels', 'Delivered-To', 'Received', 'Received', @@ -78,31 +78,29 @@ def get_mbox(mbox_file): for email in track(mbox): try: message = {} - message['Message-Id'] = email['Message-Id'] - message['X-GM-THRID'] = email['X-GM-THRID'] - message['X-Gmail-Labels'] = email['X-Gmail-Labels'] + message["Message-Id"] = email["Message-Id"] + message["X-GM-THRID"] = email["X-GM-THRID"] + message["X-Gmail-Labels"] = email["X-Gmail-Labels"] # These following try/excepts are here because for some reason # these items returned from the mbox module are sometimes strings # and sometimes headers and sometimes None. try: - email['From'].decode('utf-8') + email["From"].decode("utf-8") except AttributeError: - message['From'] = str(email['From']) + message["From"] = str(email["From"]) try: - email['To'].decode('utf-8') + email["To"].decode("utf-8") except AttributeError: - message["To"] = str(email['To']) + message["To"] = str(email["To"]) try: - email['Subject'].decode('utf-8') + email["Subject"].decode("utf-8") except AttributeError: - message["Subject"] = str(email['Subject']) + message["Subject"] = str(email["Subject"]) - - message["date"] = get_message_date(email.get('Date'), - email.get_from()) + message["date"] = get_message_date(email.get("Date"), email.get_from()) message["body"] = get_email_body(email) yield message @@ -110,7 +108,7 @@ def get_mbox(mbox_file): # How does this project want to handle logging? For now we're just # printing out variables num_errors = num_errors + 1 - print('Errors: {}'.format(num_errors)) + print("Errors: {}".format(num_errors)) print(traceback.format_exc()) continue @@ -122,41 +120,40 @@ def save_emails(db, mbox_file): db["mbox_emails"].upsert_all( ( { - "id": message['Message-Id'], - 'X-GM-THRID': message['X-GM-THRID'], - 'X-Gmail-Labels': message['X-Gmail-Labels'], - "From": message['From'], - "To": message['To'], - "Subject": message['Subject'], - "when": message['date'], - "body": message['body'], + "id": message["Message-Id"], + "X-GM-THRID": message["X-GM-THRID"], + "X-Gmail-Labels": message["X-Gmail-Labels"], + "From": message["From"], + "To": message["To"], + "Subject": message["Subject"], + "when": message["date"], + "body": message["body"], } - - for message in get_mbox(mbox_file) + for message in get_mbox(mbox_file) ), pk="id", alter=True, ) - print('Finished loading emails into {}.'.format(mbox_file)) + print("Finished loading emails into {}.".format(mbox_file)) print('Enabling full text search on "body" and "Subject" fields') db["mbox_emails"].enable_fts(["body", "Subject"]) - print('Finished!') + print("Finished!") def get_email_body(message): - ''' + """ return the email body contents - ''' + """ body = None if message.is_multipart(): for part in message.walk(): if part.is_multipart(): for subpart in part.walk(): - if subpart.get_content_type() == 'text/plain': + if subpart.get_content_type() == "text/plain": body = subpart.get_payload(decode=True) - elif part.get_content_type() == 'text/plain': + elif part.get_content_type() == "text/plain": body = part.get_payload(decode=True) - elif message.get_content_type() == 'text/plain': + elif message.get_content_type() == "text/plain": body = message.get_payload(decode=True) return body @@ -170,8 +167,8 @@ def get_message_date(get_date, get_from): datetime_tuple = email.utils.parsedate_tz(mail_date) if datetime_tuple: unix_time = email.utils.mktime_tz(datetime_tuple) - mail_date_iso8601 = datetime.datetime.utcfromtimestamp(unix_time).isoformat(' ') + mail_date_iso8601 = datetime.datetime.utcfromtimestamp(unix_time).isoformat(" ") else: - mail_date_iso8601 = '' + mail_date_iso8601 = "" return mail_date_iso8601 diff --git a/tests/test_gmail_import.py b/tests/test_gmail_import.py index 26e009a..bc12b7c 100644 --- a/tests/test_gmail_import.py +++ b/tests/test_gmail_import.py @@ -10,38 +10,42 @@ def test_import_gmails(): assert "mbox_emails" in set(db.table_names()) mbox_emails = list(sorted(db["mbox_emails"].rows, key=lambda r: r["id"])) assert [ - {'From': '=?UTF-8?Q?=C5=82_Zieli=C5=84ski?= ', - 'Subject': '[fw-general] Zend_Form and generating fields', - 'To': 'fw-general@lists.zend.com', - 'X-GM-THRID': '1277085061787347926', - 'X-Gmail-Labels': 'Unread', - 'body': b'\r\nUnfortunately it is slow! For 10 products it takes 0.6 sec. to' - b' generate.\r\nIs there a better (more efficient) method to build s' - b'uch forms via Zend_Form?\r\n\r\nThe same I noticed when tried to' - b' create a select element which contained\r\nmany options (i.e. lis' - b't of countries). Without ajax (autocomplete) it takes\r\nages to g' - b'enerate and seems to be useless in this case. Shame.\r\n\r\nI wo' - b'nder if Zend_Form can be used when it comes to generate a lot of' - b'\r\ninputs/options in select or I`m forced to create it by han' - b'd?\r\n\r\n\r\n\r\n', - 'id': '<18826312.post@talk.nabble.com>', - 'when': '2008-08-05 08:00:12'}, - {'From': 'Person Person ', - 'Subject': 'Re: [Gnumed-devel] Tree view formatting', - 'To': 'gnumed-devel@gnu.org', - 'X-GM-THRID': '1278204036336346264', - 'X-Gmail-Labels': 'Unread', - 'body': b'On Sun, Aug 17, 2008 at 03:09:55PM -0300, Bob Luz wrote:\r\n\r\n' - b'> when you say you have changed it ... can I assume it will make' - b' the 0.3.0 release\r\nyes\r\n\r\n> or is the release READY\r\nI ' - b'hope it is "ready" so I can release it within the next few\r\ndays' - b'. I usually wait a few days to see whether any errors\r\nshow up. ' - b"That's why we need you guys to test like mad.\r\n\r\n> and all o" - b'ur future discussions on this list\r\n> will from now on to be imp' - b'lemented on the 0.3.1 ?\r\n\r\nNot quite yet. And, rather 0.3+.\r' - b'\n\r\nPerson\r\n\r\n\r\n_________________________________________' - b'______\r\nGnumed-devel mailing list\r\nGnumed-devel@gnu.org\r\nhtt' - b'p://lists.gnu.org/mailman/listinfo/gnumed-devel\r\n', - 'id': '<20080817183915.GM3992@merkur.person.loc>', - 'when': '2008-08-17 18:39:15'} - ] == mbox_emails + { + "From": "=?UTF-8?Q?=C5=82_Zieli=C5=84ski?= ", + "Subject": "[fw-general] Zend_Form and generating fields", + "To": "fw-general@lists.zend.com", + "X-GM-THRID": "1277085061787347926", + "X-Gmail-Labels": "Unread", + "body": b"\r\nUnfortunately it is slow! For 10 products it takes 0.6 sec. to" + b" generate.\r\nIs there a better (more efficient) method to build s" + b"uch forms via Zend_Form?\r\n\r\nThe same I noticed when tried to" + b" create a select element which contained\r\nmany options (i.e. lis" + b"t of countries). Without ajax (autocomplete) it takes\r\nages to g" + b"enerate and seems to be useless in this case. Shame.\r\n\r\nI wo" + b"nder if Zend_Form can be used when it comes to generate a lot of" + b"\r\ninputs/options in select or I`m forced to create it by han" + b"d?\r\n\r\n\r\n\r\n", + "id": "<18826312.post@talk.nabble.com>", + "when": "2008-08-05 08:00:12", + }, + { + "From": "Person Person ", + "Subject": "Re: [Gnumed-devel] Tree view formatting", + "To": "gnumed-devel@gnu.org", + "X-GM-THRID": "1278204036336346264", + "X-Gmail-Labels": "Unread", + "body": b"On Sun, Aug 17, 2008 at 03:09:55PM -0300, Bob Luz wrote:\r\n\r\n" + b"> when you say you have changed it ... can I assume it will make" + b" the 0.3.0 release\r\nyes\r\n\r\n> or is the release READY\r\nI " + b'hope it is "ready" so I can release it within the next few\r\ndays' + b". I usually wait a few days to see whether any errors\r\nshow up. " + b"That's why we need you guys to test like mad.\r\n\r\n> and all o" + b"ur future discussions on this list\r\n> will from now on to be imp" + b"lemented on the 0.3.1 ?\r\n\r\nNot quite yet. And, rather 0.3+.\r" + b"\n\r\nPerson\r\n\r\n\r\n_________________________________________" + b"______\r\nGnumed-devel mailing list\r\nGnumed-devel@gnu.org\r\nhtt" + b"p://lists.gnu.org/mailman/listinfo/gnumed-devel\r\n", + "id": "<20080817183915.GM3992@merkur.person.loc>", + "when": "2008-08-17 18:39:15", + }, + ] == mbox_emails