Bläddra i källkod

Search an inbox and compute how many emails go to specified domains per month. Plot results.

George C. Privon 10 år sedan
förälder
incheckning
3a146c1f72
1 ändrade filer med 104 tillägg och 0 borttagningar
  1. 104 0
      email_destinations.py

+ 104 - 0
email_destinations.py

@@ -0,0 +1,104 @@
+#!/usr/bin/python
+#
+# email_destinations.py
+#
+# Plot the number of emails sent to selected domains per month for an mbox file.
+#
+# Copyright (C) 2015 George C. Privon
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+import argparse
+import datetime
+import email.utils
+import mailbox
+import re
+
+import cubehelix
+import matplotlib.dates as mdates
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+# Command-line interface: one positional mbox path plus two optional
+# settings for the output plot (file name and title).
+parser = argparse.ArgumentParser()
+parser.add_argument('mbox',
+                    help='Mailbox to analyze.')
+# default=False marks "no file name given"; the saving code checks for it.
+parser.add_argument('--plotfile', '-p',
+                    default=False,
+                    help='Name of output plotting file.')
+parser.add_argument('--title', '-t',
+                    default='Email Destinations',
+                    help='Plot title.')
+args = parser.parse_args()
+
+# Start a new figure and apply its static decorations (title, axis labels,
+# minor ticks) through the pyplot state machine.
+plt.figure()
+plt.title(args.title)
+plt.xlabel('Year-Month')
+plt.ylabel('Emails sent to Domain')
+plt.minorticks_on()
+
+# Open the mailbox named on the command line.
+a = mailbox.mbox(args.mbox)
+
+# Cubehelix colour map; not referenced by any plotting call in the visible
+# script.
+scolor = cubehelix.cmap(gamma=.9,
+                        startHue=240, endHue=-300,
+                        minSat=1, maxSat=2.5,
+                        minLight=.3, maxLight=.8)
+
+# Domain labels to track.  Each one is compared, case-insensitively, with the
+# dot-separated labels of every recipient host found in the To: header
+# (so 'hotmail' matches both hotmail.com and hotmail.co.uk).
+domains = ['gmail',
+           'hotmail',
+           'aol']
+# Maps a 'YYYY-MM' key to an array of per-month message counts:
+# index 0 is the catch-all "other" bucket, index i + 1 belongs to domains[i].
+senders = {}
+for msg in a:
+    # Messages without a Date header cannot be assigned to a month.
+    if msg['date'] is None:
+        continue
+    # str() because a header with non-ASCII bytes may come back as a Header
+    # object instead of a plain string.
+    rawdate = str(msg['date'])
+    try:
+        # RFC 2822 date parser: unlike a fixed strptime format it also accepts
+        # dates without a weekday or with a trailing "(UTC)"-style comment.
+        z = email.utils.parsedate_to_datetime(rawdate)
+    except (TypeError, ValueError):
+        # Older Python versions raise TypeError, newer ones ValueError.
+        print("Skipping message from " + rawdate)
+        continue
+    # Zero-padded so the keys sort chronologically and read as e.g. 2015-03.
+    dateID = "{0:04d}-{1:02d}".format(z.year, z.month)
+    if dateID not in senders:
+        senders[dateID] = np.zeros(len(domains) + 1)
+
+    # Collect the host labels of all recipients.  Matching on labels instead
+    # of searching the raw header keeps 'aol' from matching a local part or
+    # display name such as "paolo@example.com".
+    hostlabels = set()
+    if msg['To'] is not None:
+        for _, addr in email.utils.getaddresses([str(msg['To'])]):
+            _, at, host = addr.rpartition('@')
+            if at:
+                hostlabels.update(host.lower().split('.'))
+
+    # A message counts once for every tracked domain it was sent to, and
+    # once as "other" when it matches none of them.
+    dmatch = False
+    for i, domain in enumerate(domains):
+        if domain in hostlabels:
+            senders[dateID][i+1] += 1
+            dmatch = True
+    if not dmatch:
+        senders[dateID][0] += 1
+
+# Sorted list of the 'year-month' keys collected in senders, and the
+# matching integer x positions used for every line.
+months = sorted(senders)
+xpos = np.arange(len(months))
+
+# One line per category: column 0 of each monthly array is the catch-all
+# 'Other' bucket and column j (j >= 1) belongs to domains[j - 1].
+for j, label in enumerate(['Other'] + domains):
+    plt.plot(xpos,
+             [senders[month][j] for month in months],
+             label=label)
+
+plt.legend(frameon=False, loc='best')
+plt.xticks(xpos, months, rotation=90)
+# Show only every sixth month label to keep the axis readable.  plt.gca()
+# is used because a bare plt.axes() call adds a brand-new Axes on top of the
+# plot in current matplotlib instead of returning the existing one.
+ticklabels = plt.gca().get_xticklabels()
+plt.setp(ticklabels, visible=False)
+plt.setp(ticklabels[::6], visible=True)
+
+# Fall back to a fixed file name when no --plotfile was given.
+plt.savefig(args.plotfile or 'email_destinations.png', bbox_inches='tight')