Explorar el Código

Search an inbox and compute how many emails go to specified domains per month. Plot the results.

George C. Privon hace 10 años
padre
commit
3a146c1f72
Se han modificado 1 ficheros con 104 adiciones y 0 borrados
  1. 104 0
      email_destinations.py

+ 104 - 0
email_destinations.py

@@ -0,0 +1,104 @@
+#!/usr/bin/python
+#
+# email_destinations.py
+#
+# Plot, per month, how many emails in an mbox file were sent to selected domains.
+#
+# Copyright (C) 2015 George C. Privon
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+import argparse
+import datetime
+import email.utils
+import mailbox
+import re
+
+import cubehelix
+import matplotlib.dates as mdates
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+# Command-line interface: one positional mailbox path plus optional
+# output file name and plot title.
+parser = argparse.ArgumentParser()
+parser.add_argument('mbox', help='Mailbox to analyze.')
+parser.add_argument(
+    '--plotfile', '-p',
+    default=False,  # falsy sentinel: the save step falls back to a default name
+    help='Name of output plotting file.',
+)
+parser.add_argument(
+    '--title', '-t',
+    default='Email Destinations',
+    help='Plot title.',
+)
+args = parser.parse_args()
+
+# Create the figure and set its static decorations before any data is drawn.
+plt.figure()
+plt.ylabel('Emails sent to Domain')
+plt.xlabel('Year-Month')
+plt.minorticks_on()
+plt.title(args.title)
+
+# Open the mailbox read from the command line.  create=False matters:
+# by default mailbox.mbox() creates an empty file when the path does not
+# exist, so a mistyped path would silently produce an empty plot and a
+# stray file.  With create=False a missing path raises
+# mailbox.NoSuchMailboxError instead.
+a = mailbox.mbox(args.mbox, create=False)
+
+# Cubehelix colour map.  It is not referenced by the plotting code in this
+# file, so the plotted lines use matplotlib's default colour cycle.
+scolor = cubehelix.cmap(startHue=240, endHue=-300,
+                        minSat=1, maxSat=2.5,
+                        minLight=.3, maxLight=.8,
+                        gamma=.9)
+
+# Domains to tally.  Each entry is used as a regular expression and is
+# matched, ignoring case, against the domain part of every To address.
+domains = ['gmail',
+           'hotmail',
+           'aol']
+domain_patterns = [re.compile(domain, re.IGNORECASE) for domain in domains]
+
+# senders maps a 'YYYY-MM' key to an array of message counts:
+# index 0 counts messages matching none of the domains ('Other'),
+# index i + 1 counts messages with a recipient matching domains[i].
+senders = {}
+for msg in a:
+    raw_date = msg['date']
+    if raw_date is None:
+        # Without a Date header the message cannot be assigned to a month.
+        continue
+    try:
+        # parsedate_to_datetime handles the RFC 2822 variants seen in real
+        # mail (missing weekday, trailing "(EST)" comment, named zones).
+        # str() guards against header values that are not plain strings.
+        z = email.utils.parsedate_to_datetime(str(raw_date))
+    except (TypeError, ValueError):
+        # TypeError on older Pythons, ValueError on newer ones.
+        print("Skipping message from " + str(raw_date))
+        continue
+
+    # Zero-padded so the key sorts chronologically and reads cleanly as a
+    # tick label (e.g. '2015-01').
+    dateID = "{0:04d}-{1:02d}".format(z.year, z.month)
+    if dateID not in senders:
+        senders[dateID] = np.zeros(len(domains) + 1)
+
+    # Compare against the domain part of each recipient address only, so a
+    # display name such as "Paolo" is not mistaken for the 'aol' domain.
+    to_headers = [str(value) for value in msg.get_all('To', [])]
+    hosts = [addr.rsplit('@', 1)[1]
+             for _, addr in email.utils.getaddresses(to_headers)
+             if '@' in addr]
+
+    # A message is counted at most once per domain, however many of its
+    # recipients share that domain.
+    dmatch = False
+    for i, pattern in enumerate(domain_patterns):
+        if any(pattern.search(host) for host in hosts):
+            senders[dateID][i + 1] += 1
+            dmatch = True
+    if not dmatch:
+        senders[dateID][0] += 1
+
+# Month keys in sorted order; their positions double as x coordinates.
+months = sorted(senders)
+
+# One line per category: column 0 of each per-month array is 'Other',
+# column k + 1 belongs to domains[k].
+for column, label in enumerate(['Other'] + domains):
+    counts = [senders[key][column] for key in months]
+    plt.plot(np.arange(len(months)), counts, label=label)
+
+plt.legend(frameon=False, loc='best')
+plt.xticks(np.arange(len(months)), months, rotation=90)
+
+# Show only every sixth month label to keep the axis readable.
+# plt.gca() returns the axes that were plotted on; plt.axes() with no
+# arguments adds a new Axes on current matplotlib, which would leave the
+# real tick labels untouched and overlay an empty frame on the plot.
+ticklabels = plt.gca().get_xticklabels()
+plt.setp(ticklabels, visible=False)
+plt.setp(ticklabels[::6], visible=True)
+
+# Write the figure to the requested file, or to the default name when no
+# --plotfile was given.
+plt.savefig(args.plotfile or 'email_destinations.png', bbox_inches='tight')