|
|
@@ -2,7 +2,9 @@
|
|
|
#
|
|
|
# email_stats.py
|
|
|
#
|
|
|
-# Time vs date plot for an email mbox file.
|
|
|
+# Analytics on Email mbox files
|
|
|
+# - Time vs date plots of emails
|
|
|
+# - Recipient domains as a function of time
|
|
|
#
|
|
|
# Copyright (C) 2015 George C. Privon
|
|
|
#
|
|
|
@@ -32,30 +34,16 @@ import cubehelix
|
|
|
|
|
|
parser = argparse.ArgumentParser()
|
|
|
parser.add_argument('mbox', help='Mailbox to analyze.')
|
|
|
-parser.add_argument('--plotfile', '-p', default=False, action='store',
|
|
|
- help='Name of output plotting file.')
|
|
|
-parser.add_argument('--title', '-t', default='Email Send Times',
|
|
|
- action='store', help='Plot title.')
|
|
|
+parser.add_argument('--plotroot', '-p', default='email_stats', action='store',
|
|
|
+ help='Root name for output plots.')
|
|
|
+parser.add_argument('--title', '-t', default='',
|
|
|
+ action='store', help='Plot title root.')
|
|
|
parser.add_argument('--sendercolors', '-s', default=False, action='store',
|
|
|
help='Comma separated list of search strings for the \
|
|
|
sender field. Each will be displayed with a \
|
|
|
different color.')
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
-plt.figure()
|
|
|
-plt.ylim([0, 24])
|
|
|
-plt.yticks(4*np.arange(7))
|
|
|
-plt.ylabel('Hour')
|
|
|
-plt.xlabel('Date')
|
|
|
-plt.minorticks_on()
|
|
|
-plt.title(args.title)
|
|
|
-a = mailbox.mbox(args.mbox)
|
|
|
-
|
|
|
-scolor = cubehelix.cmap(startHue=240, endHue=-300,
|
|
|
- minSat=1, maxSat=2.5,
|
|
|
- minLight=.3, maxLight=.8,
|
|
|
- gamma=.9)
|
|
|
-
|
|
|
pldata = {}
|
|
|
if args.sendercolors:
|
|
|
slist = args.sendercolors.split(',')
|
|
|
@@ -64,6 +52,13 @@ if args.sendercolors:
|
|
|
pldata[item] = []
|
|
|
pldata['unknown'] = []
|
|
|
|
|
|
+domains = ['gmail',
|
|
|
+ 'hotmail',
|
|
|
+ 'aol']
|
|
|
+senders = {}
|
|
|
+
|
|
|
+a = mailbox.mbox(args.mbox)
|
|
|
+
|
|
|
for msg in a:
|
|
|
cid = None
|
|
|
label = None
|
|
|
@@ -76,6 +71,16 @@ for msg in a:
|
|
|
except ValueError:
|
|
|
print("Skipping message from " + msg['date'])
|
|
|
continue
|
|
|
+ dateID = "{0:d}-{1:02d}".format(z.year, z.month)  # zero-padded month: "2015-03", sorts chronologically
|
|
|
+ if dateID not in senders:  # first message seen for this year-month
|
|
|
+ senders[dateID] = np.zeros(len(domains) + 1)
|
|
|
+ dmatch = False
|
|
|
+ for i, domain in enumerate(domains):
|
|
|
+ if msg['To'] is not None and re.search(domain, msg['To'], re.IGNORECASE):  # case-insensitive, like the sender search
|
|
|
+ senders[dateID][i+1] += 1
|
|
|
+ dmatch = True
|
|
|
+ if not(dmatch):
|
|
|
+ senders[dateID][0] += 1
|
|
|
if args.sendercolors:
|
|
|
for search in enumerate(slist):
|
|
|
if re.search(search[1], msg['From'], re.IGNORECASE):
|
|
|
@@ -85,6 +90,21 @@ for msg in a:
|
|
|
if cid is None:
|
|
|
pldata['unknown'].append([z.date(), z.hour + z.minute/60.])
|
|
|
|
|
|
+# Email send times as a function of day
|
|
|
+
|
|
|
+plt.figure()
|
|
|
+plt.ylim([0, 24])
|
|
|
+plt.yticks(4*np.arange(7))
|
|
|
+plt.ylabel('Hour')
|
|
|
+plt.xlabel('Date')
|
|
|
+plt.minorticks_on()
|
|
|
+plt.title(args.title + ' - Email Send Times')
|
|
|
+
|
|
|
+scolor = cubehelix.cmap(startHue=240, endHue=-300,
|
|
|
+ minSat=1, maxSat=2.5,
|
|
|
+ minLight=.3, maxLight=.8,
|
|
|
+ gamma=.9)
|
|
|
+
|
|
|
if args.sendercolors:
|
|
|
for plid in enumerate(slist):
|
|
|
plt.plot_date(np.array(pldata[plid[1]])[:,0],
|
|
|
@@ -107,7 +127,41 @@ if len(pldata['unknown']) > 0:
|
|
|
if args.sendercolors:
|
|
|
plt.legend(loc='upper left', ncol=nsend + 1)
|
|
|
|
|
|
-if args.plotfile:
|
|
|
- plt.savefig(args.plotfile)
|
|
|
+if args.plotroot:
|
|
|
+ plt.savefig(args.plotroot + '-send_times.png')
|
|
|
+else:
|
|
|
+ plt.savefig('send_times.png')
|
|
|
+
|
|
|
+
|
|
|
+# Email destination domains as a function of month
|
|
|
+plt.figure()
|
|
|
+plt.ylabel('Emails sent to Domain')
|
|
|
+plt.xlabel('Year-Month')
|
|
|
+plt.minorticks_on()
|
|
|
+plt.title(args.title + ' - Email Destination Domains')  # --title is only the root; add a per-plot suffix
|
|
|
+
|
|
|
+months = list(senders.keys())
|
|
|
+months.sort()
|
|
|
+
|
|
|
+for j in range(len(domains) + 1):
|
|
|
+ domainlist = []
|
|
|
+ if j == 0:
|
|
|
+ label = 'Other'
|
|
|
+ else:
|
|
|
+ label = domains[j - 1]
|
|
|
+
|
|
|
+ for month in months:
|
|
|
+ domainlist.append(senders[month][j])
|
|
|
+ plt.plot(np.arange(len(months)),
|
|
|
+ domainlist,
|
|
|
+ label=label)
|
|
|
+
|
|
|
+plt.legend(frameon=False, loc='best')
|
|
|
+plt.xticks(np.arange(len(months)), months, rotation=90)
|
|
|
+plt.setp(plt.gca().get_xticklabels(), visible=False)  # hide every month label on the current axes...
|
|
|
+plt.setp(plt.gca().get_xticklabels()[::6], visible=True)  # ...then re-show every sixth one
|
|
|
+
|
|
|
+if args.plotroot:
|
|
|
+ plt.savefig(args.plotroot + '-destinations.png', bbox_inches='tight')
|
|
|
else:
|
|
|
- plt.savefig('email_times.png')
|
|
|
+ plt.savefig('email_destinations.png', bbox_inches='tight')
|