email_stats.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. #!/usr/bin/python
  2. #
  3. # email_stats.py
  4. #
  5. # Time vs date plot for an email mbox file.
  6. #
  7. # Copyright (C) 2015 George C. Privon
  8. #
  9. # This program is free software: you can redistribute it and/or modify
  10. # it under the terms of the GNU General Public License as published by
  11. # the Free Software Foundation, either version 3 of the License, or
  12. # (at your option) any later version.
  13. #
  14. # This program is distributed in the hope that it will be useful,
  15. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. # GNU General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. import mailbox
  22. import datetime
  23. import matplotlib.pyplot as plt
  24. import matplotlib.dates as mdates
  25. import argparse
  26. import re
  27. import numpy as np
  28. import cubehelix
  29. parser = argparse.ArgumentParser()
  30. parser.add_argument('mbox', help='Mailbox to analyze.')
  31. parser.add_argument('--plotfile', '-p', default=False, action='store',
  32. help='Name of output plotting file.')
  33. parser.add_argument('--title', '-t', default='Email Send Times',
  34. action='store', help='Plot title.')
  35. parser.add_argument('--sendercolors', '-s', default=False, action='store',
  36. help='Comma separated list of search strings for the \
  37. sender field. Each will be displayed with a \
  38. different color.')
  39. args = parser.parse_args()
  40. plt.figure()
  41. plt.ylim([0, 27])
  42. plt.ylabel('Hour')
  43. plt.xlabel('Date')
  44. plt.minorticks_on()
  45. plt.title(args.title)
  46. a = mailbox.mbox(args.mbox)
  47. pldata = {}
  48. if args.sendercolors:
  49. slist = args.sendercolors.split(',')
  50. nsend = len(slist)
  51. scolor = cubehelix.cmap(startHue=240, endHue=-300,
  52. minSat=1, maxSat=2.5,
  53. minLight=.3, maxLight=.8,
  54. gamma=.9)
  55. for item in slist:
  56. pldata[item] = []
  57. pldata['unknown'] = []
  58. for msg in a:
  59. cid = None
  60. label = None
  61. if msg['date'] is not None:
  62. try:
  63. z = datetime.datetime.strptime(msg['date'], '%a, %d %b %Y %H:%M:%S %z')
  64. except ValueError:
  65. try:
  66. z = datetime.datetime.strptime(msg['date'], '%a, %d %b %Y %H:%M:%S %Z')
  67. except ValueError:
  68. print("Skipping message from " + msg['date'])
  69. continue
  70. if args.sendercolors:
  71. for search in enumerate(slist):
  72. if re.search(search[1], msg['From'], re.IGNORECASE):
  73. cid, label = search
  74. pldata[label].append([z.date(), z.hour + z.minute/60.])
  75. break
  76. if cid is None:
  77. pldata['unknown'].append([z.date(), z.hour + z.minute/60.])
  78. for plid in enumerate(pldata.keys()):
  79. plt.plot_date(np.array(pldata[plid[1]])[:,0],
  80. np.array(pldata[plid[1]])[:,1],
  81. color=scolor(plid[0] / (nsend + 1)),
  82. ls='.',
  83. #tz=z.tzname(),
  84. label=plid[1],
  85. xdate=True)
  86. if args.sendercolors:
  87. plt.legend(title='Sender', loc='upper left', ncol=nsend + 1)
  88. if args.plotfile:
  89. plt.savefig(args.plotfile)
  90. else:
  91. plt.savefig('email_times.png')