#!/usr/bin/python
#
# email_stats.py
#
# Time vs date plot for an email mbox file.
#
# Copyright (C) 2015 George C. Privon
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import argparse
import datetime
import email.utils
import mailbox
import re

import cubehelix
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
  28. parser = argparse.ArgumentParser()
  29. parser.add_argument('mbox', help='Mailbox to analyze.')
  30. parser.add_argument('--plotfile', '-p', default=False, action='store',
  31. help='Name of output plotting file.')
  32. parser.add_argument('--title', '-t', default='Email Send Times',
  33. action='store', help='Plot title.')
  34. parser.add_argument('--sendercolors', '-s', default=False, action='store',
  35. help='Comma separated list of search strings for the \
  36. sender field. Each will be displayed with a \
  37. different color.')
  38. args = parser.parse_args()
  39. plt.figure()
  40. plt.ylim([0, 24])
  41. plt.ylabel('Hour')
  42. plt.xlabel('Date')
  43. plt.minorticks_on()
  44. plt.title(args.title)
  45. a = mailbox.mbox(args.mbox)
  46. if args.sendercolors:
  47. slist = args.sendercolors.split(',')
  48. nsend = len(slist)
  49. scolor = cubehelix.cmap(startHue=240, endHue=-300,
  50. minSat=1, maxSat=2.5,
  51. minLight=.3, maxLight=.8,
  52. gamma=.9)
  53. for msg in a:
  54. cid = None
  55. label = None
  56. if msg['date'] is not None:
  57. try:
  58. z = datetime.datetime.strptime(msg['date'], '%a, %d %b %Y %H:%M:%S %z')
  59. except ValueError:
  60. try:
  61. z = datetime.datetime.strptime(msg['date'], '%a, %d %b %Y %H:%M:%S %Z')
  62. except ValueError:
  63. print("Skipping message from " + msg['date'])
  64. continue
  65. if args.sendercolors:
  66. for search in enumerate(slist):
  67. if re.search(search[1], msg['From'], re.IGNORECASE):
  68. cid, label = search
  69. break
  70. if cid is None:
  71. cid = nsend+1
  72. label = 'unknown'
  73. plt.plot_date(z.date(),
  74. z.hour + z.minute/60.,
  75. color=scolor(cid / (nsend + 1)),
  76. ls='.',
  77. # tz=z.tzname(),
  78. label=label,
  79. xdate=True)
  80. if args.sendercolors:
  81. plt.legend()
  82. if args.plotfile:
  83. plt.savefig(args.plotfile)
  84. else:
  85. plt.savefig('email_times.png')