#!/usr/bin/perl -w
#
# uniqmail.pl - This program ensures that only unique email is delivered
# to a user's inbox.  It concatenates md5 checksums of the 'From' header,
# 'Subject' header and body of incoming email.  If the resulting string
# does not already exist in $histlog, it is added to $histlog and the
# email is delivered to the user's inbox.  If the resulting string already
# exists in $histlog, the email is discarded.  One way to use this program
# is to put a simple procmail recipe such as the following at the end of
# a user's ~/.procmailrc file.
#
#   :0
#   | uniqmail.pl
#
# To use it in this way (as I do), procmail must obviously be installed.
# Procmail and it's documentation is available at the following website.
#
#   http://www.procmail.org/
#
# Note that this program requires the Mail::Audit and Digest::MD5 modules.
# These modules, and their depends, are available from CPAN, the Comprehensive
# Perl Archive Network, at http://www.cpan.org/ or via your favorite CPAN
# mirror.
#
# Copyright (C) 2001 Kyle Amon
# All rights reserved.
#
# Author:
#
# Kyle Amon
# GNUTEC, Inc.
# Information Technology Solutions
# (813) 979-1633
# (203) 668-UNIX
# amonk@gnutec.com
# http://www.gnutec.com/
#               
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Change history:
#
# 0.10    08-30-01        Alpha release
# 0.11    09-02-01        Fixed bugs and added user name determination
# 0.12    09-03-01        Added size limit control for $histlog file
# 0.13    09-03-01        Added feature to ignore numbers in mail body
# 0.14    09-14-01        Made ending percent symbols treated as final digit
#
# TODO (ordered roughly by difficulty/priority):
#
# o  Write some documentation. :-)

require 5.004;
use strict;
use Mail::Audit;
use Digest::MD5;

#
# user defined variables
#

my $spooldir	= '/var/mail';		# mail spool directory
my $homedir	= '/home';		# user home directory
my $maildir	= 'mail';		# user mail directory
my $histlog	= '.history';		# mail history log file
my $histmax	= '300';		# mail history max lines
my $histback	= '';			# backup extension; null = none
my $bodynums	= '1';			# ignore numbers: 0 = off / 1 = on
my $debug	= '0';			# debuging: 0 = off / 1 = on

#
# symbol declerations
#

my ( $lines, $action );

main ();
exit;


#
# main subroutine
#

sub main {

  #
  # secure the environment
  #

  delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};     # make %ENV safer
  $ENV{PATH} = "/bin:/usr/bin";                 # set PATH

  #
  # get some usefull variables
  #

  my $user = getpwuid($<);				# get user name
  $histlog = "$homedir/$user/$maildir/$histlog";	# customize $histlog

  #
  # manage $histlog.debug file
  #

  if ($debug) {
    open (DEBUG, ">>$histlog.debug");	# open $histlog debug file for append
  } elsif ( -e "$histlog.debug" ) {
    unlink "$histlog.debug";		# remove old $histlog debug file
  }

  #
  # manage $histlog file
  #

  $lines = 0;				# initialize $lines
  if (! -e $histlog) {			# create $histlog file if nonexistent
    open (HISTLOG, ">$histlog");
    close (HISTLOG);
  } else {				# count number of lines in $histlog
    open (HISTLOG, "$histlog") or die "Can't open $histlog: $!";
    while (sysread HISTLOG, my $buffer, 4096) {
      $lines += ($buffer =~ tr/\n//);
    }
    close (HISTLOG);
    print DEBUG "\nhistlog: $lines lines\n" if ($debug);
  }

  if ($lines >= $histmax) {
    $ARGV[0] = $histlog;		# fake $histlog as command line arg
    local $^I = $histback;		# set $histlog backup extension
    while (<>) {
      if ($. < ($lines - $histmax) + 2) {
        s/.*\n//;			# delete excess lines from $histlog top
        print DEBUG "delete:  line $.\n" if ($debug);
      }
      print;				# print line
    }
  }

  #
  # create mail object, get usefull headers and body
  #

  my $email = Mail::Audit->new;			# create new email object

  my $from = $email->from();
  my $to = $email->to();
  my $cc = $email->cc();
  my $subject = $email->subject();
  chomp($from, $to, $cc, $subject);
  my $body = $email->body();

  #
  # get md5 checksums
  #

  my $md5 = Digest::MD5->new;			# create new md5 object

  $md5->add("$from");				# add $from to md5 object
  my $fdigest = $md5->b64digest;		# get b64 digest of $from

  $md5->reset;					# reset md5 object

  $md5->add("$subject");			# add $subject to md5 object
  my $sdigest = $md5->b64digest;		# get b64 digest of $subject

  $md5->reset;					# reset md5 object

  foreach my $line (@{$body}) {
    my $tmpline = $line;		# make copy of $line to work on
    if ($bodynums) {
      $tmpline =~ s/^(\s*\d+%*\s+)+//g;	# ignore numbers at begining of line
      $tmpline =~ s/([\s\d]+\d+%*\s+)+//g;# ignore numbers in middle of line
      $tmpline =~ s/(\s+\d+%*\s*)+\n//g;# ignore numbers at end of line
    }
    $md5->add($tmpline);		# add body $line to md5 object
  }
  my $bdigest = $md5->b64digest;		# get b64 digest of body

  print DEBUG "cathash: $fdigest $sdigest $bdigest\n" if ($debug);

  #
  # check if email is a duplicate
  #

  open (HISTLOG, "<$histlog");		# open history log for read
  while (<HISTLOG>) {
    my $match = index($_, "$fdigest" . ' ' . "$sdigest" . ' ' . "$bdigest");
    if ($match >=  0 ) {
      print DEBUG "matched: $_" if ($debug);
      $action = 'ignored';		# mark email to ignore
    }
  }
  close (HISTLOG);

  #
  # ignore or log concatenated hash string and accept email
  #

  if ( $action eq 'ignored' ) {
    print DEBUG "action:  $action\n" if ($debug);
    $email->ignore();				# ignore email
  } else {
    $action = 'accepted';			# mark email as accepted
    open (HISTLOG, ">>$histlog");		# open history log for append
    print HISTLOG "$fdigest ";			# log from digest
    print HISTLOG "$sdigest ";			# log subject digest
    print HISTLOG "$bdigest\n";			# log body digest
    close (HISTLOG);
    print DEBUG "action:  $action\n" if ($debug);
    $email->accept("$spooldir/$user");		# accept email
  }

  close (DEBUG);				# be tidy :-)

}

