/* ---------------------------------------------------------------------------
 *	"LogAll" feature to log any message through this MTA
 */
{
/* Name of the marker header LogAll looks up and adds to a logged message. */
#define LOGALLHEADER	"X-Logged"
/* LogLevel threshold: LogAll writes its own syslog entries at or above it. */
#define LOGALLLEVEL	12

	/*
	 * RCS identification and copyright text kept as a string object
	 * (the revision log notes "copyright in binary" for rev 1.16).
	 * It is not referenced anywhere else in this fragment.
	 */
	char logallrcsid[] =
		"$Id: logall.c,v 1.33 2002/08/14 09:05:34 root Exp root $"
		"(c) /ARX cleanware 2002";
/*
 *	$Log: logall.c,v $
 *	Revision 1.33  2002/08/14 09:05:34  root
 *	flush the buffer before unlock
 *
 *	Revision 1.32  2002/07/16 14:06:51  root
 *	use own strdup for 8.12 with pool
 *
 *	Revision 1.31  2002/04/22 19:52:42  root
 *	conf dir for sm 8.12
 *
 *	Revision 1.30  2002/04/22 19:22:45  root
 *	get queue dir from lib call
 *
 *	Revision 1.29  2002/03/18 10:36:44  root
 *	released 8.12.x version
 *
 *	Revision 1.27  2002/03/06 19:06:54  root
 *	conf macro and sm_io lib
 *
 *	Revision 1.26  2002/03/06 18:48:24  root
 *	8.12.1 compiled
 *
 *	Revision 1.25  2001/10/08 08:19:18  root
 *	sm 8.12.x changed macros and io-semantics
 *	alpha alpha
 *
 *	Revision 1.24  2001/04/27 16:26:08  root
 *	multiple exclude headers
 *
 *	Revision 1.23  2001/04/26 04:37:39  root
 *	8.11.3 sendmail
 *
 *	Revision 1.22  2001/04/26 04:36:18  root
 *	prepend From with 'c'
 *
 *	Revision 1.21  2001/02/28 18:46:27  root
 *	comments
 *
 *	Revision 1.20  2000/08/21 07:01:50  root
 *	released
 *
 *	Revision 1.17  2000/07/22 19:39:12  root
 *	bad word scan and reject
 *
 *	Revision 1.16  2000/03/14 05:41:31  root
 *	copyright in binary
 *
 *	Revision 1.14  1999/11/21 11:45:56  root
 *	feature exclude added
 *
 *	Revision 1.13  1998/10/03 11:19:33  root
 *	released
 *
 *	Revision 1.12  1998/09/20 19:45:28  root
 *	8.9.1a stable
 *
 *	Revision 1.11  1998/09/19 10:12:04  root
 *	published
 *
 *	Revision 1.7  1998/09/17 07:12:49  root
 *	clobbered qtimestr
 *

ABSTRACT
========

 This routine logs every message in a mail folder before it is
actually transmitted.  The log is "envelope-oriented" which means
it logs every message once - not for every recipient.  You must
also see the sendmail syslog for actual delivery.  The log
includes full message bodies.

 The routine uses the checkcompat() interface. To every logged mail
a new header "X-Logged" is added which includes machine and time of
the queueing. At LogLevel >=12 the routine logs its own activity to
the sendmail syslog. To debug the routine's actions, debug flag
-d49.1 should be set.

 Mails which contain configurable headers can be excluded from the
log.  For example all messages from mailing lists can be excluded
from the log.

 The routine can also search any mail body for a configurable list
of keywords (sex, bomb, virus, ...).  If one of the keywords is
found in the body of the mail or in the subject the mail is
rejected (return to sender without body).

 Pitfalls of this approach: This filter can _NOT_ work on encrypted
or compressed mail or on attachments where the body is not
readable.  It will not work if the keywords are split across two
lines or scrambled (se-x, vi..rus, ...).

 Also it is possible that the filter will reject mail which should
be passed - for example a man living in "Sussex" who has his
hometown in his mail footer.  He will never be able to mail to
your site.  Also all binary data is likely to have such strings
inside - the longer the data, the more likely a keyword match
becomes.

 To make the filter less restrictive against such large
attachments a maximum file size to scan is configurable and the
keywords must appear as "words" - surrounded by spaces or
non-alphanumeric characters.  So this filter is only useful for
some kind of a very restrictive site like a school or religious
organisation.

 The activation of this filter also makes the whole mail agent a
lot slower - on heavy loaded systems a hardware upgrade might be
needed.  Mails which are excluded from the log are also excluded
from the scan.

 *	IMPLEMENTATION
 *	The logging code is compiled into the sendmail binary
 *	with the documented and often overlooked checkcompat()
 *	routine:
 *	see "Bryan Costales & Eric Allman: sendmail 2nd Edition
 *		O'Reilly 1997
 *		§20 `The checkcompat() Cookbook' p.285ff"
 *	To check whether a message was already logged it adds
 *	a new header "X-Logged" to the message with the logging
 *	host, the envelope id and queued date/time as values.
 *	As long as this info doesn't change the message is not
 *	logged again.
 *
 *	CONFIGURATION
 *	The logging is configured in new macro(s)/class:
 *
 *	D{LogAll}path
 *	where path is the full pathname of the logfile, which
 *	should be mode 600. REMEMBER THIS IS A FULL BODY LOG.
 *	So be prepared for a huge file on sites with much traffic.
 *	The file must exist and every mail is appended to it.
 *
 *	D{LogExc}header
 *	specifies headers. LogAll will not log any messages which
 *	have any of these headers. This is useful to exclude all mails from
 *	or to mailing lists for example by specifying "X-Mailing-List".
 *	Separate the headers with spaces.
 *
 *	D{LogMax}max_size_in_bytes
 *	when this macro is defined the filter will scan the subject
 *	and mail body for the list of keywords in the class {LogBad}
 *	and reject any mail which contains one or more of the keywords.
 *	It also specifies the maximum size of a mail body which should be
 *	scanned against the keyword list in class {LogBad}. If it is 0
 *	(not recommended) messages of any size will be scanned (expect
 *	no large attachments to come through and a big waste of bandwidth).
 *
 *	C{LogBad}keyword...
 *	F{LogBad}path
 *	this is the class which must be defined when the macro {LogMax}
 *	is defined and the routine should scan the mail body for bad
 *	keywords. The keywords are the members of the class. You can either
 *	use the "C" form and include the list of words in the config
 *	file or the "F" form and maintain the list in a separate file.
 *
 *	The macro(s) should be inserted into a m4-config-file for
 *	example like this:
 *
 *		LOCAL_CONFIG
 *		D{LogAll}/var/log/mail.log
 *		D{LogExc}X-Mailing-List
 *		D{LogMax}65535
 *		C{LogBad}sex bomb
 *
 *	INSTALLATION
 *	This source fragment must be included into the source-file:
 *	.../sendmail-8.x.y/src/conf.c
 *	at the following position (with the full path name of its
 *	current location - here "/root/adm/mail/sendmail/logall.c"):
 *
 *>>old		if (tTd(49, 1))
 *>>old			printf("checkcompat(to=%s, from=%s)\n",             
 *>>old				to->q_paddr, e->e_from.q_paddr);           
 *>>old
 *>>new		#include "/root/adm/mail/sendmail/logall.c"
 *>>old
 *>>old		# ifdef EXAMPLE_CODE
 *
 *	the sendmail binary must be rebuilt and reinstalled at
 *	its proper position (normally /usr/sbin/sendmail).
 *	The daemon must be restarted.
 *
 *	MANAGEMENT
 *	Since the logfile produced by LogAll is a standard Unix-mailfolder,
 *	the logged mails can be managed with every Unix mail-reader or MUA.
 *	The mails inside the log can be deleted, remailed or read with any
 *	mail user agent. If the logfile is the standard mail folder of a
 *	special "LogAll-user" this is especially easy and even "mail"
 *	or a Windows-based IMAP-client can manage the logfile.
 *
 *	LOGGING
 *	The routine logs its usage with LOG_LEVEL>=12 into the standard
 *	sendmail logging file. To debug the use of the routine you must
 *	set debug flag 49.1.
 *
 *	COMPATIBILITY
 *	LogAll is tested with following combinations:
 *	sendmail-8.8.5 /  Linux 2.0.29
 *	sendmail-8.9.1a / Linux 2.0.29
 *	sendmail-8.9.2 /  Linux 2.0.36
 *	sendmail-8.9.2 /  Linux 2.2.16
 *	sendmail-8.11.0 / Linux 2.2.16
 *	sendmail-8.11.3 / Linux 2.2.16
 *	sendmail-8.11.6 / Linux 2.2.19
 *	sendmail-8.12.1 / Linux 2.2.20
 *
 *	AUTHOR
 *	Axel Reinhold - logall@freakout.de
 *
 *	LICENSE/WARRANTY
 *	The software is provided "AS IS" without warranties of any kind,
 *	either expressed or implied, including, but not limited to the
 *	implied warranties of merchantability and fitness for a particular
 *	purpose. The entire risk of the software is with you. In no event
 *	we will be liable for any damages, including any lost profits,
 *	lost savings or other incidental damages arising out of the use
 *	or inability to use the software, even if we have been advised
 *	of the possibility of such damages, or for any claim by another party.
 *	The license is free in every aspect - there are no costs or
 *	restrictions of any kind.
 *   -------------------------------------------------------------------------
 */

#include <assert.h>

/*
 * SM812 takes the value of SM_CONF_H; the "#if SM812" tests below select
 * the sendmail 8.12.x flavour of the API (macid, addheader, sm_io_*).
 * NOTE(review): presumably SM_CONF_H is only defined (non-zero) by the
 * 8.12 headers -- confirm against the sendmail tree being patched.
 */
#define SM812 SM_CONF_H

#if SM812
	/* 8.12.1: macid() takes only the macro name */
	#define mymacid(id) macid(id)
#else
	/* older releases: macid() takes an extra pointer argument, unused here */
	#define mymacid(id) macid(id, NULL)
#endif

	/*
	 * Local state of the LogAll fragment.  Everything is given a defined
	 * initial value so no path can read an indeterminate pointer or
	 * position.  The names are referenced by the statements that follow
	 * and must not be changed.
	 */

	/* --- configuration taken from sendmail macros -------------------- */
	int mid = 0;			/* sendmail macro id		    */
	char *logall = NULL;		/* log file path from macro LogAll  */
	char *mailist = NULL;		/* header field for mailing lists   */
	char *maxscan = NULL;		/* max mail macro for scan	    */
	int maxsize = 0;		/* value for max size to scan	    */

	/* --- "X-Logged" header handling ---------------------------------- */
	char *hlogged = LOGALLHEADER;	/* header field			    */
	char hlogval[MAXLINE];		/* header value			    */
	char *hlogvalp = NULL;		/* header value pointer		    */
	char qtimestr[80];		/* queued time string from asctime  */

	/* --- files ------------------------------------------------------- */
	char bfpath[MAXLINE];		/* body file path		    */
	FILE *lf = NULL;		/* log file			    */
#if SM812
	SM_FILE_T *bf = NULL;		/* body file descriptor for sm 8.12 */
#else
	FILE *bf = NULL;		/* body file			    */
#endif
	long lfpos = 0, bfpos = 0;	/* remember file pos		    */
	size_t b_read = 0;		/* bytes read counter		    */
	long bodybytes = 0;		/* body bytes length read	    */
	/*
	 * Plain char (was unsigned char): the buffer is only handed to
	 * fgets/sm_io_fgets, strlen, strncmp, fputs and strtok, all of which
	 * take char pointers.
	 */
	char b_buf[8193];		/* buffer for read/write body	    */

	/* --- envelope walking -------------------------------------------- */
	ADDRESS *a = NULL;		/* address structure		    */
	HDR *h = NULL;			/* header structure		    */
	int toflag = 0;			/* to header flag		    */
	int fromflag = 0;		/* from header flag		    */

	/* --- exclude / bad word scanning --------------------------------- */
	char *badword = NULL;		/* badword scan flag		    */
	int exclhdr = 0;		/* exclude header scan flag	    */
	char *sep_list = " \t\n\r.,-()[]/!?";/* separator list for word scan*/
	char *hsep_list = " \t,";	/* separator list for header scan   */
	char *token = NULL;		/* word token for match to badlist  */
	char *hsubjvalp = NULL;		/* subject header value pointer	    */
	char subject[MAXLINE];		/* scratch copy of the subject	    */

	/*
	 * LogAll main sequence:
	 *  1. look up {LogAll}; do nothing unless it is set and the file opens
	 *  2. read {LogMax} (bad word scan limit) and {LogExc} (exclude headers)
	 *  3. unless excluded: build the X-Logged value, scan the subject,
	 *     add the header, append headers + body to the log file while
	 *     scanning body lines for words in class {LogBad}
	 *  4. close the log file on every path
	 *  5. if a bad word was found: reject the mail (EX_UNAVAILABLE)
	 *
	 * NOTE(review): the body is only scanned while the message is being
	 * logged.  Once the X-Logged header carries our value, a later pass
	 * over the same envelope skips the body scan, so a bad word in the
	 * body may only reject the first recipient -- confirm whether that
	 * is intended.
	 */
	mid = mymacid("{LogAll}");	/* get our config macro		    */
	logall = macvalue(mid, e);	/* to check whether and where to log*/

	/* Use logging if macro is set and we can open the logfile	    */
	if (logall != NULL && (lf = fopen(logall, "r+")) != NULL) {
		if (tTd(49, 1))		/* debug use of logging		    */
			printf("checkcompat: LogAll=%s e_id=%s\n", logall, e->e_id);

		/* check whether to scan for bad words			    */
		mid = mymacid("{LogMax}");
		maxscan = macvalue(mid, e);
		if (maxscan != NULL) {
			maxsize = atoi(maxscan);
			/* cast so the %ld conversion matches the argument  */
			if (tTd(49, 1))
				printf("checkcompat: LogAll: BadScan=%ld\n",
					(long) maxsize);
		}

		/* don't log eXcluded mails				    */
		mid = mymacid("{LogExc}");
		mailist = macvalue(mid, e);
		if (mailist != NULL) {
			/*
			 * strtok() overwrites separators with NULs, so work on
			 * a private copy; tokenizing the macro value in place
			 * would leave only the first header name for later
			 * messages.
			 */
			char excbuf[MAXLINE];

			strncpy(excbuf, mailist, sizeof(excbuf) - 1);
			excbuf[sizeof(excbuf) - 1] = '\0';
			for (token = strtok(excbuf, hsep_list); token != NULL;
			     token = strtok(NULL, hsep_list)) {
				if (hvalue(token, e->e_header) != NULL) {
					exclhdr = 1;
					break;
				}
			}
		}

		if (exclhdr == 0) {
			size_t tlen;	/* length of the asctime string	    */

			/* construct the "X-Logged" header field	    */
			strcpy(qtimestr, asctime(localtime(&e->e_ctime)));
			tlen = strlen(qtimestr);
			if (tlen > 0 && qtimestr[tlen - 1] == '\n')
				qtimestr[tlen - 1] = '\0'; /* drop asctime's LF */
			sprintf(hlogval, "Logged by %s as %s at %s",
				MyHostName, e->e_id, qtimestr);

			/* check the subject for bad words		    */
			if (maxscan != NULL) {
				hsubjvalp = hvalue("Subject", e->e_header);
				if (hsubjvalp != NULL) {
					mid = mymacid("{LogBad}");
					/* tokenize a copy, not the header itself */
					strncpy(subject, hsubjvalp, sizeof(subject) - 1);
					subject[sizeof(subject) - 1] = '\0';
					for (token = strtok(subject, sep_list);
					     token != NULL;
					     token = strtok(NULL, sep_list)) {
						if (wordinclass(token, mid)) {
							badword = strdup(token);
							break;
						}
					}
				}
			}

			/* add the header if it doesn't already exist	    */
			hlogvalp = hvalue(hlogged, e->e_header);

#if SM812
			/* dirty hack 8.12.1 needs envelope as fourth parm  */
			if (hlogvalp == NULL)
				addheader(hlogged, hlogval, H_DEFAULT, e);
#elif H_STRIPCOMM
			/* dirty hack 8.11.0 needs four parms and defines this value */
			/* don't know about 8.9.3-8.9.10 maybe clashes at compile    */
			if (hlogvalp == NULL)
				addheader(hlogged, hlogval, H_DEFAULT, &e->e_header);
#else
			if (hlogvalp == NULL)
				addheader(hlogged, hlogval, &e->e_header);
#endif

			/* Log the message if our header didn't exist or was not from us */
			if (hlogvalp == NULL || strcmp(hlogvalp, hlogval) != 0) {

				/* lock the logfile exclusive; result deliberately ignored */
				(void) lockfile(fileno(lf), logall, NULL, LOCK_EX);
				fseek(lf, 0, SEEK_END);	/* append at end of file    */
				lfpos = ftell(lf);	/* start of this log entry  */

				/* syslog the usage of LogAll		    */
				if (LogLevel >= LOGALLLEVEL)
					sm_syslog(LOG_INFO, e->e_id,
						"LogAll to %s at %s", logall, qtimestr);

				/* Unix From line separates messages in the log file */
				fprintf(lf, "From %s %s\n", e->e_from.q_user, qtimestr);

				/* scan all headers			    */
				for (h = e->e_header; h != NULL; h = h->h_link) {

					/*
					 * An existing X-Logged header with a
					 * different value is overwritten with
					 * ours; in both cases H_DEFAULT is
					 * cleared on that header.
					 */
					if (strcasecmp(h->h_field, hlogged) == 0) {
						if (hlogvalp != NULL) {
#if SM812
							/* old value is left to the rpool (author's
							 * note: seems not needed to be freed) */
							h->h_value = sm_rpool_strdup_x(e->e_rpool, hlogval);
#else
							if (h->h_value != NULL)
								free(h->h_value);
							h->h_value = strdup(hlogval);
#endif
						}
						h->h_flags &= ~H_DEFAULT;
					}

					/* log the header if appropriate    */
					if (h->h_value != NULL && (h->h_flags & H_RESENT) == 0)
						fprintf(lf, "%s: %s\n", h->h_field, h->h_value);

					if (h->h_flags & H_RCPT)
						toflag = 1;	/* saw a recipient header */
				}

				/* if no to-header construct one from the envelope */
				if (!toflag) {
					fprintf(lf, "To:");
					for (a = e->e_sendqueue; a != NULL; a = a->q_next)
						fprintf(lf, " %s", a->q_paddr);
					fprintf(lf, "\n");
				}

				fprintf(lf, "\n");	/* separate headers from body */

				/* create body path			    */
#if SM812
				/* body file from routine cause of subdirs function >8.10.x */
				sprintf(bfpath, "%s/df%s", qid_printqueue(e->e_qgrp, e->e_qdir), e->e_id);
#elif H_STRIPCOMM
				sprintf(bfpath, "%s/df%s", qid_printqueue(e->e_queuedir), e->e_id);
#else
				sprintf(bfpath, "%s/df%s", QueueDir, e->e_id);
#endif

				/* body comes from the open temp file or the queued data file */
				bf = NULL;
				if (e->e_dfp != NULL) {
					bf = e->e_dfp;
#if SM812
					assert(bf->sm_magic != NULL && e->e_dfp->f_file != -1);
					bfpos = sm_io_tell(bf, SM_TIME_DEFAULT); /* remember old position */
					sm_io_rewind(bf, SM_TIME_DEFAULT); /* read from beginning */
#else
					bfpos = ftell(bf);	/* remember old position    */
					rewind(bf);		/* read from beginning	    */
#endif
				}
				else {
#if SM812
					bf = sm_io_fopen(bfpath, SM_TIME_DEFAULT, "r");
#else
					bf = fopen(bfpath, "r");
#endif
				}

				if (bf == NULL) {	/* no body could be opened  */
					if (tTd(49, 1))
						printf("checkcompat: fopen(%s) failed\n", bfpath);
				}
				else {
					/* char view of the buffer for the string routines */
					char *line = (char *) b_buf;

					mid = mymacid("{LogBad}");
					while ((
#if SM812
						sm_io_fgets(bf, SM_TIME_DEFAULT, line, sizeof(b_buf)-1)
#else
						fgets(line, sizeof(b_buf)-1, bf)
#endif
							) != NULL && badword == NULL) {
						b_read = strlen(line);

						/* a body line starting with "From " is prepended with '>' */
						if (strncmp(line, "From ", 5) == 0)
							fputc('>', lf);
						fputs(line, lf);

						if (tTd(49, 1))
							printf("checkcompat: LogAll: bodybytes=%ld\n", bodybytes);

						/*
						 * Scan this line for bad words.  A
						 * {LogMax} of 0 means "no size limit",
						 * as the CONFIGURATION notes state.
						 * The line is already written, so
						 * strtok() may chop up the buffer.
						 */
						if (maxscan != NULL &&
						    (maxsize == 0 || bodybytes < maxsize)) {
							for (token = strtok(line, sep_list);
							     token != NULL;
							     token = strtok(NULL, sep_list)) {
								if (tTd(49, 1))
									printf("checkcompat: LogAll: tok=\"%s\"\n", token);
								if (wordinclass(token, mid)) {
									badword = strdup(token);
									break;
								}
							}
						}
						bodybytes += b_read;
					}
				}

				/*
				 * Put sendmail's own body file back where it was;
				 * close the file only if we opened it ourselves
				 * (bf stays NULL when that open failed).
				 */
				if (e->e_dfp != NULL) {
#if SM812
					sm_io_seek(bf, SM_TIME_DEFAULT, bfpos, SEEK_SET);
#else
					fseek(bf, bfpos, SEEK_SET);
#endif
				}
				else if (bf != NULL) {
#if SM812
					sm_io_close(bf, SM_TIME_DEFAULT);
#else
					fclose(bf);
#endif
				}

				if (badword != NULL) {
					/* found bad word so discard this log entry */
					fseek(lf, lfpos, SEEK_SET);
					ftruncate(fileno(lf), lfpos);
				}
				else {
					fprintf(lf, "\n");	/* trailing empty line */
				}
				fflush(lf);		/* flush the buffer before unlock */
				(void) lockfile(fileno(lf), logall, NULL, LOCK_UN);
			}
		}

		/* close the log on every path, including excluded mails */
		fclose(lf);

		if (badword != NULL) {
			if (LogLevel >= LOGALLLEVEL)
				sm_syslog(LOG_INFO, e->e_id,
					"LogAll rejected bad word \"%s\" to=%s from=%s\n",
					badword, to->q_paddr, e->e_from.q_paddr);
			free(badword);
			usrerr("553 Bad words in mail not allowed");
			e->e_flags |= EF_NO_BODY_RETN;
			to->q_status = "5.7.1";
			return (EX_UNAVAILABLE);
		}
	}

/*   ------------------------------------------------------------
 *	LogAll End
 *	(c) /ARX cleanware
 *	$Revision: 1.33 $
 *   ------------------------------------------------------------
 */
}
