///////////////////
// mail-scan.c
//
// Primitive grep-like thing for looking at mail headers.
//
// Provenance: added as tools/mail-scan.c by commit
//   ffc4b5ce605253957b2a0ce82cf924c669806590
//   From: John Denker
//   Date: Sun, 15 Jul 2012 06:43:36 -0700
//   Subject: primitive grep-like thing for looking at mail headers
//
// Build: the same commit appended this program to the `progs` list in
// tools/makefile:
//   progs = pido hi-q skrewt hi-test mail-scan
//

#include <iostream>
#include <stdlib.h>     /* for exit(), getenv(), atoi() */
#include <string>
#include <list>
#include <ctype.h>      /* toupper, tolower */
#include <signal.h>     /* signal(), kill() */
#include <fstream>

#include <stdio.h>      /* perror */

// exit codes, compatible with spamassassin (not with qmail-queue)
const int sa_good(0);
const int sa_spam(1);
const int sa_usage(64);

/////////////////////////////////////////////////////////
// Prints the help text and exits with status sts.
// The text goes to stdout when sts is zero (help was asked for),
// and to stderr otherwise.  Never returns.
void usage(const int sts){
  (sts ? std::cerr : std::cout) <<
"Usage: mail-scan [options] +headword ... file ...\n"
"\n"
"  Primitive grep-like thing for looking at mail headers.\n"
"  Scans the header section of each file and prints, in the form\n"
"      file :: header\n"
"  every header whose name matches one of the +headword arguments.\n"
"  Matching ignores case; continuation lines are printed too.\n"
"\n"
"  Options\n"
"    -help            print this msg (and exit immediately).\n"
"\n"
"  Example:  mail-scan +subject +from msg1 msg2\n"
;
  exit(sts);
}

/////////////////////////////////////////////////////////
// Single-character case folding.
// toupper()/tolower() are only defined for values representable as
// unsigned char (or EOF), so go through unsigned char; the result is
// converted back to char so that comparisons order bytes exactly as a
// plain char comparison would.
static char fold_upper(const char ch){
  return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
}

static char fold_lower(const char ch){
  return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
}

/////////////////////////////////////////////////////////
// Case insensitive comparison of strings
//
// Strict-weak-ordering functor: returns true when a sorts before b,
// ignoring case.  When one string is a prefix of the other, the
// shorter one sorts first.
// NOTE(review): the per-character loop was rebuilt from a damaged copy
// of this file -- confirm against the original.
class lessthan_foldcase{
public:
  bool operator() (const std::string& a, const std::string& b) const {
    const size_t a_len = a.length();
    const size_t b_len = b.length();
    const size_t lim = a_len < b_len ? a_len : b_len;

    for (size_t i=0; i<lim; i++) {
      const char cha = fold_upper(a[i]);
      const char chb = fold_upper(b[i]);
      if (cha < chb) return true;
      if (cha > chb) return false;
    }
    // here if one is an extension of the other
    return a_len < b_len;
  }
};


// Returns negative if a is less than b in alphabetical order
// returns 0 if they are the same, or positive if a is greater.
// Like perl cmp operator, but ignores case.
// The magnitude tells why they differ: +-2 means some character
// differed, +-1 means one string is a proper prefix of the other.
int cmp_casefold(const std::string& a, const std::string& b) {
  std::string::const_iterator aa = a.begin();
  std::string::const_iterator bb = b.begin();
  for (; aa != a.end() && bb != b.end(); ++aa, ++bb){
    const char ca = fold_lower(*aa);
    const char cb = fold_lower(*bb);
    if (ca != cb) return ca < cb ? -2 : 2;
  }
  if (aa != a.end()) return 1;          // a is longer
  if (bb != b.end()) return -1;         // b is longer
  return 0;
}


// Returns a copy of a with every character converted to lower case.
std::string toLower(const std::string& a){
  std::string rslt = a;
  for (std::string::iterator rr = rslt.begin(); rr != rslt.end(); ++rr){
    *rr = fold_lower(*rr);
  }
  return rslt;
}

////////////////
// Returns foo with leading blanks, tabs, CRs and newlines removed.
// A string consisting only of such characters yields the empty string.
std::string ltrim(std::string foo){
  const size_t where = foo.find_first_not_of(" \t\r\n");
  if (where == std::string::npos) return std::string();
  return foo.substr(where);
}

////////////////
// little utility to help with argument parsing:
// returns nonzero when `shorter` is a prefix of `longer`
// (the empty string is a prefix of everything).
//
int prefix(const std::string& shorter, const std::string& longer){
  return longer.compare(0, shorter.length(), shorter) == 0;
}

////////////////
// Exits with status sts.  Never returns.
// For any status other than sa_good, and only if the environment
// variable HI_Q_GROUP is set, first sends SIGUSR1 to the process group
// whose number is given by that variable.
void exeunt(const int sts){
  if (sts == sa_good) exit(sts);

  const char* foo = getenv("HI_Q_GROUP");
  if (!foo) exit(sts);

// No point in signalling ourself:
  if (signal(SIGUSR1, SIG_IGN) == SIG_ERR) {
    std::cerr << "error setting signal" << std::endl;
  }
  const int group = atoi(foo);
  // A negative pid argument makes kill() address a process group.
  if (kill(-group, SIGUSR1)) {
    std::cerr << "kill failed on group " << group << " ... ";
    perror(0);
  }
  exit(sts);
}

////////////////
// Scans the header section of one file.
//
//   file       name of the file to read
//   watchword  header names to look for, already folded to lower case
//   maxsize    limit, in bytes, on the amount of header text
//
// Every header whose name (the text before the first ':') equals one
// of the watchwords is written to stdout as "file :: header".  Lines
// starting with a blank or tab are joined to the preceding header.
// Scanning stops at the first empty line or at end of file.
//
// Returns 0 normally, 1 if the stream reports an unrecoverable
// (badbit) error.  A file that cannot be opened is reported on stderr
// and skipped, returning 0.
static int scan_file(const std::string& file,
                     const std::list<std::string>& watchword,
                     const int maxsize){
  std::ifstream infile(file.c_str());
  // A failed open sets failbit, not badbit, so test the whole stream
  // state; reading from such a stream could never make progress.
  if (!infile) {
    std::cerr << "Failed to open file: " << file << std::endl;
    return 0;
  }

  int msgsize(0);
  std::string header;
  while (std::getline(infile, header)) {
    msgsize += static_cast<int>(header.length()) + 1;

    // The empty line ends the headers.  Check before looking for
    // continuation lines, so that an indented first body line is not
    // mistaken for the continuation of a header.
    if (header.empty()) break;

    // Gather continuation lines.
    for (;;) {
      const int next = infile.peek();
      if (next != ' ' && next != '\t') break;
      std::string line;
      if (!std::getline(infile, line)) break;
      msgsize += static_cast<int>(line.length()) + 1;
      // NOTE(review): this guard and its message come from skrewt; it
      // is only evaluated when a continuation line is appended.
      if (msgsize > maxsize) {
        std::cerr << "skrewt rejection: bigger than " << maxsize << std::endl;
        exeunt(sa_spam);
      }
      header += "\n" + line;
    }
    if (infile.bad()) return 1;

    // Header name is everything before the first colon; a line without
    // a colon has an empty name.
    std::string headword;
    const size_t where = header.find(':');
    if (where != std::string::npos) {
      headword = toLower(header.substr(0, where));
    }

    for (std::list<std::string>::const_iterator ptr = watchword.begin();
         ptr != watchword.end(); ++ptr) {
      if (headword == *ptr) {
        // endl (flush) keeps matches in order relative to the
        // diagnostics written to stderr.
        std::cout << file << " :: " << header << std::endl;
      }
    }
  }
  return infile.bad() ? 1 : 0;
}

////////////////////////////////////////////////////////////
// Argument handling:
//   -help (any prefix of at least two characters, one or two leading
//          dashes)     print usage and exit
//   -anything-else     error, exit with sa_usage
//   +word              add `word` to the header names to look for
//   anything else      name of a file to scan
// Files are scanned in the order given, after all arguments are parsed.
int main(int _argc, char** _argv){
  int argc(_argc);
  const char* const* argv(_argv);
  const std::string progname(argc > 0 ? *argv : "mail-scan");
  if (argc > 0) { argv++; argc--; }

  const int maxsize(1000000);
  std::list<std::string> watchword;
  std::list<std::string> dofile;

  while (argc > 0) {
    std::string arg(*argv); argv++; argc--;
    if (arg.substr(0,2) == "--") arg = arg.substr(1);
    // Require more than a bare "-" (or nothing at all) before treating
    // the argument as an abbreviation of -help.
    if (arg.length() > 1 && prefix(arg, "-help")) {
      usage(sa_good);
    }
    if (arg.substr(0,1) == "-") {
      std::cerr << "Unrecognized option '" << arg << "'" << std::endl;
      std::cerr << "For help, try:  " << progname << " -help" << std::endl;
      exit(sa_usage);
    }
    if (arg.substr(0,1) == "+") {
      // Fold once here rather than once per header scanned.
      watchword.push_back(toLower(arg.substr(1)));
    } else {
      dofile.push_back(arg);
    }
  }

  for (std::list<std::string>::const_iterator file = dofile.begin();
       file != dofile.end(); ++file) {
    if (scan_file(*file, watchword, maxsize)) return 1;
  }
  return 0;
}