From ffc4b5ce605253957b2a0ce82cf924c669806590 Mon Sep 17 00:00:00 2001
From: John Denker <jsd@av8n.com>
Date: Sun, 15 Jul 2012 06:43:36 -0700
Subject: primitive grep-like thing for looking at mail headers

---
 tools/mail-scan.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tools/makefile    |   2 +-
 2 files changed, 220 insertions(+), 1 deletion(-)
 create mode 100644 tools/mail-scan.c

(limited to 'tools')

diff --git a/tools/mail-scan.c b/tools/mail-scan.c
new file mode 100644
index 0000000..1260f30
--- /dev/null
+++ b/tools/mail-scan.c
@@ -0,0 +1,219 @@
+///////////////////
+// mail-scan.c
+//
+// primitive grep-like scan of mail headers
+//
+
+#include <iostream>
+#include <stdlib.h>             /* for exit() */
+#include <string>
+#include <list>
+#include <ctype.h>              /* toupper */
+#include <signal.h>
+#include <fstream>
+
+#include <stdio.h>              /* perror */
+
+using namespace std;
+
+// Print the help text and terminate the process with status `sts`.
+// The text goes to stdout when sts is 0 (help was asked for) and to
+// stderr otherwise.  Never returns.
+void usage(const int sts){
+  (sts ? cerr : cout) <<
+"Usage: mail-scan [options] +header ... file ...\n"
+"\n"
+"  Primitive grep-like thing for looking at mail headers.\n"
+"  For each file, prints every header (with its continuation lines)\n"
+"  whose name matches one of the +header words, ignoring case.\n"
+"  Output lines look like:   file :: header\n"
+"\n"
+"  Options\n"
+"    -help              print this msg (and exit immediately).\n"
+"    +word              report headers named 'word', e.g. +subject\n"
+;
+  exit(sts);
+}
+
+// Process exit statuses; values follow spamassassin (not qmail-queue).
+const int sa_good = 0;
+const int sa_spam = 1;
+const int sa_usage = 64;
+
+/////////////////////////////////////////////////////////
+// Case insensitive comparison of strings
+//
+// Function object: operator() answers "does a sort before b when
+// letter case is ignored?"
+class lessthan_foldcase{
+public:
+  // Returns true iff a sorts before b once both are upper-cased.
+  // A proper prefix sorts before any longer string that extends it.
+  bool operator() (const std::string& a, const std::string& b) const {
+    const size_t a_len = a.length();
+    const size_t b_len = b.length();
+    const size_t lim = a_len < b_len ? a_len : b_len;
+
+    for (size_t i=0; i<lim; ++i) {
+      // toupper() is undefined for negative arguments, so go through
+      // unsigned char; bytes >= 0x80 then compare by unsigned value.
+      const int cha = toupper(static_cast<unsigned char>(a[i]));
+      const int chb = toupper(static_cast<unsigned char>(b[i]));
+      if (cha != chb) return cha < chb;
+    }
+    return a_len < b_len;   // equal up to lim: the shorter one is less
+  }
+};
+
+
+// Three-way comparison of a and b, ignoring case (like perl's cmp).
+// Returns -2 / +2 when the first differing characters decide the
+// order, -1 / +1 when one string is a proper prefix of the other
+// (the shorter one is "less"), and 0 when they are equal.
+int cmp_casefold(const std::string& a, const std::string& b) {
+  std::string::const_iterator aa = a.begin();
+  std::string::const_iterator bb = b.begin();
+  for (; aa != a.end() && bb != b.end(); ++aa, ++bb) {
+    // tolower() on a negative plain char is undefined: use unsigned char.
+    const int ca = tolower(static_cast<unsigned char>(*aa));
+    const int cb = tolower(static_cast<unsigned char>(*bb));
+    if (ca != cb) return ca < cb ? -2 : 2;
+  }
+  if (aa != a.end()) return 1;          // a is longer
+  return bb != b.end() ? -1 : 0;        // b is longer, or they are equal
+}
+
+
+// Returns a copy of `a` with every character mapped through tolower().
+// Bytes go through unsigned char: tolower() is undefined for negatives.
+std::string toLower(const std::string& a){
+  std::string rslt(a);
+  for (std::string::iterator rr = rslt.begin(); rr != rslt.end(); ++rr)
+    *rr = static_cast<char>(tolower(static_cast<unsigned char>(*rr)));
+  return rslt;
+}
+
+////////////////
+// Returns foo without its leading blanks, tabs, CRs and newlines.
+// A string that is nothing but such whitespace trims to "".
+std::string ltrim(std::string foo){
+  return foo.erase(0, foo.find_first_not_of(" \t\r\n"));
+}
+
+////////////////
+// Argument-parsing helper: returns 1 when `longer` begins with
+// `shorter` (an empty `shorter` always matches), otherwise 0.
+int prefix(const string shorter, const string longer){
+  return longer.compare(0, shorter.length(), shorter) == 0;
+}
+
+// Exit with status sts.  Unless sts is sa_good, first send SIGUSR1 to the
+// process group named by $HI_Q_GROUP (if set), ignoring the signal ourself.
+void exeunt(const int sts){
+  const char* grp_env = (sts == sa_good) ? 0 : getenv("HI_Q_GROUP");
+  if (!grp_env) exit(sts);
+  if (signal(SIGUSR1, SIG_IGN) == SIG_ERR) cerr << "error setting signal" << endl;
+  // Require a clean number > 1: atoi() mapped junk to 0, and kill(0) or
+  // kill(-1) would signal our own group or every process we may signal.
+  char* tail = 0;
+  const long grp = strtol(grp_env, &tail, 10);
+  if (tail == grp_env || *tail != '\0' || grp <= 1) {
+    cerr << "bad HI_Q_GROUP '" << grp_env << "'" << endl;
+  } else if (kill(static_cast<pid_t>(-grp), SIGUSR1)) {
+    cerr << "kill failed on group " << grp << " ... ";
+    perror(0);
+  }
+  exit(sts);
+}
+
+////////////////////////////////////////////////////////////
+// Usage: mail-scan [-help] +header ... file ...
+//
+// Arguments starting with '+' name headers to watch for (matched
+// ignoring case); every other non-option argument is a file to scan.
+// Only the header section of each file is read, i.e. everything up to
+// the first empty line.  A header consists of its first line plus any
+// continuation lines (those starting with a blank or a tab).  Each
+// header whose name -- the text before the first ':' -- equals a watched
+// word is printed to stdout as "file :: header".
+//
+// Returns 0 normally and 1 as soon as a stream reports an unrecoverable
+// (badbit) error; exits with sa_usage on an unrecognized option.
+int main(int _argc, const char** _argv){
+  int argc(_argc);
+  const char **argv(_argv);
+  string progname(*argv); argv++; argc--;
+
+  const int maxsize(1000000);
+  list<string> watchword;               // stored already lower-cased
+  list<string> dofile;
+
+  while (argc) {
+    string arg(*argv); argv++; argc--;
+    if (arg.substr(0,2) == "--") arg = arg.substr(1);
+    // The empty string is a prefix of everything, so a bare "" or "-"
+    // must not be allowed to count as an abbreviation of -help.
+    if (arg.length() > 1 && prefix(arg, "-help")) {
+      usage(0);
+    }
+    if (arg.substr(0,1) == "-") {
+      cerr << "Unrecognized option '" << arg << "'" << endl;
+      cerr << "For help, try:  " << progname << " -help" << endl;
+      exit(sa_usage);
+    }
+    if (arg.substr(0,1) == "+") {
+      // Fold case once here rather than once per header per word.
+      watchword.push_back(toLower(arg.substr(1)));
+    } else {
+      dofile.push_back(arg);
+    }
+  }
+
+  for (list<string>::const_iterator file = dofile.begin();
+       file != dofile.end(); ++file) {
+    ifstream infile(file->c_str());
+    // A failed open sets failbit, not badbit.  Skip the file: reading
+    // it anyway would fail forever without ever reaching eof.
+    if (!infile.is_open()) {
+      cerr << "Failed to open file: " << *file << endl;
+      continue;
+    }
+    int msgsize(0);
+    for (;;){
+      string header;
+      if (!getline(infile, header)) {
+        if (infile.bad()) return 1;
+        break;                          // end of file
+      }
+      msgsize += header.length()+1;
+      // Gather continuation lines: peek at the first character of the
+      // next line and swallow the line if it is a blank or a tab.
+      for (int nxt = infile.peek(); nxt == ' ' || nxt == '\t'; nxt = infile.peek()) {
+        string line;
+        if (!getline(infile, line)) break;
+        msgsize += line.length()+1;
+        // NOTE(review): size check and exeunt() look inherited from
+        // skrewt; confirm a header scanner should signal the group.
+        if (msgsize > maxsize) {
+          cerr << "skrewt rejection: bigger than " << maxsize << endl;
+          exeunt(sa_spam);
+        }
+        header += "\n" + line;
+      }
+      if (infile.bad()) return 1;
+      if (header.length() == 0) break;  // blank line: end of the headers
+
+      string headword;                  // stays empty if there is no ':'
+      const size_t where = header.find(":");
+      if (where != string::npos)
+        headword = toLower(header.substr(0, where));
+      for (list<string>::const_iterator ptr = watchword.begin();
+           ptr != watchword.end(); ++ptr) {
+        if (headword == *ptr) {
+          cout << *file << " :: " << header << endl;
+        }
+      }
+    }
+  }
+  return 0;
+}
diff --git a/tools/makefile b/tools/makefile
index 3803e44..7f5dcef 100644
--- a/tools/makefile
+++ b/tools/makefile
@@ -10,7 +10,7 @@ CC= /usr/bin/g++ -Wall -g -I $(HOME)/lib/include
 
 .SECONDARY : # do not remove any intermediate files
 
-progs = pido hi-q skrewt hi-test
+progs = pido hi-q skrewt hi-test mail-scan
 
 all: $(progs)
 
-- 
cgit v1.2.3