move yet more stuff; ward.c is starting to get small

author: John Denker <jsd@av8n.com> 2012-11-22 16:39:07 -0800
committer: John Denker <jsd@av8n.com> 2012-11-22 16:39:07 -0800
commit: 4dd94a839cbae8a45889b224945eeaa6fb93b578 (patch)
tree: 1a301b7358e6bac17579fd9ae28da7630840c3d2 /tools
parent: 6faf97fd62cc84e2405731534236fb2a93ba4407 (diff)
3 files changed, 264 insertions, 260 deletions
diff --git a/tools/libskrewt.c b/tools/libskrewt.c
index 602fdb7..812b546 100644
--- a/tools/libskrewt.c
+++ b/tools/libskrewt.c
@@ -2,6 +2,7 @@
 #include "utils.h"
 #include <iostream>
 #include <sstream>
+#include <signal.h>
 
 using namespace std;
 
@@ -84,3 +85,262 @@ int skrewt::krunch_rfrom(){
 
   return 0;
 }
+
+int skrewt::interstage(){ // Log the parsed headers, run the SPF check, then apply the sanity checks below; returns 0 to proceed.
+  if (saw_blank_line) {/* ignore */}
+// Note that the headers are in reverse-chronological order:
+  cerr << progid <<" Return-path: " << return_path <<endl;
+
+  { // parse the 'Received: from' line:
+    cerr << "        Received: " << received_from <<endl;
+    int rslt = krunch_rfrom();
+    if (rslt) return rslt; // pass a nonzero krunch_rfrom() status straight back to the caller
+    cerr << "         rDNS:     " << proximta_rDNS << endl;
+    cerr << "         HELO:     " << proximta_HELO << endl;
+    cerr << "         IP:       "   << proximta_IP << endl;
+    cerr << "         AuthUser: "   << proximta_AuthUser << endl;
+    cerr << "         Mid       '"  << message_id << "'" << endl;
+  }
+
+  sepofra my_spf;
+  try {
+    my_spf.check(proximta_IP,
+        proximta_HELO,
+        return_path,
+        "junk", 0/* verbosity */);
+    cerr << "*** " << my_spf.explain() << endl; // the SPF result is only logged here; it is not used in the decision below
+  } catch (bad_thing foo) {
+    cerr << "Caught bad thing: " << foo.what() << endl;
+    return ex_syserr;
+  }
+
+// The logic here is:  In order:
+// 1:: If whitelisted, accept.  No greylisting, no spam-checking.
+// 2:: If blacklisted, reject.  No greylisting, no spam-checking.
+// 3:: If good reputation, spam-check it and send it on its way.
+// 4:: If no reputation, greylist.
+// 5:: If bad reputation, ????
+
+// Expanding item 3 to the next level of detail:
+//  3a:: If some domain vouches for this sender-IP via SPF,
+//   then the reputation is bound to the domain.
+//  3c:: If some domain vouches for the message via DKIM,
+//   then the reputation is bound to the domain.
+//  3d:: If no SPF or DKIM, then the reputation attaches
+//   to the sender-IP.
+
+// Expanding item 4 to the next level of detail:
+//  4a:: If the greylisting database says this message is ripe
+//   spam-check it.  If it's OK, use it to count toward reputation.
+//  4b:: If it is previously unseen or too old, start greylisting
+//   timer from scratch.  Reject with temporary error.
+//  4c:: If it is in the "green" state, let the timer
+//   continue from where it is.  Reject with temporary error.
+
+// Note:  Reputation normally attaches to a domain.
+//  With SPF, the domain vouches for the sender at a given IP address
+//   ... and then the sender implicitly vouches for the message.
+//  With DKIM, the domain vouches for an individual message.
+//  With neither SPF nor DKIM, reputation attaches to the sender's
+//    IP address.  The sender vouches for the message.
+//
+// During greylisting, delay applies to the message.  Reputation
+//  applies to the domain (via SPF or DKIM) or to the server
+//  (otherwise).
+
+
+// If you are a medium-sized operator, such that you have one
+// and only one IP address that ever sends email, and it is a
+// static IP address, then you don't have much to gain from
+// DKIM or SPF.  Attaching a reputation to your domain is not
+// much different from attaching a reputation to your IP address.
+
+// In contrast, if you are a low-budget operator with a
+// dynamic IP address, you benefit from SPF and/or DKIM.
+// Your reputation attaches to your domain, and remains
+// stable even as your IP address changes.
+
+// At the other extreme, if you are a big-time operator
+// such as googlegroups.com, you benefit from DKIM and/or
+// SPF.  Your IP addresses are not dynamic, but they are
+// numerous, so you prefer to have your reputation apply
+// to all your email-sending hosts.
+
+#if 0   /* typical Received-SPF line */
+ Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70;
+#endif
+
+#if 0   /* SPF users */
+ :; mail-scan +received-spf /home/jsd/Maildir/cur[/]*  |
+    sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' |
+    awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr
+     81 <gmail.com>
+     17 <mac.com>
+      8 <gmx.net>
+      8 <bbruner@gmail.com>
+      7 <jsd@av8n.com>
+      6 <kst24@cam.ac.uk>
+      5 <farooq.w@gmail.com>
+      4 <scerri@chem.ucla.edu>
+      4 <comcast.net>
+      4 <c2i.net>
+      3 <gemort2006@gmail.com>
+      2 <rrhake@earthlink.net>
+      2 <hotmail.com>
+      2 <GCC.EDU>
+      1 <us.panasonic.com>
+      1 <sss.pgh.pa.us>
+      1 <scot_wherland@wsu.edu>
+      1 <rpendarvis@brenau.edu>
+      1 <hmperks@gmail.com>
+      1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu>
+      1 <arcor.de>
+#endif
+
+#if 0   /* DKIM users */
+     52 d=googlegroups.com;
+     27 d=barackobama.com;
+     10 d=gmail.com;
+      5 d=bronto.com;
+      5 d=bluehornet.com;
+      4 d=news.abebooks.com;
+      2 d=yahoo.co.uk;
+      2 d=sbcglobal.net;
+      2 d=embarqmail.com;
+      2 d=emailms.angieslist.com;
+      1 d=newsletters.sourceforge.net;
+      1 d=members.ebay.com;
+      1 d=info.citibank.com;
+      1 d=ebay.com;
+      1 d=commail1.co.za;
+#endif
+
+  list<string> badnews; // human-readable reasons to reject; empty means no complaint was found
+  int whitelisted(0); // nonzero suppresses rejection even when badnews is non-empty
+
+  if (subject.find("sesame") != string::npos
+        && subject.find("swordfish") != string::npos) {
+    whitelisted++; // subject contains both sesame and swordfish
+  }
+
+  if (delivered_to.length()){
+    cerr << progid <<  " Delivered-to: <<<" << delivered_to << ">>>" << endl;
+  }
+  if (toLower(trim(delivered_to)) == "jean@av8n.com") {
+    badnews.push_back("Looping Delivered-to: " + delivered_to); // already delivered to this address once: treat as a mail loop
+  }
+
+  if (subject.find("-please-bounce-this-") != string::npos) {
+    badnews.push_back("by request");
+  }
+
+  if (!date.length()) {
+    badnews.push_back("no date");
+  }
+
+  if (mid_required && !message_id.length()) {
+    badnews.push_back("no message-id");
+  }
+
+  if (badnews.size() && !whitelisted){ // whitelisting overrides every complaint collected above
+    cerr << progid << " " << join(", ", badnews) << endl;
+    if (error_exit){ // when error_exit is 0 the complaints are logged but the message is still accepted
+      cerr << progid << " '" << from
+             << "' to '" << to
+             << "'" << endl;
+      exeunt(ex_spam); // does not return: exeunt() always ends in exit()
+    }
+  }
+  return 0;
+}
+
+int skrewt::body(){ // Copy the body from cin to cout line by line; complain if it is oversize or has no text lines.
+  string main_contype;
+  if (content_type.length())
+    parse_content(content_type, main_contype, boundary);
+// some slightly-useful booleans:
+  int currently_text = main_contype == "text";
+  int main_multipart = main_contype == "multipart";
+
+// early-stage thinking has been done.
+// Now spew the rest of the message
+  //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl;
+  int in_subheads(0); // nonzero while reading the header lines that follow a multipart boundary line
+  int textlines(0); // number of body lines read while currently_text was nonzero
+
+  for (;;){             // outer loop over all lines in the body
+    if (cin.eof()) break;
+    if (cin.bad()) return 1; // stream is unusable: give up with a nonzero status
+    string line;
+// on fail, go back to top of outer loop and check for eof versus bad
+    if (getline(cin, line).fail()) continue;
+    msgsize += line.length()+1; // +1 accounts for the newline that getline() discarded
+    if (msgsize > maxsize) {
+      cerr << progid << " rejection: bigger than " << maxsize << endl;
+      maybe_exeunt(ex_spam, error_exit); // returns, and the copy continues, when error_exit is 0
+    }
+    bigbuf.push_back(line);
+    cout << line << endl;
+    if (in_subheads){
+      if (line == "" || line == "\r") in_subheads = 0; // an empty line (bare or with CR) ends the sub-part headers
+    }
+    if (in_subheads){
+        string sub_contype;
+        string junk; // receives the sub-part boundary, which is not used here
+// in principle could worry about folded headers,
+// but in this application it doesn't actually matter
+        string headword;
+        string rest;
+        size_t where = line.find(":");
+        if (where != string::npos) {
+          headword = line.substr(0, where);
+          rest = ltrim(line.substr(1+where));
+        }
+        headword = toLower(headword);
+        if (headword == "content-type") {
+          parse_content(rest, sub_contype, junk);
+          currently_text = sub_contype == "text";
+          //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl;
+        }
+    } else {
+      if (main_multipart && line == "--" + boundary) { // NOTE(review): exact match, so a boundary line ending in CR would not be recognized - confirm
+        //xxxx cerr << "found subhead boundary" << endl;
+        in_subheads = 1;
+        continue;
+      }
+      if (currently_text) textlines++;
+    }
+  }
+
+  if (0) cerr << "textlines: " << textlines << endl; // disabled debugging output
+  if (!textlines) {
+    cerr << progid << " rejection: no text: " << error_exit << endl;
+    maybe_exeunt(ex_spam, error_exit); // falls through to normal completion when error_exit is 0
+  }
+  cerr << progid << " normal completion" << endl;
+  return(ex_good);
+}
+
+void maybe_exeunt(const int sts, const int really){ // Exit with status sts when really is nonzero, signalling the HI_Q_GROUP process group first on failure.
+  if (!really) return; // not enforcing: hand control back to the caller
+  if (sts == ex_good) exit(sts); // a good status needs no signalling
+
+  const char* foo = getenv("HI_Q_GROUP"); // process-group id as text; parsed with atoi() below
+  if (!foo) exit(sts); // no group configured, so there is nobody to notify
+
+// No point in signalling ourself, so ignore SIGUSR1 before sending it to the group:
+  sighandler_t rslt = signal(SIGUSR1, SIG_IGN);
+  if (rslt == SIG_ERR) {
+    cerr << "error setting signal" << endl;
+  }
+  int k = kill(-atoi(foo), SIGUSR1); // negative pid addresses a process group; NOTE(review): atoi() gives 0 on non-numeric text, and kill(0, ...) targets our own group
+  if (k) {
+    cerr << "kill failed on group " << atoi(foo) << " ... ";
+    perror(0); // completes the message above with the errno text
+  }
+  exit(sts);
+}
+
+void exeunt(const int sts){ // Unconditional form of maybe_exeunt(): always terminates the process with status sts.
+  maybe_exeunt(sts, 1);
+}
diff --git a/tools/libskrewt.h b/tools/libskrewt.h
index 94d06dd..c5fea7e 100644
--- a/tools/libskrewt.h
+++ b/tools/libskrewt.h
@@ -1,5 +1,6 @@
 #include <string>
 #include <vector>
+#include "sepofra.h"
 #include "qq_exit_codes.h"              // a bit of a kludge
 extern std::string progid;
 
@@ -42,3 +43,6 @@ public:
 
 void parse_content(const std::string type_spec_line,
         std::string &maintype, std::string &boundary);
+
+void exeunt(const int sts);
+void maybe_exeunt(const int sts, const int really);
diff --git a/tools/ward.c b/tools/ward.c
index 6280a28..277fd76 100644
--- a/tools/ward.c
+++ b/tools/ward.c
@@ -8,7 +8,6 @@
 #include <stdlib.h>             /* for exit() */
 #include <string>               /* for strcmp() */
 #include <ctype.h>              /* toupper */
-#include <signal.h>
 
 #include <stdio.h>              /* perror */
 #include <sstream>
@@ -38,33 +37,8 @@ void usage(const int sts){
 }
 
 #include "libskrewt.h"
-#include "sepofra.h"
 #include "utils.h"
 
-void maybe_exeunt(const int sts, const int really){
-  if (!really) return;
-  if (sts == ex_good) exit(sts);
-
-  const char* foo = getenv("HI_Q_GROUP");
-  if (!foo) exit(sts);
-
-// No point in signalling ourself:
-  sighandler_t rslt = signal(SIGUSR1, SIG_IGN);
-  if (rslt == SIG_ERR) {
-    cerr << "error setting signal" << endl;
-  }
-  int k = kill(-atoi(foo), SIGUSR1);
-  if (k) {
-    cerr << "kill failed on group " << atoi(foo) << " ... ";
-    perror(0);
-  }
-  exit(sts);
-}
-
-void exeunt(const int sts){
-  maybe_exeunt(sts, 1);
-}
-
 string progname, progid;
 int mypid;
 
@@ -181,240 +155,6 @@ int skrewt::headers(){
   return 0;
 }
 
-int skrewt::interstage(){
-  if (saw_blank_line) {/* ignore */}
-// Note that the headers are in reverse-chronological order:
-  cerr << progid <<" Return-path: " << return_path <<endl;
-
-  { // parse the 'Received: from' line:
-    cerr << "        Received: " << received_from <<endl;
-    int rslt = krunch_rfrom();
-    if (rslt) return rslt;
-    cerr << "         rDNS:     " << proximta_rDNS << endl;
-    cerr << "         HELO:     " << proximta_HELO << endl;
-    cerr << "         IP:       "   << proximta_IP << endl;
-    cerr << "         AuthUser: "   << proximta_AuthUser << endl;
-    cerr << "         Mid       '"  << message_id << "'" << endl;
-  }
-
-  sepofra my_spf;
-  try {
-    my_spf.check(proximta_IP,
-        proximta_HELO,
-        return_path,
-        "junk", 0/* verbosity */);
-    cerr << "*** " << my_spf.explain() << endl;
-  } catch (bad_thing foo) {
-    cerr << "Caught bad thing: " << foo.what() << endl;
-    return ex_syserr;
-  }
-
-// The logic here is:  In order:
-// 1:: If whitelisted, accept.  No greylisting, no spam-checking.
-// 2:: If blacklisted, reject.  No greylisting, no spam-checking.
-// 3:: If good reputation, spam-check it and send it on its way.
-// 4:: If no reputation, greylist.
-// 5:: If bad reputation, ????
-
-// Expanding item 3 to the next level of detail:
-//  3a:: If some domain vouches for this sender-IP via SPF,
-//   then the reputation is bound to the domain.
-//  3c:: If some domain vouches for the message vie DKIM,
-//   then the reputation is bound to the domain.
-//  3d:: If no SPF or DKIM, then the reputation attaches
-//   to the sender-IP.
-
-// Expanding item 4 to the next level of detail:
-//  4a:: If the greylisting database says this message is ripe
-//   spam-check it.  If it's OK, use it to count toward reputation.
-//  4b:: If it is previously unseen or too old, start greylisting
-//   timer from scratch.  Reject with temporary error.
-//  4c:: If it is in the "green" state, let the timer
-//   continue from where it is.  Reject with temporary error.
-
-// Note:  Reputation normally attaches to a domain.
-//  With SPF, the domain vouches for the sender at a given IP address
-//   ... and then the sender implicitly vouches for the message.
-//  With DKIM, the domain vouches for an individual message.
-//  With neither SPF nor DKIM, reputation attaches to the sender's
-//    IP address.  The sender vouches for the message.
-//
-// During greylisting, delay applies to the message.  Reputation
-//  applies to the domain (via SPF or DKIM) or to the server
-//  (otherwise).
-
-
-// If you are a medium-sized operator, such that you have one
-// and only one IP address that ever sends email, and it is a
-// static IP address, then you don't have much to gain from
-// DKIM or SPF.  Attaching a reputation to your domain is not
-// much different from attaching a reputation to your IP address.
-
-// In constrast, if you are a low-budget operator with a
-// dynamic IP address, you benefit from SPF and/or DKIM.
-// Your reputation attaches to your domain, and remains
-// stable even as your IP address changes.
-
-// At the other extreme, if you are a big-time operator
-// such as googlegroups.com, you benefit from DKIM and/or
-// SPF.  Your IP addresses are not dynamic, but they are
-// numerous, so you prefer to have your reputation apply
-// to all your email-sending hosts.
-
-#if 0   /* typical Received-SPF line */
- Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70;
-#endif
-
-#if 0   /* SPF users */
- :; mail-scan +received-spf /home/jsd/Maildir/cur[/]*  |
-    sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' |
-    awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr
-     81 <gmail.com>
-     17 <mac.com>
-      8 <gmx.net>
-      8 <bbruner@gmail.com>
-      7 <jsd@av8n.com>
-      6 <kst24@cam.ac.uk>
-      5 <farooq.w@gmail.com>
-      4 <scerri@chem.ucla.edu>
-      4 <comcast.net>
-      4 <c2i.net>
-      3 <gemort2006@gmail.com>
-      2 <rrhake@earthlink.net>
-      2 <hotmail.com>
-      2 <GCC.EDU>
-      1 <us.panasonic.com>
-      1 <sss.pgh.pa.us>
-      1 <scot_wherland@wsu.edu>
-      1 <rpendarvis@brenau.edu>
-      1 <hmperks@gmail.com>
-      1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu>
-      1 <arcor.de>
-#endif
-
-#if 0   /* DKIM users */
-     52 d=googlegroups.com;
-     27 d=barackobama.com;
-     10 d=gmail.com;
-      5 d=bronto.com;
-      5 d=bluehornet.com;
-      4 d=news.abebooks.com;
-      2 d=yahoo.co.uk;
-      2 d=sbcglobal.net;
-      2 d=embarqmail.com;
-      2 d=emailms.angieslist.com;
-      1 d=newsletters.sourceforge.net;
-      1 d=members.ebay.com;
-      1 d=info.citibank.com;
-      1 d=ebay.com;
-      1 d=commail1.co.za;
-#endif
-
-  list<string> badnews;
-  int whitelisted(0);
-
-  if (subject.find("sesame") != string::npos
-        && subject.find("swordfish") != string::npos) {
-    whitelisted++;
-  }
-
-  if (delivered_to.length()){
-    cerr << progid <<  " Delivered-to: <<<" << delivered_to << ">>>" << endl;
-  }
-  if (toLower(trim(delivered_to)) == "jean@av8n.com") {
-    badnews.push_back("Looping Delivered-to: " + delivered_to);
-  }
-
-  if (subject.find("-please-bounce-this-") != string::npos) {
-    badnews.push_back("by request");
-  }
-
-  if (!date.length()) {
-    badnews.push_back("no date");
-  }
-
-  if (mid_required && !message_id.length()) {
-    badnews.push_back("no message-id");
-  }
-
-  if (badnews.size() && !whitelisted){
-    cerr << progid << " " << join(", ", badnews) << endl;
-    if (error_exit){
-      cerr << progid << " '" << from
-             << "' to '" << to
-             << "'" << endl;
-      exeunt(ex_spam);
-    }
-  }
-  return 0;
-}
-
-int skrewt::body(){
-  string main_contype;
-  if (content_type.length())
-    parse_content(content_type, main_contype, boundary);
-// some slightly-useful booleans:
-  int currently_text = main_contype == "text";
-  int main_multipart = main_contype == "multipart";
-
-// early-stage thinking has been done.
-// Now spew the rest of the message
-  //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl;
-  int in_subheads(0);
-  int textlines(0);
-
-  for (;;){             // outer loop over all lines in the body
-    if (cin.eof()) break;
-    if (cin.bad()) return 1;
-    string line;
-// on fail, go back to top of outer loop and check for eof versus bad
-    if (getline(cin, line).fail()) continue;
-    msgsize += line.length()+1;
-    if (msgsize > maxsize) {
-      cerr << progid << " rejection: bigger than " << maxsize << endl;
-      maybe_exeunt(ex_spam, error_exit);
-    }
-    bigbuf.push_back(line);
-    cout << line << endl;
-    if (in_subheads){
-      if (line == "" || line == "\r") in_subheads = 0;
-    }
-    if (in_subheads){
-        string sub_contype;
-        string junk;
-// in principle could worry about folded headers,
-// but in this application it doesn't actually matter
-        string headword;
-        string rest;
-        size_t where = line.find(":");
-        if (where != string::npos) {
-          headword = line.substr(0, where);
-          rest = ltrim(line.substr(1+where));
-        }
-        headword = toLower(headword);
-        if (headword == "content-type") {
-          parse_content(rest, sub_contype, junk);
-          currently_text = sub_contype == "text";
-          //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl;
-        }
-    } else {
-      if (main_multipart && line == "--" + boundary) {
-        //xxxx cerr << "found subhead boundary" << endl;
-        in_subheads = 1;
-        continue;
-      }
-      if (currently_text) textlines++;
-    }
-  }
-
-  if (0) cerr << "textlines: " << textlines << endl;
-  if (!textlines) {
-    cerr << progid << " rejection: no text: " << error_exit << endl;
-    maybe_exeunt(ex_spam, error_exit);
-  }
-  cerr << progid << " normal completion" << endl;
-  return(ex_good);
-}
 
 ////////////////////////////////////////////////////////////
 int main(int _argc, const char** _argv){
author	John Denker <jsd@av8n.com>	2012-11-22 16:39:07 -0800
committer	John Denker <jsd@av8n.com>	2012-11-22 16:39:07 -0800
commit	4dd94a839cbae8a45889b224945eeaa6fb93b578 (patch)
tree	1a301b7358e6bac17579fd9ae28da7630840c3d2 /tools
parent	6faf97fd62cc84e2405731534236fb2a93ba4407 (diff)