move yet more stuff; ward.c is starting to get small

author: John Denker <jsd@av8n.com> 2012-11-22 16:39:07 -0800
committer: John Denker <jsd@av8n.com> 2012-11-22 16:39:07 -0800
commit: 4dd94a839cbae8a45889b224945eeaa6fb93b578 (patch)
tree: 1a301b7358e6bac17579fd9ae28da7630840c3d2 /tools/libskrewt.c
parent: 6faf97fd62cc84e2405731534236fb2a93ba4407 (diff)
1 files changed, 260 insertions, 0 deletions
diff --git a/tools/libskrewt.c b/tools/libskrewt.c
index 602fdb7..812b546 100644
--- a/tools/libskrewt.c
+++ b/tools/libskrewt.c
@@ -2,6 +2,7 @@
 #include "utils.h"
 #include <iostream>
 #include <sstream>
+#include <signal.h>
 
 using namespace std;
 
@@ -84,3 +85,262 @@ int skrewt::krunch_rfrom(){
 
   return 0;
 }
+
+int skrewt::interstage(){
+  if (saw_blank_line) {/* ignore */}
+// Note that the headers are in reverse-chronological order:
+  cerr << progid <<" Return-path: " << return_path <<endl;
+
+  { // parse the 'Received: from' line:
+    cerr << "        Received: " << received_from <<endl;
+    int rslt = krunch_rfrom();
+    if (rslt) return rslt;
+    cerr << "         rDNS:     " << proximta_rDNS << endl;
+    cerr << "         HELO:     " << proximta_HELO << endl;
+    cerr << "         IP:       "   << proximta_IP << endl;
+    cerr << "         AuthUser: "   << proximta_AuthUser << endl;
+    cerr << "         Mid       '"  << message_id << "'" << endl;
+  }
+
+  sepofra my_spf;
+  try {
+    my_spf.check(proximta_IP,
+        proximta_HELO,
+        return_path,
+        "junk", 0/* verbosity */);
+    cerr << "*** " << my_spf.explain() << endl;
+  } catch (bad_thing foo) {
+    cerr << "Caught bad thing: " << foo.what() << endl;
+    return ex_syserr;
+  }
+
+// The logic here is, in order:
+// 1:: If whitelisted, accept.  No greylisting, no spam-checking.
+// 2:: If blacklisted, reject.  No greylisting, no spam-checking.
+// 3:: If good reputation, spam-check it and send it on its way.
+// 4:: If no reputation, greylist.
+// 5:: If bad reputation, ????
+
+// Expanding item 3 to the next level of detail:
+//  3a:: If some domain vouches for this sender-IP via SPF,
+//   then the reputation is bound to the domain.
+//  3c:: If some domain vouches for the message via DKIM,
+//   then the reputation is bound to the domain.
+//  3d:: If no SPF or DKIM, then the reputation attaches
+//   to the sender-IP.
+
+// Expanding item 4 to the next level of detail:
+//  4a:: If the greylisting database says this message is ripe,
+//   spam-check it.  If it's OK, use it to count toward reputation.
+//  4b:: If it is previously unseen or too old, start greylisting
+//   timer from scratch.  Reject with temporary error.
+//  4c:: If it is in the "green" state, let the timer
+//   continue from where it is.  Reject with temporary error.
+
+// Note:  Reputation normally attaches to a domain.
+//  With SPF, the domain vouches for the sender at a given IP address
+//   ... and then the sender implicitly vouches for the message.
+//  With DKIM, the domain vouches for an individual message.
+//  With neither SPF nor DKIM, reputation attaches to the sender's
+//    IP address.  The sender vouches for the message.
+//
+// During greylisting, delay applies to the message.  Reputation
+//  applies to the domain (via SPF or DKIM) or to the server
+//  (otherwise).
+
+
+// If you are a medium-sized operator, such that you have one
+// and only one IP address that ever sends email, and it is a
+// static IP address, then you don't have much to gain from
+// DKIM or SPF.  Attaching a reputation to your domain is not
+// much different from attaching a reputation to your IP address.
+
+// In contrast, if you are a low-budget operator with a
+// dynamic IP address, you benefit from SPF and/or DKIM.
+// Your reputation attaches to your domain, and remains
+// stable even as your IP address changes.
+
+// At the other extreme, if you are a big-time operator
+// such as googlegroups.com, you benefit from DKIM and/or
+// SPF.  Your IP addresses are not dynamic, but they are
+// numerous, so you prefer to have your reputation apply
+// to all your email-sending hosts.
+
+#if 0   /* typical Received-SPF line */
+ Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70;
+#endif
+
+#if 0   /* SPF users */
+ :; mail-scan +received-spf /home/jsd/Maildir/cur[/]*  |
+    sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' |
+    awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr
+     81 <gmail.com>
+     17 <mac.com>
+      8 <gmx.net>
+      8 <bbruner@gmail.com>
+      7 <jsd@av8n.com>
+      6 <kst24@cam.ac.uk>
+      5 <farooq.w@gmail.com>
+      4 <scerri@chem.ucla.edu>
+      4 <comcast.net>
+      4 <c2i.net>
+      3 <gemort2006@gmail.com>
+      2 <rrhake@earthlink.net>
+      2 <hotmail.com>
+      2 <GCC.EDU>
+      1 <us.panasonic.com>
+      1 <sss.pgh.pa.us>
+      1 <scot_wherland@wsu.edu>
+      1 <rpendarvis@brenau.edu>
+      1 <hmperks@gmail.com>
+      1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu>
+      1 <arcor.de>
+#endif
+
+#if 0   /* DKIM users */
+     52 d=googlegroups.com;
+     27 d=barackobama.com;
+     10 d=gmail.com;
+      5 d=bronto.com;
+      5 d=bluehornet.com;
+      4 d=news.abebooks.com;
+      2 d=yahoo.co.uk;
+      2 d=sbcglobal.net;
+      2 d=embarqmail.com;
+      2 d=emailms.angieslist.com;
+      1 d=newsletters.sourceforge.net;
+      1 d=members.ebay.com;
+      1 d=info.citibank.com;
+      1 d=ebay.com;
+      1 d=commail1.co.za;
+#endif
+
+  list<string> badnews;
+  int whitelisted(0);
+
+  if (subject.find("sesame") != string::npos
+        && subject.find("swordfish") != string::npos) {
+    whitelisted++;
+  }
+
+  if (delivered_to.length()){
+    cerr << progid <<  " Delivered-to: <<<" << delivered_to << ">>>" << endl;
+  }
+  if (toLower(trim(delivered_to)) == "jean@av8n.com") {
+    badnews.push_back("Looping Delivered-to: " + delivered_to);
+  }
+
+  if (subject.find("-please-bounce-this-") != string::npos) {
+    badnews.push_back("by request");
+  }
+
+  if (!date.length()) {
+    badnews.push_back("no date");
+  }
+
+  if (mid_required && !message_id.length()) {
+    badnews.push_back("no message-id");
+  }
+
+  if (badnews.size() && !whitelisted){
+    cerr << progid << " " << join(", ", badnews) << endl;
+    if (error_exit){
+      cerr << progid << " '" << from
+             << "' to '" << to
+             << "'" << endl;
+      exeunt(ex_spam);
+    }
+  }
+  return 0;
+}
+
+int skrewt::body(){
+  string main_contype;
+  if (content_type.length())
+    parse_content(content_type, main_contype, boundary);
+// some slightly-useful booleans:
+  int currently_text = main_contype == "text";
+  int main_multipart = main_contype == "multipart";
+
+// early-stage thinking has been done.
+// Now spew the rest of the message
+  //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl;
+  int in_subheads(0);
+  int textlines(0);
+
+  for (;;){             // outer loop over all lines in the body
+    if (cin.eof()) break;
+    if (cin.bad()) return 1;
+    string line;
+// on fail, go back to top of outer loop and check for eof versus bad
+    if (getline(cin, line).fail()) continue;
+    msgsize += line.length()+1;
+    if (msgsize > maxsize) {
+      cerr << progid << " rejection: bigger than " << maxsize << endl;
+      maybe_exeunt(ex_spam, error_exit);
+    }
+    bigbuf.push_back(line);
+    cout << line << endl;
+    if (in_subheads){
+      if (line == "" || line == "\r") in_subheads = 0;
+    }
+    if (in_subheads){
+        string sub_contype;
+        string junk;
+// in principle could worry about folded headers,
+// but in this application it doesn't actually matter
+        string headword;
+        string rest;
+        size_t where = line.find(":");
+        if (where != string::npos) {
+          headword = line.substr(0, where);
+          rest = ltrim(line.substr(1+where));
+        }
+        headword = toLower(headword);
+        if (headword == "content-type") {
+          parse_content(rest, sub_contype, junk);
+          currently_text = sub_contype == "text";
+          //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl;
+        }
+    } else {
+      if (main_multipart && line == "--" + boundary) {
+        //xxxx cerr << "found subhead boundary" << endl;
+        in_subheads = 1;
+        continue;
+      }
+      if (currently_text) textlines++;
+    }
+  }
+
+  if (0) cerr << "textlines: " << textlines << endl;
+  if (!textlines) {
+    cerr << progid << " rejection: no text: " << error_exit << endl;
+    maybe_exeunt(ex_spam, error_exit);
+  }
+  cerr << progid << " normal completion" << endl;
+  return(ex_good);
+}
+
+void maybe_exeunt(const int sts, const int really){
+  if (!really) return;
+  if (sts == ex_good) exit(sts);
+
+  const char* foo = getenv("HI_Q_GROUP");
+  if (!foo) exit(sts);
+
+// No point in signalling ourself:
+  sighandler_t rslt = signal(SIGUSR1, SIG_IGN);
+  if (rslt == SIG_ERR) {
+    cerr << "error setting signal" << endl;
+  }
+  int k = kill(-atoi(foo), SIGUSR1);
+  if (k) {
+    cerr << "kill failed on group " << atoi(foo) << " ... ";
+    perror(0);
+  }
+  exit(sts);
+}
+
+void exeunt(const int sts){
+  maybe_exeunt(sts, 1);
+}
author	John Denker <jsd@av8n.com>	2012-11-22 16:39:07 -0800
committer	John Denker <jsd@av8n.com>	2012-11-22 16:39:07 -0800
commit	4dd94a839cbae8a45889b224945eeaa6fb93b578 (patch)
tree	1a301b7358e6bac17579fd9ae28da7630840c3d2 /tools/libskrewt.c
parent	6faf97fd62cc84e2405731534236fb2a93ba4407 (diff)