diff options
| author | John Denker <jsd@av8n.com> | 2012-11-24 09:03:18 -0800 | 
|---|---|---|
| committer | John Denker <jsd@av8n.com> | 2012-11-24 09:03:18 -0800 | 
| commit | 7abce5d60408c7713181249ba3f23b72a40aa326 (patch) | |
| tree | f89f7c75bdd955015849d2ac793169b9d2cdf27c /tools/skrewt.c | |
| parent | 3043e470e483da2452943bd810256ea0ff8807b0 (diff) | |
new libskrewt-based code is now stable and in use;
let it be the basis for further developments
Diffstat (limited to 'tools/skrewt.c')
| -rw-r--r-- | tools/skrewt.c | 498 | 
1 files changed, 21 insertions, 477 deletions
| diff --git a/tools/skrewt.c b/tools/skrewt.c index 63c6be6..abea289 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -8,7 +8,6 @@  #include <stdlib.h>             /* for exit() */  #include <string>               /* for strcmp() */  #include <ctype.h>              /* toupper */ -#include <signal.h>  #include <stdio.h>              /* perror */  #include <sstream> @@ -37,33 +36,8 @@ void usage(const int sts){    exit(sts);  } -#include "qq_exit_codes.h" +#include "libskrewt.h"  #include "utils.h" -#include "sepofra.h" - -void maybe_exeunt(const int sts, const int really){ -  if (!really) return; -  if (sts == ex_good) exit(sts); - -  const char* foo = getenv("HI_Q_GROUP"); -  if (!foo) exit(sts); - -// No point in signalling ourself: -  sighandler_t rslt = signal(SIGUSR1, SIG_IGN); -  if (rslt == SIG_ERR) { -    cerr << "error setting signal" << endl; -  } -  int k = kill(-atoi(foo), SIGUSR1); -  if (k) { -    cerr << "kill failed on group " << atoi(foo) << " ... "; -    perror(0); -  } -  exit(sts); -} - -void exeunt(const int sts){ -  maybe_exeunt(sts, 1); -}  string progname, progid;  int mypid; @@ -72,87 +46,6 @@ int mypid;  /* Content-Type: multipart/mixed; boundary="1170861315-1262462055-1341954763=:92165"    */  // -void parse_content(const string type_spec_line, -        string &maintype, string &boundary) { -  //xxx cerr << "parser called with: " << type_spec_line << endl; -  string get_type(type_spec_line); - -  size_t where = get_type.find_first_of(" \t;\n"); -  string rest; -  if (where == string::npos) { -    // keep whole string -  } -  else { -    rest = get_type.substr(where+1); -    get_type = get_type.substr(0,where); -  } -  where = get_type.find("/"); -  if (where == string::npos){ -    maintype = ""; -    cerr << "could not find / in " << get_type << endl; -  } else { -    maintype = get_type.substr(0, where); -  } - -// now need to find boundary - -  string srch = "boundary="; -  where = rest.find(srch); -  if (where != string::npos) { -    where += srch.length(); -    boundary = rest.substr(where); -    if (boundary[0] == '"') { -      boundary = boundary.substr(1); -      where = boundary.find_first_of("\""); -    } else { -      where = boundary.find_first_of(" \t;\n"); -    } -    if (where == string::npos) { -      /* do nothing, boundary=boundary as a whole */ -    } else { -      boundary = boundary.substr(0, where); -    } -  } else { -    //xxxxxxx cerr << "boundary= not found in " << type_spec_line << endl; -  } -} - -class skrewt{ -public: -  string received_from;         // envelope HELO among other things -    string proximta_HELO; -    string proximta_rDNS; -    string proximta_IP; -    string proximta_AuthUser; -  string return_path;           // envelope MAIL FROM -  string boundary; -  string to; -  string from; -  string subject; -  string date; -  string message_id; -  string content_type; -  string delivered_to; -  int msgsize; -  vector<string> bigbuf; -  int saw_blank_line; -  int recno; - -  int maxsize; -  int error_exit; -  int mid_required; - -  // constructor -  skrewt() -  : boundary("x-xx-x"), msgsize(0), saw_blank_line(0), recno(0), -    maxsize(1000*1000), error_exit(0), mid_required(0) -  {} - -  int headers(); -  int interstage(); -  int body(); -  int krunch_rfrom(); -};  #if 0   /* typical "Received: from" lines */  Received: from lists.sourceforge.net (216.34.181.88) @@ -178,360 +71,6 @@ Received: from ip68-231-191-153.tc.ph.cox.net (HELO asclepias.av8n.net) (smtp@68  /home/jsd/Maildir/cur/1342363199.24320.cloud:2,  #endif -int skrewt::krunch_rfrom(){ -  stringstream parse; -  parse.str(received_from); -  string word; -  parse >> word; -  if (word != "from") { -    cerr << progid << " bad 'Received: from' line ... '" -        << word << "'" << endl; -    return ex_syserr; -  } -  parse >> proximta_rDNS; -  parse >> word; -  if (word == "(HELO") { -    parse >> proximta_HELO; -    proximta_HELO = rtrim(proximta_HELO, "()"); -    parse >> word; -  } else { -    proximta_HELO = proximta_rDNS; -  } -  size_t len = word.length(); -  if (len<2 || word[0] != '(' || word[len-1] != ')') { -    cerr << progid << " bad 'Received: from' line ;;; '" -        << word << "'" << endl; -    return ex_syserr; -  } -  proximta_IP = word.substr(1, len-2); -  size_t where = proximta_IP.find("@"); -  if (where != string::npos){ -    proximta_AuthUser = proximta_IP.substr(0, where); -    proximta_IP = proximta_IP.substr(1+where); -  } - -  return 0; -} - -int skrewt::headers(){ -  //xxxx cerr << progid << " begins" << endl; -  for (;;){             // outer loop over all records in the header -    if (cin.eof()) break; -    if (cin.bad()) return 1; - -    string line; -// on fail, go back to top of outer loop and check for eof versus bad -    if (getline(cin, line).fail()) continue; -    msgsize += line.length()+1; -    if (msgsize > maxsize) { -      cerr << progid << " rejection: bigger than " << maxsize << endl; -      exeunt(ex_spam); -    } -    cout << line << endl; -    bigbuf.push_back(line); -    string headrec = noCR(line);       // for a folded record, this is the first line - -    for (;;) {        // inner loop to build a multi-line record e.g. folded record: -      if (cin.eof()) break; -      if (cin.bad()) return 1; -      char ch; -      if (cin.get(ch).fail()) continue; -      cin.putback(ch); -      if (ch != ' ' && ch != '\t') break; -      string line; -// on fail, go back to top of inner loop and check for eof versus bad -      if (getline(cin, line).fail()) continue; -      msgsize += line.length()+1; -      if (msgsize > maxsize) { -        cerr << progid << " rejection: bigger than " << maxsize << endl; -        exeunt(ex_spam); -      } -      cout << line << endl; -      bigbuf.push_back(line); -      headrec += "\n" + noCR(line); -    } -// here with a fully assembled header record -// headrec (unlike line) contains no DOS CR characters -    int len = headrec.length(); -    if (len == 0) { -      saw_blank_line = 1; -      break;            // no more headers in this message -    } - -// here if it's a header line -    string headword; -    string rest; -    size_t where = headrec.find(":"); -    if (where != string::npos) { -      headword = headrec.substr(0, where); -      rest = ltrim(headrec.substr(1+where)); -    } -    headword = toLower(headword); -    if (0){ -    } else if (headword == "from") { -      from = rest; -    } else if (headword == "to") { -      to = rest; -    } else if (headword == "return-path") { -      return_path = rest; -    } else if (headword == "message-id") { -      message_id = rest; -    } else if (headword == "received") { -      if (!received_from.length() && prefix("from ", rest)){ -        received_from = rest; -      } -    } else if (headword == "date") { -      date = rest; -    } else if (headword == "subject") { -      subject = rest; -    } else if (headword == "content-type") { -      content_type = rest; -    } else if (headword == "delivered-to") { -      delivered_to = rest; -    } -    //xxxx  cout << headrec.length() << " ... "; -    recno++; -    if (0) if (recno <= 6) cerr << progid << "#" << recno -        << " " << headrec << endl; -  } -  return 0; -} - -int skrewt::interstage(){ -  if (saw_blank_line) {/* ignore */} -// Note that the headers are in reverse-chronological order: -  cerr << progid <<" Return-path: " << return_path <<endl; - -  { // parse the 'Received: from' line: -    cerr << "        Received: " << received_from <<endl; -    int rslt = krunch_rfrom(); -    if (rslt) return rslt; -    cerr << "         rDNS:     " << proximta_rDNS << endl; -    cerr << "         HELO:     " << proximta_HELO << endl; -    cerr << "         IP:       "   << proximta_IP << endl; -    cerr << "         AuthUser: "   << proximta_AuthUser << endl; -    cerr << "         Mid       '"  << message_id << "'" << endl; -  } - -  sepofra my_spf; -  try { -    my_spf.check(proximta_IP, -        proximta_HELO, -        return_path, -        "junk", 0/* verbosity */); -    cerr << "*** " << my_spf.explain() << endl; -  } catch (bad_thing foo) { -    cerr << "Caught bad thing: " << foo.what() << endl; -    return ex_syserr; -  } - -// The logic here is:  In order: -// 1:: If whitelisted, accept.  No greylisting, no spam-checking. -// 2:: If blacklisted, reject.  No greylisting, no spam-checking. -// 3:: If good reputation, spam-check it and send it on its way. -// 4:: If no reputation, greylist. -// 5:: If bad reputation, ???? - -// Expanding item 3 to the next level of detail: -//  3a:: If some domain vouches for this sender-IP via SPF, -//   then the reputation is bound to the domain. -//  3c:: If some domain vouches for the message vie DKIM, -//   then the reputation is bound to the domain. -//  3d:: If no SPF or DKIM, then the reputation attaches -//   to the sender-IP. - -// Expanding item 4 to the next level of detail: -//  4a:: If the greylisting database says this message is ripe -//   spam-check it.  If it's OK, use it to count toward reputation. -//  4b:: If it is previously unseen or too old, start greylisting -//   timer from scratch.  Reject with temporary error. -//  4c:: If it is in the "green" state, let the timer -//   continue from where it is.  Reject with temporary error. - -// Note:  Reputation normally attaches to a domain. -//  With SPF, the domain vouches for the sender at a given IP address -//   ... and then the sender implicitly vouches for the message. -//  With DKIM, the domain vouches for an individual message. -//  With neither SPF nor DKIM, reputation attaches to the sender's -//    IP address.  The sender vouches for the message. -// -// During greylisting, delay applies to the message.  Reputation -//  applies to the domain (via SPF or DKIM) or to the server -//  (otherwise). - - -// If you are a medium-sized operator, such that you have one -// and only one IP address that ever sends email, and it is a -// static IP address, then you don't have much to gain from -// DKIM or SPF.  Attaching a reputation to your domain is not -// much different from attaching a reputation to your IP address. - -// In constrast, if you are a low-budget operator with a -// dynamic IP address, you benefit from SPF and/or DKIM. -// Your reputation attaches to your domain, and remains -// stable even as your IP address changes. - -// At the other extreme, if you are a big-time operator -// such as googlegroups.com, you benefit from DKIM and/or -// SPF.  Your IP addresses are not dynamic, but they are -// numerous, so you prefer to have your reputation apply -// to all your email-sending hosts. - -#if 0   /* typical Received-SPF line */ - Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70; -#endif - -#if 0   /* SPF users */ - :; mail-scan +received-spf /home/jsd/Maildir/cur[/]*  | -    sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' | -    awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr -     81 <gmail.com> -     17 <mac.com> -      8 <gmx.net> -      8 <bbruner@gmail.com> -      7 <jsd@av8n.com> -      6 <kst24@cam.ac.uk> -      5 <farooq.w@gmail.com> -      4 <scerri@chem.ucla.edu> -      4 <comcast.net> -      4 <c2i.net> -      3 <gemort2006@gmail.com> -      2 <rrhake@earthlink.net> -      2 <hotmail.com> -      2 <GCC.EDU> -      1 <us.panasonic.com> -      1 <sss.pgh.pa.us> -      1 <scot_wherland@wsu.edu> -      1 <rpendarvis@brenau.edu> -      1 <hmperks@gmail.com> -      1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu> -      1 <arcor.de> -#endif - -#if 0   /* DKIM users */ -     52 d=googlegroups.com; -     27 d=barackobama.com; -     10 d=gmail.com; -      5 d=bronto.com; -      5 d=bluehornet.com; -      4 d=news.abebooks.com; -      2 d=yahoo.co.uk; -      2 d=sbcglobal.net; -      2 d=embarqmail.com; -      2 d=emailms.angieslist.com; -      1 d=newsletters.sourceforge.net; -      1 d=members.ebay.com; -      1 d=info.citibank.com; -      1 d=ebay.com; -      1 d=commail1.co.za; -#endif - -  list<string> badnews; -  int whitelisted(0); - -  if (subject.find("sesame") != string::npos -        && subject.find("swordfish") != string::npos) { -    whitelisted++; -  } - -  if (delivered_to.length()){ -    cerr << progid <<  " Delivered-to: <<<" << delivered_to << ">>>" << endl; -  } -  if (toLower(trim(delivered_to)) == "jean@av8n.com") { -    badnews.push_back("Looping Delivered-to: " + delivered_to); -  } - -  if (subject.find("-please-bounce-this-") != string::npos) { -    badnews.push_back("by request"); -  } - -  if (!date.length()) { -    badnews.push_back("no date"); -  } - -  if (mid_required && !message_id.length()) { -    badnews.push_back("no message-id"); -  } - -  if (badnews.size() && !whitelisted){ -    cerr << progid << " " << join(", ", badnews) << endl; -    if (error_exit){ -      cerr << progid << " '" << from -             << "' to '" << to -             << "'" << endl; -      exeunt(ex_spam); -    } -  } -  return 0; -} - -int skrewt::body(){ -  string main_contype; -  if (content_type.length()) -    parse_content(content_type, main_contype, boundary); -// some slightly-useful booleans: -  int currently_text = main_contype == "text"; -  int main_multipart = main_contype == "multipart"; - -// early-stage thinking has been done. -// Now spew the rest of the message -  //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl; -  int in_subheads(0); -  int textlines(0); - -  for (;;){             // outer loop over all lines in the body -    if (cin.eof()) break; -    if (cin.bad()) return 1; -    string line; -// on fail, go back to top of outer loop and check for eof versus bad -    if (getline(cin, line).fail()) continue; -    msgsize += line.length()+1; -    if (msgsize > maxsize) { -      cerr << progid << " rejection: bigger than " << maxsize << endl; -      maybe_exeunt(ex_spam, error_exit); -    } -    bigbuf.push_back(line); -    cout << line << endl; -    if (in_subheads){ -      if (line == "" || line == "\r") in_subheads = 0; -    } -    if (in_subheads){ -        string sub_contype; -        string junk; -// in principle could worry about folded headers, -// but in this application it doesn't actually matter -        string headword; -        string rest; -        size_t where = line.find(":"); -        if (where != string::npos) { -          headword = line.substr(0, where); -          rest = ltrim(line.substr(1+where)); -        } -        headword = toLower(headword); -        if (headword == "content-type") { -          parse_content(rest, sub_contype, junk); -          currently_text = sub_contype == "text"; -          //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl; -        } -    } else { -      if (main_multipart && line == "--" + boundary) { -        //xxxx cerr << "found subhead boundary" << endl; -        in_subheads = 1; -        continue; -      } -      if (currently_text) textlines++; -    } -  } - -  if (0) cerr << "textlines: " << textlines << endl; -  if (!textlines) { -    cerr << progid << " rejection: no text: " << error_exit << endl; -    maybe_exeunt(ex_spam, error_exit); -  } -  cerr << progid << " normal completion" << endl; -  return(ex_good); -} -  ////////////////////////////////////////////////////////////  int main(int _argc, const char** _argv){ @@ -546,24 +85,22 @@ int main(int _argc, const char** _argv){    }    skrewt mysk; +//  cerr << "maxsize: " << mysk.maxsize << endl; -  while (argc) { -    string arg(*argv); argv++; argc--; +  argParser ARGS(argc, argv); +  try {while (ARGS.size()) { +    string arg = ARGS.next();      if (arg.substr(0,2) == "--") arg = arg.substr(1); -    if (prefix(arg, "-help")) { +    if (ARGS.prefix("-help")) {        usage(0);      }      if (0) { -    } else if (prefix(arg, "-mid-required")) { +    } else if (ARGS.prefix("-mid-required")) {        mysk.mid_required++; -    } else if (prefix(arg, "-error-exit")) { +    } else if (ARGS.prefix("-error-exit")) {        mysk.error_exit++; -    } else if (prefix(arg, "-maxsize")) { -      if (!argc) { -        cerr << "Option -maxsize requires an argument" << endl; -        exit(ex_usage); -      } -      mysk.maxsize = atoi(*argv); argv++; argc--; +    } else if (ARGS.prefix("-maxsize", 1)) { +      mysk.maxsize = atoi(ARGS.shift().c_str());      } else if (arg.substr(0,1) == "-") {        cerr << "Unrecognized option '" << arg << "'" << endl;        cerr << "For help, try:  " << progname << " -help" << endl; @@ -573,10 +110,16 @@ int main(int _argc, const char** _argv){        cerr << "For help, try:  " << progname << " -help" << endl;        exit(ex_usage);      } +  }} +  catch (int) { +    exit(ex_usage);    } -  int rslt = mysk.headers(); +  int rslt = mysk.headers(cin);    if (rslt) return rslt; +  mysk.dump_bigbuf(cout); +  mysk.headerbuf = mysk.bigbuf; +  mysk.bigbuf = vector<string>(0);  // Headers are done.  // Do some early-stage thinking. @@ -584,7 +127,8 @@ int main(int _argc, const char** _argv){    rslt = mysk.interstage();    if (rslt) return rslt; -  rslt = mysk.body(); -  return rslt; - +  rslt = mysk.body(cin, cout); +  if (rslt) return rslt; +  mysk.dump_bigbuf(cout); +  return 0;  } | 
