From 7abce5d60408c7713181249ba3f23b72a40aa326 Mon Sep 17 00:00:00 2001 From: John Denker Date: Sat, 24 Nov 2012 09:03:18 -0800 Subject: new libskrewt-based code is now stable and in use; let it be the basis for further developments --- tools/skrewt.c | 498 +++------------------------------------------------------ 1 file changed, 21 insertions(+), 477 deletions(-) (limited to 'tools/skrewt.c') diff --git a/tools/skrewt.c b/tools/skrewt.c index 63c6be6..abea289 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -8,7 +8,6 @@ #include /* for exit() */ #include /* for strcmp() */ #include /* toupper */ -#include #include /* perror */ #include @@ -37,33 +36,8 @@ void usage(const int sts){ exit(sts); } -#include "qq_exit_codes.h" +#include "libskrewt.h" #include "utils.h" -#include "sepofra.h" - -void maybe_exeunt(const int sts, const int really){ - if (!really) return; - if (sts == ex_good) exit(sts); - - const char* foo = getenv("HI_Q_GROUP"); - if (!foo) exit(sts); - -// No point in signalling ourself: - sighandler_t rslt = signal(SIGUSR1, SIG_IGN); - if (rslt == SIG_ERR) { - cerr << "error setting signal" << endl; - } - int k = kill(-atoi(foo), SIGUSR1); - if (k) { - cerr << "kill failed on group " << atoi(foo) << " ... "; - perror(0); - } - exit(sts); -} - -void exeunt(const int sts){ - maybe_exeunt(sts, 1); -} string progname, progid; int mypid; @@ -72,87 +46,6 @@ int mypid; /* Content-Type: multipart/mixed; boundary="1170861315-1262462055-1341954763=:92165" */ // -void parse_content(const string type_spec_line, - string &maintype, string &boundary) { - //xxx cerr << "parser called with: " << type_spec_line << endl; - string get_type(type_spec_line); - - size_t where = get_type.find_first_of(" \t;\n"); - string rest; - if (where == string::npos) { - // keep whole string - } - else { - rest = get_type.substr(where+1); - get_type = get_type.substr(0,where); - } - where = get_type.find("/"); - if (where == string::npos){ - maintype = ""; - cerr << "could not find / in " << get_type << endl; - } else { - maintype = get_type.substr(0, where); - } - -// now need to find boundary - - string srch = "boundary="; - where = rest.find(srch); - if (where != string::npos) { - where += srch.length(); - boundary = rest.substr(where); - if (boundary[0] == '"') { - boundary = boundary.substr(1); - where = boundary.find_first_of("\""); - } else { - where = boundary.find_first_of(" \t;\n"); - } - if (where == string::npos) { - /* do nothing, boundary=boundary as a whole */ - } else { - boundary = boundary.substr(0, where); - } - } else { - //xxxxxxx cerr << "boundary= not found in " << type_spec_line << endl; - } -} - -class skrewt{ -public: - string received_from; // envelope HELO among other things - string proximta_HELO; - string proximta_rDNS; - string proximta_IP; - string proximta_AuthUser; - string return_path; // envelope MAIL FROM - string boundary; - string to; - string from; - string subject; - string date; - string message_id; - string content_type; - string delivered_to; - int msgsize; - vector bigbuf; - int saw_blank_line; - int recno; - - int maxsize; - int error_exit; - int mid_required; - - // constructor - skrewt() - : boundary("x-xx-x"), msgsize(0), saw_blank_line(0), recno(0), - maxsize(1000*1000), error_exit(0), mid_required(0) - {} - - int headers(); - int interstage(); - int body(); - int krunch_rfrom(); -}; #if 0 /* typical "Received: from" lines */ Received: from lists.sourceforge.net (216.34.181.88) @@ -178,360 +71,6 @@ Received: from ip68-231-191-153.tc.ph.cox.net (HELO asclepias.av8n.net) (smtp@68 /home/jsd/Maildir/cur/1342363199.24320.cloud:2, #endif -int skrewt::krunch_rfrom(){ - stringstream parse; - parse.str(received_from); - string word; - parse >> word; - if (word != "from") { - cerr << progid << " bad 'Received: from' line ... '" - << word << "'" << endl; - return ex_syserr; - } - parse >> proximta_rDNS; - parse >> word; - if (word == "(HELO") { - parse >> proximta_HELO; - proximta_HELO = rtrim(proximta_HELO, "()"); - parse >> word; - } else { - proximta_HELO = proximta_rDNS; - } - size_t len = word.length(); - if (len<2 || word[0] != '(' || word[len-1] != ')') { - cerr << progid << " bad 'Received: from' line ;;; '" - << word << "'" << endl; - return ex_syserr; - } - proximta_IP = word.substr(1, len-2); - size_t where = proximta_IP.find("@"); - if (where != string::npos){ - proximta_AuthUser = proximta_IP.substr(0, where); - proximta_IP = proximta_IP.substr(1+where); - } - - return 0; -} - -int skrewt::headers(){ - //xxxx cerr << progid << " begins" << endl; - for (;;){ // outer loop over all records in the header - if (cin.eof()) break; - if (cin.bad()) return 1; - - string line; -// on fail, go back to top of outer loop and check for eof versus bad - if (getline(cin, line).fail()) continue; - msgsize += line.length()+1; - if (msgsize > maxsize) { - cerr << progid << " rejection: bigger than " << maxsize << endl; - exeunt(ex_spam); - } - cout << line << endl; - bigbuf.push_back(line); - string headrec = noCR(line); // for a folded record, this is the first line - - for (;;) { // inner loop to build a multi-line record e.g. folded record: - if (cin.eof()) break; - if (cin.bad()) return 1; - char ch; - if (cin.get(ch).fail()) continue; - cin.putback(ch); - if (ch != ' ' && ch != '\t') break; - string line; -// on fail, go back to top of inner loop and check for eof versus bad - if (getline(cin, line).fail()) continue; - msgsize += line.length()+1; - if (msgsize > maxsize) { - cerr << progid << " rejection: bigger than " << maxsize << endl; - exeunt(ex_spam); - } - cout << line << endl; - bigbuf.push_back(line); - headrec += "\n" + noCR(line); - } -// here with a fully assembled header record -// headrec (unlike line) contains no DOS CR characters - int len = headrec.length(); - if (len == 0) { - saw_blank_line = 1; - break; // no more headers in this message - } - -// here if it's a header line - string headword; - string rest; - size_t where = headrec.find(":"); - if (where != string::npos) { - headword = headrec.substr(0, where); - rest = ltrim(headrec.substr(1+where)); - } - headword = toLower(headword); - if (0){ - } else if (headword == "from") { - from = rest; - } else if (headword == "to") { - to = rest; - } else if (headword == "return-path") { - return_path = rest; - } else if (headword == "message-id") { - message_id = rest; - } else if (headword == "received") { - if (!received_from.length() && prefix("from ", rest)){ - received_from = rest; - } - } else if (headword == "date") { - date = rest; - } else if (headword == "subject") { - subject = rest; - } else if (headword == "content-type") { - content_type = rest; - } else if (headword == "delivered-to") { - delivered_to = rest; - } - //xxxx cout << headrec.length() << " ... "; - recno++; - if (0) if (recno <= 6) cerr << progid << "#" << recno - << " " << headrec << endl; - } - return 0; -} - -int skrewt::interstage(){ - if (saw_blank_line) {/* ignore */} -// Note that the headers are in reverse-chronological order: - cerr << progid <<" Return-path: " << return_path <"}' | sort | uniq -c | sort -nr - 81 - 17 - 8 - 8 - 7 - 6 - 5 - 4 - 4 - 4 - 3 - 2 - 2 - 2 - 1 - 1 - 1 - 1 - 1 - 1 - 1 -#endif - -#if 0 /* DKIM users */ - 52 d=googlegroups.com; - 27 d=barackobama.com; - 10 d=gmail.com; - 5 d=bronto.com; - 5 d=bluehornet.com; - 4 d=news.abebooks.com; - 2 d=yahoo.co.uk; - 2 d=sbcglobal.net; - 2 d=embarqmail.com; - 2 d=emailms.angieslist.com; - 1 d=newsletters.sourceforge.net; - 1 d=members.ebay.com; - 1 d=info.citibank.com; - 1 d=ebay.com; - 1 d=commail1.co.za; -#endif - - list badnews; - int whitelisted(0); - - if (subject.find("sesame") != string::npos - && subject.find("swordfish") != string::npos) { - whitelisted++; - } - - if (delivered_to.length()){ - cerr << progid << " Delivered-to: <<<" << delivered_to << ">>>" << endl; - } - if (toLower(trim(delivered_to)) == "jean@av8n.com") { - badnews.push_back("Looping Delivered-to: " + delivered_to); - } - - if (subject.find("-please-bounce-this-") != string::npos) { - badnews.push_back("by request"); - } - - if (!date.length()) { - badnews.push_back("no date"); - } - - if (mid_required && !message_id.length()) { - badnews.push_back("no message-id"); - } - - if (badnews.size() && !whitelisted){ - cerr << progid << " " << join(", ", badnews) << endl; - if (error_exit){ - cerr << progid << " '" << from - << "' to '" << to - << "'" << endl; - exeunt(ex_spam); - } - } - return 0; -} - -int skrewt::body(){ - string main_contype; - if (content_type.length()) - parse_content(content_type, main_contype, boundary); -// some slightly-useful booleans: - int currently_text = main_contype == "text"; - int main_multipart = main_contype == "multipart"; - -// early-stage thinking has been done. -// Now spew the rest of the message - //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl; - int in_subheads(0); - int textlines(0); - - for (;;){ // outer loop over all lines in the body - if (cin.eof()) break; - if (cin.bad()) return 1; - string line; -// on fail, go back to top of outer loop and check for eof versus bad - if (getline(cin, line).fail()) continue; - msgsize += line.length()+1; - if (msgsize > maxsize) { - cerr << progid << " rejection: bigger than " << maxsize << endl; - maybe_exeunt(ex_spam, error_exit); - } - bigbuf.push_back(line); - cout << line << endl; - if (in_subheads){ - if (line == "" || line == "\r") in_subheads = 0; - } - if (in_subheads){ - string sub_contype; - string junk; -// in principle could worry about folded headers, -// but in this application it doesn't actually matter - string headword; - string rest; - size_t where = line.find(":"); - if (where != string::npos) { - headword = line.substr(0, where); - rest = ltrim(line.substr(1+where)); - } - headword = toLower(headword); - if (headword == "content-type") { - parse_content(rest, sub_contype, junk); - currently_text = sub_contype == "text"; - //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl; - } - } else { - if (main_multipart && line == "--" + boundary) { - //xxxx cerr << "found subhead boundary" << endl; - in_subheads = 1; - continue; - } - if (currently_text) textlines++; - } - } - - if (0) cerr << "textlines: " << textlines << endl; - if (!textlines) { - cerr << progid << " rejection: no text: " << error_exit << endl; - maybe_exeunt(ex_spam, error_exit); - } - cerr << progid << " normal completion" << endl; - return(ex_good); -} - //////////////////////////////////////////////////////////// int main(int _argc, const char** _argv){ @@ -546,24 +85,22 @@ int main(int _argc, const char** _argv){ } skrewt mysk; +// cerr << "maxsize: " << mysk.maxsize << endl; - while (argc) { - string arg(*argv); argv++; argc--; + argParser ARGS(argc, argv); + try {while (ARGS.size()) { + string arg = ARGS.next(); if (arg.substr(0,2) == "--") arg = arg.substr(1); - if (prefix(arg, "-help")) { + if (ARGS.prefix("-help")) { usage(0); } if (0) { - } else if (prefix(arg, "-mid-required")) { + } else if (ARGS.prefix("-mid-required")) { mysk.mid_required++; - } else if (prefix(arg, "-error-exit")) { + } else if (ARGS.prefix("-error-exit")) { mysk.error_exit++; - } else if (prefix(arg, "-maxsize")) { - if (!argc) { - cerr << "Option -maxsize requires an argument" << endl; - exit(ex_usage); - } - mysk.maxsize = atoi(*argv); argv++; argc--; + } else if (ARGS.prefix("-maxsize", 1)) { + mysk.maxsize = atoi(ARGS.shift().c_str()); } else if (arg.substr(0,1) == "-") { cerr << "Unrecognized option '" << arg << "'" << endl; cerr << "For help, try: " << progname << " -help" << endl; @@ -573,10 +110,16 @@ int main(int _argc, const char** _argv){ cerr << "For help, try: " << progname << " -help" << endl; exit(ex_usage); } + }} + catch (int) { + exit(ex_usage); } - int rslt = mysk.headers(); + int rslt = mysk.headers(cin); if (rslt) return rslt; + mysk.dump_bigbuf(cout); + mysk.headerbuf = mysk.bigbuf; + mysk.bigbuf = vector(0); // Headers are done. // Do some early-stage thinking. @@ -584,7 +127,8 @@ int main(int _argc, const char** _argv){ rslt = mysk.interstage(); if (rslt) return rslt; - rslt = mysk.body(); - return rslt; - + rslt = mysk.body(cin, cout); + if (rslt) return rslt; + mysk.dump_bigbuf(cout); + return 0; } -- cgit v1.2.3