diff options
author | John Denker <jsd@av8n.com> | 2012-11-22 16:39:07 -0800 |
---|---|---|
committer | John Denker <jsd@av8n.com> | 2012-11-22 16:39:07 -0800 |
commit | 4dd94a839cbae8a45889b224945eeaa6fb93b578 (patch) | |
tree | 1a301b7358e6bac17579fd9ae28da7630840c3d2 /tools | |
parent | 6faf97fd62cc84e2405731534236fb2a93ba4407 (diff) |
move yet more stuff; ward.c is starting to get small
Diffstat (limited to 'tools')
-rw-r--r-- | tools/libskrewt.c | 260 | ||||
-rw-r--r-- | tools/libskrewt.h | 4 | ||||
-rw-r--r-- | tools/ward.c | 260 |
3 files changed, 264 insertions, 260 deletions
diff --git a/tools/libskrewt.c b/tools/libskrewt.c index 602fdb7..812b546 100644 --- a/tools/libskrewt.c +++ b/tools/libskrewt.c @@ -2,6 +2,7 @@ #include "utils.h" #include <iostream> #include <sstream> +#include <signal.h> using namespace std; @@ -84,3 +85,262 @@ int skrewt::krunch_rfrom(){ return 0; } + +int skrewt::interstage(){ + if (saw_blank_line) {/* ignore */} +// Note that the headers are in reverse-chronological order: + cerr << progid <<" Return-path: " << return_path <<endl; + + { // parse the 'Received: from' line: + cerr << " Received: " << received_from <<endl; + int rslt = krunch_rfrom(); + if (rslt) return rslt; + cerr << " rDNS: " << proximta_rDNS << endl; + cerr << " HELO: " << proximta_HELO << endl; + cerr << " IP: " << proximta_IP << endl; + cerr << " AuthUser: " << proximta_AuthUser << endl; + cerr << " Mid '" << message_id << "'" << endl; + } + + sepofra my_spf; + try { + my_spf.check(proximta_IP, + proximta_HELO, + return_path, + "junk", 0/* verbosity */); + cerr << "*** " << my_spf.explain() << endl; + } catch (bad_thing foo) { + cerr << "Caught bad thing: " << foo.what() << endl; + return ex_syserr; + } + +// The logic here is: In order: +// 1:: If whitelisted, accept. No greylisting, no spam-checking. +// 2:: If blacklisted, reject. No greylisting, no spam-checking. +// 3:: If good reputation, spam-check it and send it on its way. +// 4:: If no reputation, greylist. +// 5:: If bad reputation, ???? + +// Expanding item 3 to the next level of detail: +// 3a:: If some domain vouches for this sender-IP via SPF, +// then the reputation is bound to the domain. +// 3c:: If some domain vouches for the message vie DKIM, +// then the reputation is bound to the domain. +// 3d:: If no SPF or DKIM, then the reputation attaches +// to the sender-IP. + +// Expanding item 4 to the next level of detail: +// 4a:: If the greylisting database says this message is ripe +// spam-check it. If it's OK, use it to count toward reputation. +// 4b:: If it is previously unseen or too old, start greylisting +// timer from scratch. Reject with temporary error. +// 4c:: If it is in the "green" state, let the timer +// continue from where it is. Reject with temporary error. + +// Note: Reputation normally attaches to a domain. +// With SPF, the domain vouches for the sender at a given IP address +// ... and then the sender implicitly vouches for the message. +// With DKIM, the domain vouches for an individual message. +// With neither SPF nor DKIM, reputation attaches to the sender's +// IP address. The sender vouches for the message. +// +// During greylisting, delay applies to the message. Reputation +// applies to the domain (via SPF or DKIM) or to the server +// (otherwise). + + +// If you are a medium-sized operator, such that you have one +// and only one IP address that ever sends email, and it is a +// static IP address, then you don't have much to gain from +// DKIM or SPF. Attaching a reputation to your domain is not +// much different from attaching a reputation to your IP address. + +// In constrast, if you are a low-budget operator with a +// dynamic IP address, you benefit from SPF and/or DKIM. +// Your reputation attaches to your domain, and remains +// stable even as your IP address changes. + +// At the other extreme, if you are a big-time operator +// such as googlegroups.com, you benefit from DKIM and/or +// SPF. Your IP addresses are not dynamic, but they are +// numerous, so you prefer to have your reputation apply +// to all your email-sending hosts. + +#if 0 /* typical Received-SPF line */ + Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70; +#endif + +#if 0 /* SPF users */ + :; mail-scan +received-spf /home/jsd/Maildir/cur[/]* | + sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' | + awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr + 81 <gmail.com> + 17 <mac.com> + 8 <gmx.net> + 8 <bbruner@gmail.com> + 7 <jsd@av8n.com> + 6 <kst24@cam.ac.uk> + 5 <farooq.w@gmail.com> + 4 <scerri@chem.ucla.edu> + 4 <comcast.net> + 4 <c2i.net> + 3 <gemort2006@gmail.com> + 2 <rrhake@earthlink.net> + 2 <hotmail.com> + 2 <GCC.EDU> + 1 <us.panasonic.com> + 1 <sss.pgh.pa.us> + 1 <scot_wherland@wsu.edu> + 1 <rpendarvis@brenau.edu> + 1 <hmperks@gmail.com> + 1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu> + 1 <arcor.de> +#endif + +#if 0 /* DKIM users */ + 52 d=googlegroups.com; + 27 d=barackobama.com; + 10 d=gmail.com; + 5 d=bronto.com; + 5 d=bluehornet.com; + 4 d=news.abebooks.com; + 2 d=yahoo.co.uk; + 2 d=sbcglobal.net; + 2 d=embarqmail.com; + 2 d=emailms.angieslist.com; + 1 d=newsletters.sourceforge.net; + 1 d=members.ebay.com; + 1 d=info.citibank.com; + 1 d=ebay.com; + 1 d=commail1.co.za; +#endif + + list<string> badnews; + int whitelisted(0); + + if (subject.find("sesame") != string::npos + && subject.find("swordfish") != string::npos) { + whitelisted++; + } + + if (delivered_to.length()){ + cerr << progid << " Delivered-to: <<<" << delivered_to << ">>>" << endl; + } + if (toLower(trim(delivered_to)) == "jean@av8n.com") { + badnews.push_back("Looping Delivered-to: " + delivered_to); + } + + if (subject.find("-please-bounce-this-") != string::npos) { + badnews.push_back("by request"); + } + + if (!date.length()) { + badnews.push_back("no date"); + } + + if (mid_required && !message_id.length()) { + badnews.push_back("no message-id"); + } + + if (badnews.size() && !whitelisted){ + cerr << progid << " " << join(", ", badnews) << endl; + if (error_exit){ + cerr << progid << " '" << from + << "' to '" << to + << "'" << endl; + exeunt(ex_spam); + } + } + return 0; +} + +int skrewt::body(){ + string main_contype; + if (content_type.length()) + parse_content(content_type, main_contype, boundary); +// some slightly-useful booleans: + int currently_text = main_contype == "text"; + int main_multipart = main_contype == "multipart"; + +// early-stage thinking has been done. +// Now spew the rest of the message + //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl; + int in_subheads(0); + int textlines(0); + + for (;;){ // outer loop over all lines in the body + if (cin.eof()) break; + if (cin.bad()) return 1; + string line; +// on fail, go back to top of outer loop and check for eof versus bad + if (getline(cin, line).fail()) continue; + msgsize += line.length()+1; + if (msgsize > maxsize) { + cerr << progid << " rejection: bigger than " << maxsize << endl; + maybe_exeunt(ex_spam, error_exit); + } + bigbuf.push_back(line); + cout << line << endl; + if (in_subheads){ + if (line == "" || line == "\r") in_subheads = 0; + } + if (in_subheads){ + string sub_contype; + string junk; +// in principle could worry about folded headers, +// but in this application it doesn't actually matter + string headword; + string rest; + size_t where = line.find(":"); + if (where != string::npos) { + headword = line.substr(0, where); + rest = ltrim(line.substr(1+where)); + } + headword = toLower(headword); + if (headword == "content-type") { + parse_content(rest, sub_contype, junk); + currently_text = sub_contype == "text"; + //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl; + } + } else { + if (main_multipart && line == "--" + boundary) { + //xxxx cerr << "found subhead boundary" << endl; + in_subheads = 1; + continue; + } + if (currently_text) textlines++; + } + } + + if (0) cerr << "textlines: " << textlines << endl; + if (!textlines) { + cerr << progid << " rejection: no text: " << error_exit << endl; + maybe_exeunt(ex_spam, error_exit); + } + cerr << progid << " normal completion" << endl; + return(ex_good); +} + +void maybe_exeunt(const int sts, const int really){ + if (!really) return; + if (sts == ex_good) exit(sts); + + const char* foo = getenv("HI_Q_GROUP"); + if (!foo) exit(sts); + +// No point in signalling ourself: + sighandler_t rslt = signal(SIGUSR1, SIG_IGN); + if (rslt == SIG_ERR) { + cerr << "error setting signal" << endl; + } + int k = kill(-atoi(foo), SIGUSR1); + if (k) { + cerr << "kill failed on group " << atoi(foo) << " ... "; + perror(0); + } + exit(sts); +} + +void exeunt(const int sts){ + maybe_exeunt(sts, 1); +} diff --git a/tools/libskrewt.h b/tools/libskrewt.h index 94d06dd..c5fea7e 100644 --- a/tools/libskrewt.h +++ b/tools/libskrewt.h @@ -1,5 +1,6 @@ #include <string> #include <vector> +#include "sepofra.h" #include "qq_exit_codes.h" // a bit of a kludge extern std::string progid; @@ -42,3 +43,6 @@ public: void parse_content(const std::string type_spec_line, std::string &maintype, std::string &boundary); + +void exeunt(const int sts); +void maybe_exeunt(const int sts, const int really); diff --git a/tools/ward.c b/tools/ward.c index 6280a28..277fd76 100644 --- a/tools/ward.c +++ b/tools/ward.c @@ -8,7 +8,6 @@ #include <stdlib.h> /* for exit() */ #include <string> /* for strcmp() */ #include <ctype.h> /* toupper */ -#include <signal.h> #include <stdio.h> /* perror */ #include <sstream> @@ -38,33 +37,8 @@ void usage(const int sts){ } #include "libskrewt.h" -#include "sepofra.h" #include "utils.h" -void maybe_exeunt(const int sts, const int really){ - if (!really) return; - if (sts == ex_good) exit(sts); - - const char* foo = getenv("HI_Q_GROUP"); - if (!foo) exit(sts); - -// No point in signalling ourself: - sighandler_t rslt = signal(SIGUSR1, SIG_IGN); - if (rslt == SIG_ERR) { - cerr << "error setting signal" << endl; - } - int k = kill(-atoi(foo), SIGUSR1); - if (k) { - cerr << "kill failed on group " << atoi(foo) << " ... "; - perror(0); - } - exit(sts); -} - -void exeunt(const int sts){ - maybe_exeunt(sts, 1); -} - string progname, progid; int mypid; @@ -181,240 +155,6 @@ int skrewt::headers(){ return 0; } -int skrewt::interstage(){ - if (saw_blank_line) {/* ignore */} -// Note that the headers are in reverse-chronological order: - cerr << progid <<" Return-path: " << return_path <<endl; - - { // parse the 'Received: from' line: - cerr << " Received: " << received_from <<endl; - int rslt = krunch_rfrom(); - if (rslt) return rslt; - cerr << " rDNS: " << proximta_rDNS << endl; - cerr << " HELO: " << proximta_HELO << endl; - cerr << " IP: " << proximta_IP << endl; - cerr << " AuthUser: " << proximta_AuthUser << endl; - cerr << " Mid '" << message_id << "'" << endl; - } - - sepofra my_spf; - try { - my_spf.check(proximta_IP, - proximta_HELO, - return_path, - "junk", 0/* verbosity */); - cerr << "*** " << my_spf.explain() << endl; - } catch (bad_thing foo) { - cerr << "Caught bad thing: " << foo.what() << endl; - return ex_syserr; - } - -// The logic here is: In order: -// 1:: If whitelisted, accept. No greylisting, no spam-checking. -// 2:: If blacklisted, reject. No greylisting, no spam-checking. -// 3:: If good reputation, spam-check it and send it on its way. -// 4:: If no reputation, greylist. -// 5:: If bad reputation, ???? - -// Expanding item 3 to the next level of detail: -// 3a:: If some domain vouches for this sender-IP via SPF, -// then the reputation is bound to the domain. -// 3c:: If some domain vouches for the message vie DKIM, -// then the reputation is bound to the domain. -// 3d:: If no SPF or DKIM, then the reputation attaches -// to the sender-IP. - -// Expanding item 4 to the next level of detail: -// 4a:: If the greylisting database says this message is ripe -// spam-check it. If it's OK, use it to count toward reputation. -// 4b:: If it is previously unseen or too old, start greylisting -// timer from scratch. Reject with temporary error. -// 4c:: If it is in the "green" state, let the timer -// continue from where it is. Reject with temporary error. - -// Note: Reputation normally attaches to a domain. -// With SPF, the domain vouches for the sender at a given IP address -// ... and then the sender implicitly vouches for the message. -// With DKIM, the domain vouches for an individual message. -// With neither SPF nor DKIM, reputation attaches to the sender's -// IP address. The sender vouches for the message. -// -// During greylisting, delay applies to the message. Reputation -// applies to the domain (via SPF or DKIM) or to the server -// (otherwise). - - -// If you are a medium-sized operator, such that you have one -// and only one IP address that ever sends email, and it is a -// static IP address, then you don't have much to gain from -// DKIM or SPF. Attaching a reputation to your domain is not -// much different from attaching a reputation to your IP address. - -// In constrast, if you are a low-budget operator with a -// dynamic IP address, you benefit from SPF and/or DKIM. -// Your reputation attaches to your domain, and remains -// stable even as your IP address changes. - -// At the other extreme, if you are a big-time operator -// such as googlegroups.com, you benefit from DKIM and/or -// SPF. Your IP addresses are not dynamic, but they are -// numerous, so you prefer to have your reputation apply -// to all your email-sending hosts. - -#if 0 /* typical Received-SPF line */ - Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70; -#endif - -#if 0 /* SPF users */ - :; mail-scan +received-spf /home/jsd/Maildir/cur[/]* | - sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' | - awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr - 81 <gmail.com> - 17 <mac.com> - 8 <gmx.net> - 8 <bbruner@gmail.com> - 7 <jsd@av8n.com> - 6 <kst24@cam.ac.uk> - 5 <farooq.w@gmail.com> - 4 <scerri@chem.ucla.edu> - 4 <comcast.net> - 4 <c2i.net> - 3 <gemort2006@gmail.com> - 2 <rrhake@earthlink.net> - 2 <hotmail.com> - 2 <GCC.EDU> - 1 <us.panasonic.com> - 1 <sss.pgh.pa.us> - 1 <scot_wherland@wsu.edu> - 1 <rpendarvis@brenau.edu> - 1 <hmperks@gmail.com> - 1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu> - 1 <arcor.de> -#endif - -#if 0 /* DKIM users */ - 52 d=googlegroups.com; - 27 d=barackobama.com; - 10 d=gmail.com; - 5 d=bronto.com; - 5 d=bluehornet.com; - 4 d=news.abebooks.com; - 2 d=yahoo.co.uk; - 2 d=sbcglobal.net; - 2 d=embarqmail.com; - 2 d=emailms.angieslist.com; - 1 d=newsletters.sourceforge.net; - 1 d=members.ebay.com; - 1 d=info.citibank.com; - 1 d=ebay.com; - 1 d=commail1.co.za; -#endif - - list<string> badnews; - int whitelisted(0); - - if (subject.find("sesame") != string::npos - && subject.find("swordfish") != string::npos) { - whitelisted++; - } - - if (delivered_to.length()){ - cerr << progid << " Delivered-to: <<<" << delivered_to << ">>>" << endl; - } - if (toLower(trim(delivered_to)) == "jean@av8n.com") { - badnews.push_back("Looping Delivered-to: " + delivered_to); - } - - if (subject.find("-please-bounce-this-") != string::npos) { - badnews.push_back("by request"); - } - - if (!date.length()) { - badnews.push_back("no date"); - } - - if (mid_required && !message_id.length()) { - badnews.push_back("no message-id"); - } - - if (badnews.size() && !whitelisted){ - cerr << progid << " " << join(", ", badnews) << endl; - if (error_exit){ - cerr << progid << " '" << from - << "' to '" << to - << "'" << endl; - exeunt(ex_spam); - } - } - return 0; -} - -int skrewt::body(){ - string main_contype; - if (content_type.length()) - parse_content(content_type, main_contype, boundary); -// some slightly-useful booleans: - int currently_text = main_contype == "text"; - int main_multipart = main_contype == "multipart"; - -// early-stage thinking has been done. -// Now spew the rest of the message - //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl; - int in_subheads(0); - int textlines(0); - - for (;;){ // outer loop over all lines in the body - if (cin.eof()) break; - if (cin.bad()) return 1; - string line; -// on fail, go back to top of outer loop and check for eof versus bad - if (getline(cin, line).fail()) continue; - msgsize += line.length()+1; - if (msgsize > maxsize) { - cerr << progid << " rejection: bigger than " << maxsize << endl; - maybe_exeunt(ex_spam, error_exit); - } - bigbuf.push_back(line); - cout << line << endl; - if (in_subheads){ - if (line == "" || line == "\r") in_subheads = 0; - } - if (in_subheads){ - string sub_contype; - string junk; -// in principle could worry about folded headers, -// but in this application it doesn't actually matter - string headword; - string rest; - size_t where = line.find(":"); - if (where != string::npos) { - headword = line.substr(0, where); - rest = ltrim(line.substr(1+where)); - } - headword = toLower(headword); - if (headword == "content-type") { - parse_content(rest, sub_contype, junk); - currently_text = sub_contype == "text"; - //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl; - } - } else { - if (main_multipart && line == "--" + boundary) { - //xxxx cerr << "found subhead boundary" << endl; - in_subheads = 1; - continue; - } - if (currently_text) textlines++; - } - } - - if (0) cerr << "textlines: " << textlines << endl; - if (!textlines) { - cerr << progid << " rejection: no text: " << error_exit << endl; - maybe_exeunt(ex_spam, error_exit); - } - cerr << progid << " normal completion" << endl; - return(ex_good); -} //////////////////////////////////////////////////////////// int main(int _argc, const char** _argv){ |