diff options
author | John Denker <jsd@av8n.com> | 2012-07-31 17:48:33 -0700 |
---|---|---|
committer | John Denker <jsd@av8n.com> | 2012-07-31 17:48:52 -0700 |
commit | 2bda79fc4e14a9250ad6ebefb67c7379067a4a42 (patch) | |
tree | 3b825e4a2dde4f8fc943aa99505f22250b06992b /tools | |
parent | a51b9f035986ea1d4365b8641c07dd7e2b318ca3 (diff) |
working toward parse of 'Received: from' line
Diffstat (limited to 'tools')
-rw-r--r-- | tools/mail-scan.c | 27 | ||||
-rw-r--r-- | tools/skrewt.c | 132 |
2 files changed, 151 insertions, 8 deletions
diff --git a/tools/mail-scan.c b/tools/mail-scan.c index 3c53b2b..5378e89 100644 --- a/tools/mail-scan.c +++ b/tools/mail-scan.c @@ -44,6 +44,7 @@ void usage(const int sts){ " -vert invert: print only if *no* match.\n" " -l print filename only, not matching text.\n" " -group print a blank line after every match.\n" +" -max nn print at most nn records per file.\n" " -multi print multi-line records on multiple lines\n" " (as opposed to smashing them all onto one long line\n" " -addr assume field contains somebody <foo@bar.com>; print just foo@bar.com\n" @@ -183,6 +184,7 @@ int main(int _argc, const char** _argv){ int vflag(0); int group_flag(0); int multi(0); + int maxlines(0); int fname_only(0); int addr_mode(0); boost::regex addr_filter(string("<.*@(.*)>"), boost::regex_constants::icase); @@ -205,6 +207,14 @@ int main(int _argc, const char** _argv){ } else if (prefix(arg, "-address" /* long */)){ addr_mode++; continue; + } else if (prefix(arg, "-max" /* long */)){ + if (!argc){ + cerr << "Option '" << arg << "' requires an argument" << endl; + cerr << "For help, try: " << progname << " -help" << endl; + exit(sa_usage); + } + maxlines = atoi(*argv++); argc--; + continue; } else if (prefix(arg, "-multi" /* long */)){ multi++; continue; @@ -296,14 +306,16 @@ int main(int _argc, const char** _argv){ } cout << endl; didprint++; + if (maxlines && didprint >= maxlines) goto endfile; } else /* addr_mode */{ - boost::smatch matches; - if (boost::regex_search(header, matches, addr_filter)){ - cout << string(matches[1].first, matches[1].second) << endl; - } else { - cerr << "no match, ignoring: " << header << endl; - } - didprint++; + boost::smatch matches; + if (boost::regex_search(header, matches, addr_filter)){ + cout << string(matches[1].first, matches[1].second) << endl; + } else { + cerr << "no match, ignoring: " << header << endl; + } + didprint++; + if (maxlines && didprint >= maxlines) goto endfile; } } } @@ -316,6 +328,7 @@ int main(int _argc, const char** _argv){ 
break; } } // end loop over matching records in this file +endfile:;;;; if (vflag && !foundsome_infile) { cout << *file << endl; diff --git a/tools/skrewt.c b/tools/skrewt.c index 4d69862..daa3224 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -180,6 +180,11 @@ string join(const string sep, const list<string> stuff){ class skrewt{ public: + string received_from; // envelope HELO among other things + string proximta_HELO; + string proximta_rDNS; + string proximta_IP; + string return_path; // envelope MAIL FROM string boundary; string to; string from; @@ -205,8 +210,42 @@ public: int headers(); int interstage(); int body(); + int krunch_rfrom(); }; +#if 0 /* typical "Received: from" lines */ +Received: from lists.sourceforge.net (216.34.181.88) + by cloud.av8n.com with SMTP; 31 Jul 2012 22:13:48 -0000 + +Received: from 24-145-119-127-dhcp.gsv.md.atlanticbb.net (HELO mail.phys-l.org) (24.145.119.127) by cloud.av8n.com with SMTP; 14 Jul 2012 23:56:54 -0000 + +Received: from ip68-231-191-153.tc.ph.cox.net (HELO asclepias.av8n.net) (smtp@68.231.191.153) by cloud.av8n.com with SMTP; 15 Jul 2012 14:39:58 -0000 +#endif + +int skrewt::krunch_rfrom(){ + stringstream parse; + parse.str(received_from); + string word; + parse >> word; + if (word != "from") { + cerr << progid << " bad 'Received: from' line ... 
'" + << word << "'" << endl; + return ex_syserr; + } + parse >> proximta_rDNS; + parse >> word; + if (word == "(HELO") { + parse >> proximta_HELO; + proximta_HELO = rtrim(proximta_HELO, "()"); + } else { + proximta_HELO = proximta_rDNS; + } + + + + return 0; +} + int skrewt::headers(){ //xxxx cerr << progid << " begins" << endl; for (;;){ // outer loop over all records in the header @@ -266,8 +305,14 @@ int skrewt::headers(){ from = rest; } else if (headword == "to") { to = rest; + } else if (headword == "return-path") { + return_path = rest; } else if (headword == "message-id") { message_id = rest; + } else if (headword == "received") { + if (!received_from.length() && prefix("from ", rest)){ + received_from = rest; + } } else if (headword == "date") { date = rest; } else if (headword == "subject") { @@ -285,7 +330,14 @@ int skrewt::headers(){ int skrewt::interstage(){ if (saw_blank_line) {/* ignore */} - cerr << progid <<" Mid '" << message_id << "'" << endl; +// Note that the headers are in reverse-chronological order: + cerr << progid <<" Return-path: " << return_path <<endl; + cerr << " Received: " << received_from <<endl; + int rslt = krunch_rfrom(); + if (rslt) return rslt; + cerr << " rDNS: " << proximta_rDNS << endl; + cerr << " HELO: " << proximta_HELO << endl; + cerr << " Mid '" << message_id << "'" << endl; // The logic here is: In order: // 1) If whitelisted, accept. No greylisting, no spam-checking. @@ -310,6 +362,84 @@ int skrewt::interstage(){ // 4c) If it is in the "green" state, let the timer // continue from where it is. Reject with temporary error. +// Note: Reputation normally attaches to a domain. +// With SPF, the domain vouches for the sender at a given IP address +// ... and then the sender implicitly vouches for the message. +// With DKIM, the domain vouches for an individual message. +// With neither SPF nor DKIM, reputation attaches to the sender's +// IP address. The sender vouches for the message. 
+// +// During greylisting, delay applies to the message. Reputation +// applies to the domain (via SPF or DKIM) or to the server +// (otherwise). + + +// If you are a medium-sized operator, such that you have one +// and only one IP address that ever sends email, and it is a +// static IP address, then you don't have much to gain from +// DKIM or SPF. Attaching a reputation to your domain is not +// much different from attaching a reputation to your IP address. + +// In contrast, if you are a low-budget operator with a +// dynamic IP address, you benefit from SPF and/or DKIM. +// Your reputation attaches to your domain, and remains +// stable even as your IP address changes. + +// At the other extreme, if you are a big-time operator +// such as googlegroups.com, you benefit from DKIM and/or +// SPF. Your IP addresses are not dynamic, but they are +// numerous, so you prefer to have your reputation apply +// to all your email-sending hosts. + +#if 0 /* typical Received-SPF line */ + Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70; +#endif + +#if 0 /* SPF users */ + :; mail-scan +received-spf /home/jsd/Maildir/cur[/]* | + sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' | + awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr + 81 <gmail.com> + 17 <mac.com> + 8 <gmx.net> + 8 <bbruner@gmail.com> + 7 <jsd@av8n.com> + 6 <kst24@cam.ac.uk> + 5 <farooq.w@gmail.com> + 4 <scerri@chem.ucla.edu> + 4 <comcast.net> + 4 <c2i.net> + 3 <gemort2006@gmail.com> + 2 <rrhake@earthlink.net> + 2 <hotmail.com> + 2 <GCC.EDU> + 1 <us.panasonic.com> + 1 <sss.pgh.pa.us> + 1 <scot_wherland@wsu.edu> + 1 <rpendarvis@brenau.edu> + 1 <hmperks@gmail.com> + 1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu> + 1 <arcor.de> +#endif + +#if 0 /* DKIM users */ + 52 d=googlegroups.com; + 27 d=barackobama.com; + 10 d=gmail.com; + 5 d=bronto.com; + 5 d=bluehornet.com; + 4 d=news.abebooks.com; + 2 
d=yahoo.co.uk; + 2 d=sbcglobal.net; + 2 d=embarqmail.com; + 2 d=emailms.angieslist.com; + 1 d=newsletters.sourceforge.net; + 1 d=members.ebay.com; + 1 d=info.citibank.com; + 1 d=ebay.com; + 1 d=commail1.co.za; +#endif + list<string> badnews; if (subject.find("-please-bounce-this-") != string::npos) { |