From 2bda79fc4e14a9250ad6ebefb67c7379067a4a42 Mon Sep 17 00:00:00 2001 From: John Denker Date: Tue, 31 Jul 2012 17:48:33 -0700 Subject: working toward parse of 'Received: from' line --- tools/mail-scan.c | 27 ++++++++--- tools/skrewt.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 151 insertions(+), 8 deletions(-) diff --git a/tools/mail-scan.c b/tools/mail-scan.c index 3c53b2b..5378e89 100644 --- a/tools/mail-scan.c +++ b/tools/mail-scan.c @@ -44,6 +44,7 @@ void usage(const int sts){ " -vert invert: print only if *no* match.\n" " -l print filename only, not matching text.\n" " -group print a blank line after every match.\n" +" -max nn print at most nn records per file.\n" " -multi print multi-line records on multiple lines\n" " (as opposed to smashing them all onto one long line\n" " -addr assume field contains somebody ; print just foo@bar.com\n" @@ -183,6 +184,7 @@ int main(int _argc, const char** _argv){ int vflag(0); int group_flag(0); int multi(0); + int maxlines(0); int fname_only(0); int addr_mode(0); boost::regex addr_filter(string("<.*@(.*)>"), boost::regex_constants::icase); @@ -205,6 +207,14 @@ int main(int _argc, const char** _argv){ } else if (prefix(arg, "-address" /* long */)){ addr_mode++; continue; + } else if (prefix(arg, "-max" /* long */)){ + if (!argc){ + cerr << "Option '" << arg << "' requires an argument" << endl; + cerr << "For help, try: " << progname << " -help" << endl; + exit(sa_usage); + } + maxlines = atoi(*argv++); argc--; + continue; } else if (prefix(arg, "-multi" /* long */)){ multi++; continue; @@ -296,14 +306,16 @@ int main(int _argc, const char** _argv){ } cout << endl; didprint++; + if (maxlines && didprint >= maxlines) goto endfile; } else /* addr_mode */{ - boost::smatch matches; - if (boost::regex_search(header, matches, addr_filter)){ - cout << string(matches[1].first, matches[1].second) << endl; - } else { - cerr << "no match, ignoring: " << header << endl; - } - didprint++; + boost::smatch matches; + if (boost::regex_search(header, matches, addr_filter)){ + cout << string(matches[1].first, matches[1].second) << endl; + } else { + cerr << "no match, ignoring: " << header << endl; + } + didprint++; + if (maxlines && didprint >= maxlines) goto endfile; } } } @@ -316,6 +328,7 @@ int main(int _argc, const char** _argv){ break; } } // end loop over matching records in this file +endfile:;;;; if (vflag && !foundsome_infile) { cout << *file << endl; diff --git a/tools/skrewt.c b/tools/skrewt.c index 4d69862..daa3224 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -180,6 +180,11 @@ string join(const string sep, const list stuff){ class skrewt{ public: + string received_from; // envelope HELO among other things + string proximta_HELO; + string proximta_rDNS; + string proximta_IP; + string return_path; // envelope MAIL FROM string boundary; string to; string from; @@ -205,8 +210,42 @@ public: int headers(); int interstage(); int body(); + int krunch_rfrom(); }; +#if 0 /* typical "Received: from" lines */ +Received: from lists.sourceforge.net (216.34.181.88) + by cloud.av8n.com with SMTP; 31 Jul 2012 22:13:48 -0000 + +Received: from 24-145-119-127-dhcp.gsv.md.atlanticbb.net (HELO mail.phys-l.org) (24.145.119.127) by cloud.av8n.com with SMTP; 14 Jul 2012 23:56:54 -0000 + +Received: from ip68-231-191-153.tc.ph.cox.net (HELO asclepias.av8n.net) (smtp@68.231.191.153) by cloud.av8n.com with SMTP; 15 Jul 2012 14:39:58 -0000 +#endif + +int skrewt::krunch_rfrom(){ + stringstream parse; + parse.str(received_from); + string word; + parse >> word; + if (word != "from") { + cerr << progid << " bad 'Received: from' line ... '" + << word << "'" << endl; + return ex_syserr; + } + parse >> proximta_rDNS; + parse >> word; + if (word == "(HELO") { + parse >> proximta_HELO; + proximta_HELO = rtrim(proximta_HELO, "()"); + } else { + proximta_HELO = proximta_rDNS; + } + + + + return 0; +} + int skrewt::headers(){ //xxxx cerr << progid << " begins" << endl; for (;;){ // outer loop over all records in the header @@ -266,8 +305,14 @@ int skrewt::headers(){ from = rest; } else if (headword == "to") { to = rest; + } else if (headword == "return-path") { + return_path = rest; } else if (headword == "message-id") { message_id = rest; + } else if (headword == "received") { + if (!received_from.length() && prefix("from ", rest)){ + received_from = rest; + } } else if (headword == "date") { date = rest; } else if (headword == "subject") { @@ -285,7 +330,14 @@ int skrewt::headers(){ int skrewt::interstage(){ if (saw_blank_line) {/* ignore */} - cerr << progid <<" Mid '" << message_id << "'" << endl; +// Note that the headers are in reverse-chronological order: + cerr << progid <<" Return-path: " << return_path <"}' | sort | uniq -c | sort -nr + 81 + 17 + 8 + 8 + 7 + 6 + 5 + 4 + 4 + 4 + 3 + 2 + 2 + 2 + 1 + 1 + 1 + 1 + 1 + 1 + 1 +#endif + +#if 0 /* DKIM users */ + 52 d=googlegroups.com; + 27 d=barackobama.com; + 10 d=gmail.com; + 5 d=bronto.com; + 5 d=bluehornet.com; + 4 d=news.abebooks.com; + 2 d=yahoo.co.uk; + 2 d=sbcglobal.net; + 2 d=embarqmail.com; + 2 d=emailms.angieslist.com; + 1 d=newsletters.sourceforge.net; + 1 d=members.ebay.com; + 1 d=info.citibank.com; + 1 d=ebay.com; + 1 d=commail1.co.za; +#endif + list badnews; if (subject.find("-please-bounce-this-") != string::npos) { -- cgit v1.2.3