summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorJohn Denker <jsd@av8n.com>2012-07-31 17:48:33 -0700
committerJohn Denker <jsd@av8n.com>2012-07-31 17:48:52 -0700
commit2bda79fc4e14a9250ad6ebefb67c7379067a4a42 (patch)
tree3b825e4a2dde4f8fc943aa99505f22250b06992b /tools
parenta51b9f035986ea1d4365b8641c07dd7e2b318ca3 (diff)
working toward parse of 'Received: from' line
Diffstat (limited to 'tools')
-rw-r--r--tools/mail-scan.c27
-rw-r--r--tools/skrewt.c132
2 files changed, 151 insertions, 8 deletions
diff --git a/tools/mail-scan.c b/tools/mail-scan.c
index 3c53b2b..5378e89 100644
--- a/tools/mail-scan.c
+++ b/tools/mail-scan.c
@@ -44,6 +44,7 @@ void usage(const int sts){
" -vert invert: print only if *no* match.\n"
" -l print filename only, not matching text.\n"
" -group print a blank line after every match.\n"
+" -max nn print at most nn records per file.\n"
" -multi print multi-line records on multiple lines\n"
" (as opposed to smashing them all onto one long line\n"
" -addr assume field contains somebody <foo@bar.com>; print just foo@bar.com\n"
@@ -183,6 +184,7 @@ int main(int _argc, const char** _argv){
int vflag(0);
int group_flag(0);
int multi(0);
+ int maxlines(0);
int fname_only(0);
int addr_mode(0);
boost::regex addr_filter(string("<.*@(.*)>"), boost::regex_constants::icase);
@@ -205,6 +207,14 @@ int main(int _argc, const char** _argv){
} else if (prefix(arg, "-address" /* long */)){
addr_mode++;
continue;
+ } else if (prefix(arg, "-max" /* long */)){
+ if (!argc){
+ cerr << "Option '" << arg << "' requires an argument" << endl;
+ cerr << "For help, try: " << progname << " -help" << endl;
+ exit(sa_usage);
+ }
+ maxlines = atoi(*argv++); argc--;
+ continue;
} else if (prefix(arg, "-multi" /* long */)){
multi++;
continue;
@@ -296,14 +306,16 @@ int main(int _argc, const char** _argv){
}
cout << endl;
didprint++;
+ if (maxlines && didprint >= maxlines) goto endfile;
} else /* addr_mode */{
- boost::smatch matches;
- if (boost::regex_search(header, matches, addr_filter)){
- cout << string(matches[1].first, matches[1].second) << endl;
- } else {
- cerr << "no match, ignoring: " << header << endl;
- }
- didprint++;
+ boost::smatch matches;
+ if (boost::regex_search(header, matches, addr_filter)){
+ cout << string(matches[1].first, matches[1].second) << endl;
+ } else {
+ cerr << "no match, ignoring: " << header << endl;
+ }
+ didprint++;
+ if (maxlines && didprint >= maxlines) goto endfile;
}
}
}
@@ -316,6 +328,7 @@ int main(int _argc, const char** _argv){
break;
}
} // end loop over matching records in this file
+endfile:;;;;
if (vflag && !foundsome_infile) {
cout << *file << endl;
diff --git a/tools/skrewt.c b/tools/skrewt.c
index 4d69862..daa3224 100644
--- a/tools/skrewt.c
+++ b/tools/skrewt.c
@@ -180,6 +180,11 @@ string join(const string sep, const list<string> stuff){
class skrewt{
public:
+ string received_from; // envelope HELO among other things
+ string proximta_HELO;
+ string proximta_rDNS;
+ string proximta_IP;
+ string return_path; // envelope MAIL FROM
string boundary;
string to;
string from;
@@ -205,8 +210,42 @@ public:
int headers();
int interstage();
int body();
+ int krunch_rfrom();
};
+#if 0 /* typical "Received: from" lines */
+Received: from lists.sourceforge.net (216.34.181.88)
+ by cloud.av8n.com with SMTP; 31 Jul 2012 22:13:48 -0000
+
+Received: from 24-145-119-127-dhcp.gsv.md.atlanticbb.net (HELO mail.phys-l.org) (24.145.119.127) by cloud.av8n.com with SMTP; 14 Jul 2012 23:56:54 -0000
+
+Received: from ip68-231-191-153.tc.ph.cox.net (HELO asclepias.av8n.net) (smtp@68.231.191.153) by cloud.av8n.com with SMTP; 15 Jul 2012 14:39:58 -0000
+#endif
+
+int skrewt::krunch_rfrom(){
+ stringstream parse;
+ parse.str(received_from);
+ string word;
+ parse >> word;
+ if (word != "from") {
+ cerr << progid << " bad 'Received: from' line ... '"
+ << word << "'" << endl;
+ return ex_syserr;
+ }
+ parse >> proximta_rDNS;
+ parse >> word;
+ if (word == "(HELO") {
+ parse >> proximta_HELO;
+ proximta_HELO = rtrim(proximta_HELO, "()");
+ } else {
+ proximta_HELO = proximta_rDNS;
+ }
+
+
+
+ return 0;
+}
+
int skrewt::headers(){
//xxxx cerr << progid << " begins" << endl;
for (;;){ // outer loop over all records in the header
@@ -266,8 +305,14 @@ int skrewt::headers(){
from = rest;
} else if (headword == "to") {
to = rest;
+ } else if (headword == "return-path") {
+ return_path = rest;
} else if (headword == "message-id") {
message_id = rest;
+ } else if (headword == "received") {
+ if (!received_from.length() && prefix("from ", rest)){
+ received_from = rest;
+ }
} else if (headword == "date") {
date = rest;
} else if (headword == "subject") {
@@ -285,7 +330,14 @@ int skrewt::headers(){
int skrewt::interstage(){
if (saw_blank_line) {/* ignore */}
- cerr << progid <<" Mid '" << message_id << "'" << endl;
+// Note that the headers are in reverse-chronological order:
+ cerr << progid <<" Return-path: " << return_path <<endl;
+ cerr << " Received: " << received_from <<endl;
+ int rslt = krunch_rfrom();
+ if (rslt) return rslt;
+ cerr << " rDNS: " << proximta_rDNS << endl;
+ cerr << " HELO: " << proximta_HELO << endl;
+ cerr << " Mid '" << message_id << "'" << endl;
// The logic here is: In order:
// 1) If whitelisted, accept. No greylisting, no spam-checking.
@@ -310,6 +362,84 @@ int skrewt::interstage(){
// 4c) If it is in the "green" state, let the timer
// continue from where it is. Reject with temporary error.
+// Note: Reputation normally attaches to a domain.
+// With SPF, the domain vouches for the sender at a given IP address
+// ... and then the sender implicitly vouches for the message.
+// With DKIM, the domain vouches for an individual message.
+// With neither SPF nor DKIM, reputation attaches to the sender's
+// IP address. The sender vouches for the message.
+//
+// During greylisting, delay applies to the message. Reputation
+// applies to the domain (via SPF or DKIM) or to the server
+// (otherwise).
+
+
+// If you are a medium-sized operator, such that you have one
+// and only one IP address that ever sends email, and it is a
+// static IP address, then you don't have much to gain from
+// DKIM or SPF. Attaching a reputation to your domain is not
+// much different from attaching a reputation to your IP address.
+
+// In contrast, if you are a low-budget operator with a
+// dynamic IP address, you benefit from SPF and/or DKIM.
+// Your reputation attaches to your domain, and remains
+// stable even as your IP address changes.
+
+// At the other extreme, if you are a big-time operator
+// such as googlegroups.com, you benefit from DKIM and/or
+// SPF. Your IP addresses are not dynamic, but they are
+// numerous, so you prefer to have your reputation apply
+// to all your email-sending hosts.
+
+#if 0 /* typical Received-SPF line */
+ Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70;
+#endif
+
+#if 0 /* SPF users */
+ :; mail-scan +received-spf /home/jsd/Maildir/cur[/]* |
+ sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' |
+ awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr
+ 81 <gmail.com>
+ 17 <mac.com>
+ 8 <gmx.net>
+ 8 <bbruner@gmail.com>
+ 7 <jsd@av8n.com>
+ 6 <kst24@cam.ac.uk>
+ 5 <farooq.w@gmail.com>
+ 4 <scerri@chem.ucla.edu>
+ 4 <comcast.net>
+ 4 <c2i.net>
+ 3 <gemort2006@gmail.com>
+ 2 <rrhake@earthlink.net>
+ 2 <hotmail.com>
+ 2 <GCC.EDU>
+ 1 <us.panasonic.com>
+ 1 <sss.pgh.pa.us>
+ 1 <scot_wherland@wsu.edu>
+ 1 <rpendarvis@brenau.edu>
+ 1 <hmperks@gmail.com>
+ 1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu>
+ 1 <arcor.de>
+#endif
+
+#if 0 /* DKIM users */
+ 52 d=googlegroups.com;
+ 27 d=barackobama.com;
+ 10 d=gmail.com;
+ 5 d=bronto.com;
+ 5 d=bluehornet.com;
+ 4 d=news.abebooks.com;
+ 2 d=yahoo.co.uk;
+ 2 d=sbcglobal.net;
+ 2 d=embarqmail.com;
+ 2 d=emailms.angieslist.com;
+ 1 d=newsletters.sourceforge.net;
+ 1 d=members.ebay.com;
+ 1 d=info.citibank.com;
+ 1 d=ebay.com;
+ 1 d=commail1.co.za;
+#endif
+
list<string> badnews;
if (subject.find("-please-bounce-this-") != string::npos) {