summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r-- tools/libskrewt.c 260
-rw-r--r-- tools/libskrewt.h 4
-rw-r--r-- tools/ward.c 260
3 files changed, 264 insertions, 260 deletions
diff --git a/tools/libskrewt.c b/tools/libskrewt.c
index 602fdb7..812b546 100644
--- a/tools/libskrewt.c
+++ b/tools/libskrewt.c
@@ -2,6 +2,7 @@
#include "utils.h"
#include <iostream>
#include <sstream>
+#include <signal.h>
using namespace std;
@@ -84,3 +85,262 @@ int skrewt::krunch_rfrom(){
return 0;
}
+
+int skrewt::interstage(){
+ if (saw_blank_line) {/* ignore */}
+// Note that the headers are in reverse-chronological order:
+ cerr << progid <<" Return-path: " << return_path <<endl;
+
+ { // parse the 'Received: from' line:
+ cerr << " Received: " << received_from <<endl;
+ int rslt = krunch_rfrom();
+ if (rslt) return rslt;
+ cerr << " rDNS: " << proximta_rDNS << endl;
+ cerr << " HELO: " << proximta_HELO << endl;
+ cerr << " IP: " << proximta_IP << endl;
+ cerr << " AuthUser: " << proximta_AuthUser << endl;
+ cerr << " Mid '" << message_id << "'" << endl;
+ }
+
+ sepofra my_spf;
+ try {
+ my_spf.check(proximta_IP,
+ proximta_HELO,
+ return_path,
+ "junk", 0/* verbosity */);
+ cerr << "*** " << my_spf.explain() << endl;
+ } catch (bad_thing foo) {
+ cerr << "Caught bad thing: " << foo.what() << endl;
+ return ex_syserr;
+ }
+
+// The logic here is: In order:
+// 1:: If whitelisted, accept. No greylisting, no spam-checking.
+// 2:: If blacklisted, reject. No greylisting, no spam-checking.
+// 3:: If good reputation, spam-check it and send it on its way.
+// 4:: If no reputation, greylist.
+// 5:: If bad reputation, ????
+
+// Expanding item 3 to the next level of detail:
+// 3a:: If some domain vouches for this sender-IP via SPF,
+// then the reputation is bound to the domain.
+// 3c:: If some domain vouches for the message via DKIM,
+// then the reputation is bound to the domain.
+// 3d:: If no SPF or DKIM, then the reputation attaches
+// to the sender-IP.
+
+// Expanding item 4 to the next level of detail:
+// 4a:: If the greylisting database says this message is ripe,
+// spam-check it. If it's OK, use it to count toward reputation.
+// 4b:: If it is previously unseen or too old, start greylisting
+// timer from scratch. Reject with temporary error.
+// 4c:: If it is in the "green" state, let the timer
+// continue from where it is. Reject with temporary error.
+
+// Note: Reputation normally attaches to a domain.
+// With SPF, the domain vouches for the sender at a given IP address
+// ... and then the sender implicitly vouches for the message.
+// With DKIM, the domain vouches for an individual message.
+// With neither SPF nor DKIM, reputation attaches to the sender's
+// IP address. The sender vouches for the message.
+//
+// During greylisting, delay applies to the message. Reputation
+// applies to the domain (via SPF or DKIM) or to the server
+// (otherwise).
+
+
+// If you are a medium-sized operator, such that you have one
+// and only one IP address that ever sends email, and it is a
+// static IP address, then you don't have much to gain from
+// DKIM or SPF. Attaching a reputation to your domain is not
+// much different from attaching a reputation to your IP address.
+
+// In contrast, if you are a low-budget operator with a
+// dynamic IP address, you benefit from SPF and/or DKIM.
+// Your reputation attaches to your domain, and remains
+// stable even as your IP address changes.
+
+// At the other extreme, if you are a big-time operator
+// such as googlegroups.com, you benefit from DKIM and/or
+// SPF. Your IP addresses are not dynamic, but they are
+// numerous, so you prefer to have your reputation apply
+// to all your email-sending hosts.
+
+#if 0 /* typical Received-SPF line */
+ Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70;
+#endif
+
+#if 0 /* SPF users */
+ :; mail-scan +received-spf /home/jsd/Maildir/cur[/]* |
+ sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' |
+ awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr
+ 81 <gmail.com>
+ 17 <mac.com>
+ 8 <gmx.net>
+ 8 <bbruner@gmail.com>
+ 7 <jsd@av8n.com>
+ 6 <kst24@cam.ac.uk>
+ 5 <farooq.w@gmail.com>
+ 4 <scerri@chem.ucla.edu>
+ 4 <comcast.net>
+ 4 <c2i.net>
+ 3 <gemort2006@gmail.com>
+ 2 <rrhake@earthlink.net>
+ 2 <hotmail.com>
+ 2 <GCC.EDU>
+ 1 <us.panasonic.com>
+ 1 <sss.pgh.pa.us>
+ 1 <scot_wherland@wsu.edu>
+ 1 <rpendarvis@brenau.edu>
+ 1 <hmperks@gmail.com>
+ 1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu>
+ 1 <arcor.de>
+#endif
+
+#if 0 /* DKIM users */
+ 52 d=googlegroups.com;
+ 27 d=barackobama.com;
+ 10 d=gmail.com;
+ 5 d=bronto.com;
+ 5 d=bluehornet.com;
+ 4 d=news.abebooks.com;
+ 2 d=yahoo.co.uk;
+ 2 d=sbcglobal.net;
+ 2 d=embarqmail.com;
+ 2 d=emailms.angieslist.com;
+ 1 d=newsletters.sourceforge.net;
+ 1 d=members.ebay.com;
+ 1 d=info.citibank.com;
+ 1 d=ebay.com;
+ 1 d=commail1.co.za;
+#endif
+
+ list<string> badnews;
+ int whitelisted(0);
+
+ if (subject.find("sesame") != string::npos
+ && subject.find("swordfish") != string::npos) {
+ whitelisted++;
+ }
+
+ if (delivered_to.length()){
+ cerr << progid << " Delivered-to: <<<" << delivered_to << ">>>" << endl;
+ }
+ if (toLower(trim(delivered_to)) == "jean@av8n.com") {
+ badnews.push_back("Looping Delivered-to: " + delivered_to);
+ }
+
+ if (subject.find("-please-bounce-this-") != string::npos) {
+ badnews.push_back("by request");
+ }
+
+ if (!date.length()) {
+ badnews.push_back("no date");
+ }
+
+ if (mid_required && !message_id.length()) {
+ badnews.push_back("no message-id");
+ }
+
+ if (badnews.size() && !whitelisted){
+ cerr << progid << " " << join(", ", badnews) << endl;
+ if (error_exit){
+ cerr << progid << " '" << from
+ << "' to '" << to
+ << "'" << endl;
+ exeunt(ex_spam);
+ }
+ }
+ return 0;
+}
+
+int skrewt::body(){
+ string main_contype;
+ if (content_type.length())
+ parse_content(content_type, main_contype, boundary);
+// some slightly-useful booleans:
+ int currently_text = main_contype == "text";
+ int main_multipart = main_contype == "multipart";
+
+// early-stage thinking has been done.
+// Now spew the rest of the message
+ //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl;
+ int in_subheads(0);
+ int textlines(0);
+
+ for (;;){ // outer loop over all lines in the body
+ if (cin.eof()) break;
+ if (cin.bad()) return 1;
+ string line;
+// on fail, go back to top of outer loop and check for eof versus bad
+ if (getline(cin, line).fail()) continue;
+ msgsize += line.length()+1;
+ if (msgsize > maxsize) {
+ cerr << progid << " rejection: bigger than " << maxsize << endl;
+ maybe_exeunt(ex_spam, error_exit);
+ }
+ bigbuf.push_back(line);
+ cout << line << endl;
+ if (in_subheads){
+ if (line == "" || line == "\r") in_subheads = 0;
+ }
+ if (in_subheads){
+ string sub_contype;
+ string junk;
+// in principle could worry about folded headers,
+// but in this application it doesn't actually matter
+ string headword;
+ string rest;
+ size_t where = line.find(":");
+ if (where != string::npos) {
+ headword = line.substr(0, where);
+ rest = ltrim(line.substr(1+where));
+ }
+ headword = toLower(headword);
+ if (headword == "content-type") {
+ parse_content(rest, sub_contype, junk);
+ currently_text = sub_contype == "text";
+ //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl;
+ }
+ } else {
+ if (main_multipart && line == "--" + boundary) {
+ //xxxx cerr << "found subhead boundary" << endl;
+ in_subheads = 1;
+ continue;
+ }
+ if (currently_text) textlines++;
+ }
+ }
+
+ if (0) cerr << "textlines: " << textlines << endl;
+ if (!textlines) {
+ cerr << progid << " rejection: no text: " << error_exit << endl;
+ maybe_exeunt(ex_spam, error_exit);
+ }
+ cerr << progid << " normal completion" << endl;
+ return(ex_good);
+}
+
+void maybe_exeunt(const int sts, const int really){
+ if (!really) return;
+ if (sts == ex_good) exit(sts);
+
+ const char* foo = getenv("HI_Q_GROUP");
+ if (!foo) exit(sts);
+
+// No point in signalling ourself:
+ sighandler_t rslt = signal(SIGUSR1, SIG_IGN);
+ if (rslt == SIG_ERR) {
+ cerr << "error setting signal" << endl;
+ }
+ int k = kill(-atoi(foo), SIGUSR1);
+ if (k) {
+ cerr << "kill failed on group " << atoi(foo) << " ... ";
+ perror(0);
+ }
+ exit(sts);
+}
+
+void exeunt(const int sts){
+ maybe_exeunt(sts, 1);
+}
diff --git a/tools/libskrewt.h b/tools/libskrewt.h
index 94d06dd..c5fea7e 100644
--- a/tools/libskrewt.h
+++ b/tools/libskrewt.h
@@ -1,5 +1,6 @@
#include <string>
#include <vector>
+#include "sepofra.h"
#include "qq_exit_codes.h" // a bit of a kludge
extern std::string progid;
@@ -42,3 +43,6 @@ public:
void parse_content(const std::string type_spec_line,
std::string &maintype, std::string &boundary);
+
+void exeunt(const int sts);
+void maybe_exeunt(const int sts, const int really);
diff --git a/tools/ward.c b/tools/ward.c
index 6280a28..277fd76 100644
--- a/tools/ward.c
+++ b/tools/ward.c
@@ -8,7 +8,6 @@
#include <stdlib.h> /* for exit() */
#include <string> /* for strcmp() */
#include <ctype.h> /* toupper */
-#include <signal.h>
#include <stdio.h> /* perror */
#include <sstream>
@@ -38,33 +37,8 @@ void usage(const int sts){
}
#include "libskrewt.h"
-#include "sepofra.h"
#include "utils.h"
-void maybe_exeunt(const int sts, const int really){
- if (!really) return;
- if (sts == ex_good) exit(sts);
-
- const char* foo = getenv("HI_Q_GROUP");
- if (!foo) exit(sts);
-
-// No point in signalling ourself:
- sighandler_t rslt = signal(SIGUSR1, SIG_IGN);
- if (rslt == SIG_ERR) {
- cerr << "error setting signal" << endl;
- }
- int k = kill(-atoi(foo), SIGUSR1);
- if (k) {
- cerr << "kill failed on group " << atoi(foo) << " ... ";
- perror(0);
- }
- exit(sts);
-}
-
-void exeunt(const int sts){
- maybe_exeunt(sts, 1);
-}
-
string progname, progid;
int mypid;
@@ -181,240 +155,6 @@ int skrewt::headers(){
return 0;
}
-int skrewt::interstage(){
- if (saw_blank_line) {/* ignore */}
-// Note that the headers are in reverse-chronological order:
- cerr << progid <<" Return-path: " << return_path <<endl;
-
- { // parse the 'Received: from' line:
- cerr << " Received: " << received_from <<endl;
- int rslt = krunch_rfrom();
- if (rslt) return rslt;
- cerr << " rDNS: " << proximta_rDNS << endl;
- cerr << " HELO: " << proximta_HELO << endl;
- cerr << " IP: " << proximta_IP << endl;
- cerr << " AuthUser: " << proximta_AuthUser << endl;
- cerr << " Mid '" << message_id << "'" << endl;
- }
-
- sepofra my_spf;
- try {
- my_spf.check(proximta_IP,
- proximta_HELO,
- return_path,
- "junk", 0/* verbosity */);
- cerr << "*** " << my_spf.explain() << endl;
- } catch (bad_thing foo) {
- cerr << "Caught bad thing: " << foo.what() << endl;
- return ex_syserr;
- }
-
-// The logic here is: In order:
-// 1:: If whitelisted, accept. No greylisting, no spam-checking.
-// 2:: If blacklisted, reject. No greylisting, no spam-checking.
-// 3:: If good reputation, spam-check it and send it on its way.
-// 4:: If no reputation, greylist.
-// 5:: If bad reputation, ????
-
-// Expanding item 3 to the next level of detail:
-// 3a:: If some domain vouches for this sender-IP via SPF,
-// then the reputation is bound to the domain.
-// 3c:: If some domain vouches for the message vie DKIM,
-// then the reputation is bound to the domain.
-// 3d:: If no SPF or DKIM, then the reputation attaches
-// to the sender-IP.
-
-// Expanding item 4 to the next level of detail:
-// 4a:: If the greylisting database says this message is ripe
-// spam-check it. If it's OK, use it to count toward reputation.
-// 4b:: If it is previously unseen or too old, start greylisting
-// timer from scratch. Reject with temporary error.
-// 4c:: If it is in the "green" state, let the timer
-// continue from where it is. Reject with temporary error.
-
-// Note: Reputation normally attaches to a domain.
-// With SPF, the domain vouches for the sender at a given IP address
-// ... and then the sender implicitly vouches for the message.
-// With DKIM, the domain vouches for an individual message.
-// With neither SPF nor DKIM, reputation attaches to the sender's
-// IP address. The sender vouches for the message.
-//
-// During greylisting, delay applies to the message. Reputation
-// applies to the domain (via SPF or DKIM) or to the server
-// (otherwise).
-
-
-// If you are a medium-sized operator, such that you have one
-// and only one IP address that ever sends email, and it is a
-// static IP address, then you don't have much to gain from
-// DKIM or SPF. Attaching a reputation to your domain is not
-// much different from attaching a reputation to your IP address.
-
-// In constrast, if you are a low-budget operator with a
-// dynamic IP address, you benefit from SPF and/or DKIM.
-// Your reputation attaches to your domain, and remains
-// stable even as your IP address changes.
-
-// At the other extreme, if you are a big-time operator
-// such as googlegroups.com, you benefit from DKIM and/or
-// SPF. Your IP addresses are not dynamic, but they are
-// numerous, so you prefer to have your reputation apply
-// to all your email-sending hosts.
-
-#if 0 /* typical Received-SPF line */
- Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70;
-#endif
-
-#if 0 /* SPF users */
- :; mail-scan +received-spf /home/jsd/Maildir/cur[/]* |
- sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' |
- awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr
- 81 <gmail.com>
- 17 <mac.com>
- 8 <gmx.net>
- 8 <bbruner@gmail.com>
- 7 <jsd@av8n.com>
- 6 <kst24@cam.ac.uk>
- 5 <farooq.w@gmail.com>
- 4 <scerri@chem.ucla.edu>
- 4 <comcast.net>
- 4 <c2i.net>
- 3 <gemort2006@gmail.com>
- 2 <rrhake@earthlink.net>
- 2 <hotmail.com>
- 2 <GCC.EDU>
- 1 <us.panasonic.com>
- 1 <sss.pgh.pa.us>
- 1 <scot_wherland@wsu.edu>
- 1 <rpendarvis@brenau.edu>
- 1 <hmperks@gmail.com>
- 1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu>
- 1 <arcor.de>
-#endif
-
-#if 0 /* DKIM users */
- 52 d=googlegroups.com;
- 27 d=barackobama.com;
- 10 d=gmail.com;
- 5 d=bronto.com;
- 5 d=bluehornet.com;
- 4 d=news.abebooks.com;
- 2 d=yahoo.co.uk;
- 2 d=sbcglobal.net;
- 2 d=embarqmail.com;
- 2 d=emailms.angieslist.com;
- 1 d=newsletters.sourceforge.net;
- 1 d=members.ebay.com;
- 1 d=info.citibank.com;
- 1 d=ebay.com;
- 1 d=commail1.co.za;
-#endif
-
- list<string> badnews;
- int whitelisted(0);
-
- if (subject.find("sesame") != string::npos
- && subject.find("swordfish") != string::npos) {
- whitelisted++;
- }
-
- if (delivered_to.length()){
- cerr << progid << " Delivered-to: <<<" << delivered_to << ">>>" << endl;
- }
- if (toLower(trim(delivered_to)) == "jean@av8n.com") {
- badnews.push_back("Looping Delivered-to: " + delivered_to);
- }
-
- if (subject.find("-please-bounce-this-") != string::npos) {
- badnews.push_back("by request");
- }
-
- if (!date.length()) {
- badnews.push_back("no date");
- }
-
- if (mid_required && !message_id.length()) {
- badnews.push_back("no message-id");
- }
-
- if (badnews.size() && !whitelisted){
- cerr << progid << " " << join(", ", badnews) << endl;
- if (error_exit){
- cerr << progid << " '" << from
- << "' to '" << to
- << "'" << endl;
- exeunt(ex_spam);
- }
- }
- return 0;
-}
-
-int skrewt::body(){
- string main_contype;
- if (content_type.length())
- parse_content(content_type, main_contype, boundary);
-// some slightly-useful booleans:
- int currently_text = main_contype == "text";
- int main_multipart = main_contype == "multipart";
-
-// early-stage thinking has been done.
-// Now spew the rest of the message
- //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl;
- int in_subheads(0);
- int textlines(0);
-
- for (;;){ // outer loop over all lines in the body
- if (cin.eof()) break;
- if (cin.bad()) return 1;
- string line;
-// on fail, go back to top of outer loop and check for eof versus bad
- if (getline(cin, line).fail()) continue;
- msgsize += line.length()+1;
- if (msgsize > maxsize) {
- cerr << progid << " rejection: bigger than " << maxsize << endl;
- maybe_exeunt(ex_spam, error_exit);
- }
- bigbuf.push_back(line);
- cout << line << endl;
- if (in_subheads){
- if (line == "" || line == "\r") in_subheads = 0;
- }
- if (in_subheads){
- string sub_contype;
- string junk;
-// in principle could worry about folded headers,
-// but in this application it doesn't actually matter
- string headword;
- string rest;
- size_t where = line.find(":");
- if (where != string::npos) {
- headword = line.substr(0, where);
- rest = ltrim(line.substr(1+where));
- }
- headword = toLower(headword);
- if (headword == "content-type") {
- parse_content(rest, sub_contype, junk);
- currently_text = sub_contype == "text";
- //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl;
- }
- } else {
- if (main_multipart && line == "--" + boundary) {
- //xxxx cerr << "found subhead boundary" << endl;
- in_subheads = 1;
- continue;
- }
- if (currently_text) textlines++;
- }
- }
-
- if (0) cerr << "textlines: " << textlines << endl;
- if (!textlines) {
- cerr << progid << " rejection: no text: " << error_exit << endl;
- maybe_exeunt(ex_spam, error_exit);
- }
- cerr << progid << " normal completion" << endl;
- return(ex_good);
-}
////////////////////////////////////////////////////////////
int main(int _argc, const char** _argv){