From 60fd39ff24975486da7d02cdf07abae31c525529 Mon Sep 17 00:00:00 2001 From: John Denker Date: Fri, 20 Jul 2012 12:15:59 -0700 Subject: much smarter about exit status conventions --- tools/skrewt.c | 52 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) (limited to 'tools/skrewt.c') diff --git a/tools/skrewt.c b/tools/skrewt.c index 2ca32a4..d0289a6 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -19,7 +19,7 @@ void usage(const int sts){ "Usage: skrewt [options]\n" "\n" " Scrutinizes email. Reads stdin, copies it to stdout.\n" -" Exit result 0 means good, 1 means rejection (spam).\n" +" Exit result 0 means good, 21 means rejection (spam).\n" " Writes reason for rejection to stderr.\n" "\n" " Typically used as a filter in a pipeline, along with spamc -E\n" @@ -33,10 +33,20 @@ void usage(const int sts){ exit(sts); } -// exit codes, compatible with spamassassin (not with qmail-queue) -const int sa_good(0); -const int sa_spam(1); -const int sa_usage(64); +// error exit codes, mostly as stated in qmail.c +#define bar \ +foo(good, 0) ;\ +foo(spam, 21) ;\ +foo(permerr, 31) ;\ +foo(usage, 39) ;\ +foo(greylisting, 70) ;\ +foo(syserr, 71) ;\ +foo(comerr, 74) ; + +#define foo(name, num) const int ex_ ## name = num +bar +#undef foo + ///////////////////////////////////////////////////////// // Case insensitive comparison of strings @@ -106,7 +116,7 @@ int prefix(const string shorter, const string longer){ } void exeunt(const int sts){ - if (sts == sa_good) exit(sts); + if (sts == ex_good) exit(sts); const char* foo = getenv("HI_Q_GROUP"); if (!foo) exit(sts); @@ -144,18 +154,18 @@ int main(int _argc, const char** _argv){ if (prefix(arg, "-maxsize")) { if (!argc) { cerr << "Option -maxsize requires an argument" << endl; - exit(sa_usage); + exit(ex_usage); } maxsize = atoi(*argv); argv++; argc--; } if (arg.substr(0,1) == "-") { cerr << "Unrecognized option '" << arg << "'" << endl; cerr << "For help, try: " << progname << " -help" << endl; - exit(sa_usage); + exit(ex_usage); } else { cerr << "Extraneous verbiage '" << arg << "'" << endl; cerr << "For help, try: " << progname << " -help" << endl; - exit(sa_usage); + exit(ex_usage); } } @@ -172,6 +182,10 @@ int main(int _argc, const char** _argv){ string header; if (getline(cin, header).fail()) continue; msgsize += header.length()+1; + if (msgsize > maxsize) { + cerr << "skrewt rejection: bigger than " << maxsize << endl; + exeunt(ex_spam); + } for (;;) { if (cin.eof()) break; if (cin.bad()) return 1; @@ -184,18 +198,22 @@ int main(int _argc, const char** _argv){ msgsize += line.length()+1; if (msgsize > maxsize) { cerr << "skrewt rejection: bigger than " << maxsize << endl; - exeunt(sa_spam); + exeunt(ex_spam); } header += "\n" + line; } - if (header.length() == 0) { + int len = header.length(); + if (len && header[len-1] == '\r') len--; // reduced length, not counting + if (len == 0) { if (!gotdate) { cerr << "skrewt rejection: no date" << endl; - exeunt(sa_spam); // disallow mail with no date + exeunt(ex_spam); // disallow mail with no date } inheads = 0; + //cerr << "end of headers" << endl; } else { +// here if it's a header line string headword; string rest; size_t where = header.find(":"); @@ -239,18 +257,18 @@ int main(int _argc, const char** _argv){ } else if (headword == "subject") { if (rest.find("-please-bounce-this-") != string::npos) { cerr << "skrewt rejection: by request" << endl; - exeunt(sa_spam); + exeunt(ex_spam); } } } - cout << header << endl; + //cout << header.length() << " ... " << header << endl; } else { string line; if (!getline(cin, line).fail()) { msgsize += line.length()+1; if (msgsize > maxsize) { cerr << "skrewt rejection: bigger than " << maxsize << endl; - exeunt(sa_spam); + exeunt(ex_spam); } if (line == "--" + boundary) { inheads = 1; @@ -266,8 +284,8 @@ int main(int _argc, const char** _argv){ if (0) cerr << "textlines: " << textlines << endl; if (!textlines) { cerr << "skrewt rejection: no text" << endl; - exeunt(sa_spam); + exeunt(ex_spam); } cerr << "skrewt normal completion" << endl; - exit(sa_good); + exit(ex_good); } -- cgit v1.2.3 From 51f3d88572ae0f8eea40996db28ece9cdd0dae18 Mon Sep 17 00:00:00 2001 From: John Denker Date: Fri, 20 Jul 2012 13:59:53 -0700 Subject: gaaack -- fix horrible bug --- tools/skrewt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/skrewt.c') diff --git a/tools/skrewt.c b/tools/skrewt.c index d0289a6..44e885b 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -261,7 +261,8 @@ int main(int _argc, const char** _argv){ } } } - //cout << header.length() << " ... " << header << endl; + //xxxx cout << header.length() << " ... "; + cout << header << endl; } else { string line; if (!getline(cin, line).fail()) { -- cgit v1.2.3 From 6c56d656bd407bc5fc4dd713aacb553ebefee892 Mon Sep 17 00:00:00 2001 From: John Denker Date: Sun, 22 Jul 2012 14:44:49 -0700 Subject: regularize log/progress messages --- tools/skrewt.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'tools/skrewt.c') diff --git a/tools/skrewt.c b/tools/skrewt.c index 44e885b..6de3dd9 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -11,6 +11,7 @@ #include #include /* perror */ +#include using namespace std; @@ -134,6 +135,15 @@ void exeunt(const int sts){ exit(sts); } +string basename(const string path){ + size_t where = path.rfind("/"); + if (where != string::npos) return path.substr(1+where); + return path; +} + +string progname, progid; +int mypid; + //////////////////////////////////////////////////////////// int main(int _argc, const char** _argv){ //// pid_t pid = getpid(); @@ -141,7 +151,14 @@ int main(int _argc, const char** _argv){ //// cout << getpgid(pid) << endl; int argc(_argc); const char **argv(_argv); - string progname(*argv); argv++; argc--; + { + progname = *argv++; argc--; + mypid = getpid(); + stringstream binder; + binder << basename(progname) << "[" << mypid << "]"; + progid = binder.str(); + } + int maxsize(1000000); @@ -183,7 +200,7 @@ int main(int _argc, const char** _argv){ if (getline(cin, header).fail()) continue; msgsize += header.length()+1; if (msgsize > maxsize) { - cerr << "skrewt rejection: bigger than " << maxsize << endl; + cerr << progid << " rejection: bigger than " << maxsize << endl; exeunt(ex_spam); } for (;;) { @@ -197,7 +214,7 @@ int main(int _argc, const char** _argv){ if (getline(cin, line).fail()) continue; msgsize += line.length()+1; if (msgsize > maxsize) { - cerr << "skrewt rejection: bigger than " << maxsize << endl; + cerr << progid << " rejection: bigger than " << maxsize << endl; exeunt(ex_spam); } header += "\n" + line; @@ -206,7 +223,7 @@ int main(int _argc, const char** _argv){ if (len && header[len-1] == '\r') len--; // reduced length, not counting if (len == 0) { if (!gotdate) { - cerr << "skrewt rejection: no date" << endl; + cerr << progid << " rejection: no date" << endl; exeunt(ex_spam); // disallow mail with no date } inheads = 0; @@ -256,7 +273,7 @@ int main(int _argc, const char** _argv){ gotdate++; } else if (headword == "subject") { if (rest.find("-please-bounce-this-") != string::npos) { - cerr << "skrewt rejection: by request" << endl; + cerr << progid << " rejection: by request" << endl; exeunt(ex_spam); } } @@ -268,7 +285,7 @@ int main(int _argc, const char** _argv){ if (!getline(cin, line).fail()) { msgsize += line.length()+1; if (msgsize > maxsize) { - cerr << "skrewt rejection: bigger than " << maxsize << endl; + cerr << progid << " rejection: bigger than " << maxsize << endl; exeunt(ex_spam); } if (line == "--" + boundary) { @@ -284,9 +301,9 @@ int main(int _argc, const char** _argv){ } if (0) cerr << "textlines: " << textlines << endl; if (!textlines) { - cerr << "skrewt rejection: no text" << endl; + cerr << progid << " rejection: no text" << endl; exeunt(ex_spam); } - cerr << "skrewt normal completion" << endl; + cerr << progid << " normal completion" << endl; exit(ex_good); } -- cgit v1.2.3 From 9b5fbc11bb1d96dd598ebfad5539660f75571835 Mon Sep 17 00:00:00 2001 From: John Denker Date: Mon, 23 Jul 2012 18:01:34 -0700 Subject: progress toward cleaning up skrewt --- tools/skrewt.c | 265 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 163 insertions(+), 102 deletions(-) (limited to 'tools/skrewt.c') diff --git a/tools/skrewt.c b/tools/skrewt.c index 6de3dd9..ed0e627 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -12,6 +12,7 @@ #include /* perror */ #include +#include using namespace std; @@ -144,6 +145,58 @@ string basename(const string path){ string progname, progid; int mypid; + +/* Content-Type: text/plain; charset="us-ascii" */ +/* Content-Type: multipart/mixed; boundary="1170861315-1262462055-1341954763=:92165" */ +void parse_content(const string type_spec_line, string &maintype, string &boundary) { + string mainline(type_spec_line); + + string get_type(toLower(mainline)); + size_t where = get_type.find_first_of(" \t;\n"); + if (where == string::npos) { + // keep whole string + } + else { + get_type = get_type.substr(0,where); + } + where = get_type.find("/"); + if (where == string::npos){ + maintype = ""; + } else { + maintype = get_type.substr(0, where); + } +} + +#ifdef xxxxxxxxxxxxxxxxxxxx + if + if (0) cerr << "type (" << get_type << ") " + << (text_type?"text":"nope") << endl; + + + string srch = "boundary="; + where = headrec.find(srch); + if (where != string::npos) { + where += srch.length(); + boundary = headrec.substr(where); + if (boundary[0] == '"') { + boundary = boundary.substr(1); + where = boundary.find_first_of("\""); + } else { + where = boundary.find_first_of(" \t;\n"); + } + if (where == string::npos) { + /* do nothing, boundary=boundary as a whole */ + } else { + boundary = boundary.substr(0, where); + } + } + } +#endif + + + + + //////////////////////////////////////////////////////////// int main(int _argc, const char** _argv){ //// pid_t pid = getpid(); @@ -159,8 +212,7 @@ int main(int _argc, const char** _argv){ progid = binder.str(); } - - int maxsize(1000000); + int maxsize(1000*1000); while (argc) { string arg(*argv); argv++; argc--; @@ -186,121 +238,130 @@ int main(int _argc, const char** _argv){ } } - int inheads(1); + int saw_blank_line(0); string boundary("x-xx-x"); - int text_type(1); - int textlines(0); - int gotdate(0); + string date; + string subject; + string content_type; + string message_id; int msgsize(0); - for (;;){ + vector bigbuf; + cerr << "hi there" << endl; + + for (;;){ // outer loop over all records in the header if (cin.eof()) break; if (cin.bad()) return 1; - if (inheads) { - string header; - if (getline(cin, header).fail()) continue; - msgsize += header.length()+1; + + string headrec; +// on fail, go back to top of outer loop and check for eof versus bad + if (getline(cin, headrec).fail()) continue; + msgsize += headrec.length()+1; + if (msgsize > maxsize) { + cerr << progid << " rejection: bigger than " << maxsize << endl; + exeunt(ex_spam); + } + cout << headrec << endl; + bigbuf.push_back(headrec); // for a folded record, this is the first line + + for (;;) { // inner loop to build a multi-line record e.g. folded record: + if (cin.eof()) break; + if (cin.bad()) return 1; + char ch; + if (cin.get(ch).fail()) continue; + cin.putback(ch); + if (ch != ' ' && ch != '\t') break; + string line; +// on fail, go back to top of inner loop and check for eof versus bad + if (getline(cin, line).fail()) continue; + msgsize += line.length()+1; if (msgsize > maxsize) { cerr << progid << " rejection: bigger than " << maxsize << endl; exeunt(ex_spam); } - for (;;) { - if (cin.eof()) break; - if (cin.bad()) return 1; - char ch; - if (cin.get(ch).fail()) continue; - cin.putback(ch); - if (ch != ' ' && ch != '\t') break; - string line; - if (getline(cin, line).fail()) continue; - msgsize += line.length()+1; - if (msgsize > maxsize) { - cerr << progid << " rejection: bigger than " << maxsize << endl; - exeunt(ex_spam); - } - header += "\n" + line; - } - int len = header.length(); - if (len && header[len-1] == '\r') len--; // reduced length, not counting - if (len == 0) { - if (!gotdate) { - cerr << progid << " rejection: no date" << endl; - exeunt(ex_spam); // disallow mail with no date - } - inheads = 0; - //cerr << "end of headers" << endl; + cout << line << endl; + bigbuf.push_back(line); + string cooked(line); + if (cooked.length()){ + string::iterator ptr = cooked.end()-1; + if (*ptr == '\r') cooked.erase(ptr); } - else { + headrec += "\n" + cooked; + } +// here with a fully assembled header record + int len = headrec.length(); + if (len && headrec[len-1] == '\r') len--; // reduced length, not counting + if (len == 0) { + saw_blank_line = 1; + break; // no more headers in this message + } + // here if it's a header line - string headword; - string rest; - size_t where = header.find(":"); - if (where != string::npos) { - headword = header.substr(0, where); - rest = ltrim(header.substr(1+where)); - } - headword = toLower(headword); - if (headword == "content-type") { - string the_type = rest; - size_t where = the_type.find_first_of(" \t;\n"); - if (where == string::npos) { - /* do nothing */ - } - else { - the_type = the_type.substr(0,where); - } - the_type = toLower(the_type); - text_type = (the_type.find("text/") == 0); - if (0) cerr << "type (" << the_type << ") " - << (text_type?"text":"nope") << endl; - string srch = "boundary="; - where = header.find(srch); - if (where != string::npos) { - where += srch.length(); - boundary = header.substr(where); - if (boundary[0] == '"') { - boundary = boundary.substr(1); - where = boundary.find_first_of("\""); - } else { - where = boundary.find_first_of(" \t;\n"); - } - if (where == string::npos) { - /* do nothing, boundary=boundary as a whole */ - } else { - boundary = boundary.substr(0, where); - } - } - } else if (headword == "date") { - gotdate++; - } else if (headword == "subject") { - if (rest.find("-please-bounce-this-") != string::npos) { - cerr << progid << " rejection: by request" << endl; - exeunt(ex_spam); - } - } - } - //xxxx cout << header.length() << " ... "; - cout << header << endl; + string headword; + string rest; + size_t where = headrec.find(":"); + if (where != string::npos) { + headword = headrec.substr(0, where); + rest = ltrim(headrec.substr(1+where)); + } + headword = toLower(headword); + if (0){ + } else if (headword == "date") { + date = rest; + } else if (headword == "subject") { + subject = rest; + } else if (headword == "content-type") { + content_type = rest; + } + //xxxx cout << headrec.length() << " ... "; + } + cerr << "headers are done. Delimited: " << saw_blank_line << endl; + +// Headers are done. +// Do some early-stage thinking. + + if (subject.find("-please-bounce-this-") != string::npos) { + cerr << progid << " rejection: by request" << endl; + exeunt(ex_spam); + } + + if (!date.length()) { + cerr << progid << " rejection: no date" << endl; + exeunt(ex_spam); // disallow mail with no date + } + + string contype; + int textlines(0); + parse_content(content_type, contype, boundary); + int is_text = contype == "text"; + +// early-stage thinking has been done. +// Now spew the rest of the message + cerr << "body begins: " << contype << " " << is_text << endl; + int inheads(0); + for (;;){ // outer loop over all lines in the body + if (cin.eof()) break; + if (cin.bad()) return 1; + string line; +// on fail, go back to top of outer loop and check for eof versus bad + if (getline(cin, line).fail()) continue; + msgsize += line.length()+1; + if (msgsize > maxsize) { + cerr << progid << " rejection: bigger than " << maxsize << endl; + exeunt(ex_spam); + } + bigbuf.push_back(line); + cout << line << endl; + if (line == "--" + boundary) { + inheads = 1; } else { - string line; - if (!getline(cin, line).fail()) { - msgsize += line.length()+1; - if (msgsize > maxsize) { - cerr << progid << " rejection: bigger than " << maxsize << endl; - exeunt(ex_spam); - } - if (line == "--" + boundary) { - inheads = 1; - } else { - if (text_type) { - if (ltrim(line).length()) textlines++; - } - } - cout << line << endl; + if (is_text) { + if (ltrim(line).length()) textlines++; } } } + if (0) cerr << "textlines: " << textlines << endl; - if (!textlines) { + if (000 && !textlines) { cerr << progid << " rejection: no text" << endl; exeunt(ex_spam); } -- cgit v1.2.3 From 0f0fc6dbb7e7354ac662951f124b0a64c54b741b Mon Sep 17 00:00:00 2001 From: John Denker Date: Mon, 23 Jul 2012 21:37:35 -0700 Subject: get line-counter to work --- tools/skrewt.c | 134 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 77 insertions(+), 57 deletions(-) (limited to 'tools/skrewt.c') diff --git a/tools/skrewt.c b/tools/skrewt.c index ed0e627..d2e1bbc 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -145,56 +145,52 @@ string basename(const string path){ string progname, progid; int mypid; - /* Content-Type: text/plain; charset="us-ascii" */ /* Content-Type: multipart/mixed; boundary="1170861315-1262462055-1341954763=:92165" */ +// void parse_content(const string type_spec_line, string &maintype, string &boundary) { - string mainline(type_spec_line); - - string get_type(toLower(mainline)); - size_t where = get_type.find_first_of(" \t;\n"); - if (where == string::npos) { - // keep whole string - } - else { - get_type = get_type.substr(0,where); - } - where = get_type.find("/"); - if (where == string::npos){ - maintype = ""; - } else { - maintype = get_type.substr(0, where); - } -} - -#ifdef xxxxxxxxxxxxxxxxxxxx - if - if (0) cerr << "type (" << get_type << ") " - << (text_type?"text":"nope") << endl; - - - string srch = "boundary="; - where = headrec.find(srch); - if (where != string::npos) { - where += srch.length(); - boundary = headrec.substr(where); - if (boundary[0] == '"') { - boundary = boundary.substr(1); - where = boundary.find_first_of("\""); - } else { - where = boundary.find_first_of(" \t;\n"); - } - if (where == string::npos) { - /* do nothing, boundary=boundary as a whole */ - } else { - boundary = boundary.substr(0, where); - } - } - } -#endif + cerr << "parser called with: " << type_spec_line << endl; + string get_type(type_spec_line); + size_t where = get_type.find_first_of(" \t;\n"); + string rest; + if (where == string::npos) { + // keep whole string + } + else { + rest = get_type.substr(where+1); + get_type = get_type.substr(0,where); + } + where = get_type.find("/"); + if (where == string::npos){ + maintype = ""; + cerr << "could not find / in " << get_type << endl; + } else { + maintype = get_type.substr(0, where); + } +// now need to find boundary + string srch = "boundary="; + where = rest.find(srch); + if (where != string::npos) { + where += srch.length(); + boundary = rest.substr(where); + if (boundary[0] == '"') { + boundary = boundary.substr(1); + where = boundary.find_first_of("\""); + } else { + where = boundary.find_first_of(" \t;\n"); + } + if (where == string::npos) { + /* do nothing, boundary=boundary as a whole */ + } else { + boundary = boundary.substr(0, where); + } + } else { + //xxxxxxx cerr << "boundary= not found in " << type_spec_line << endl; + } +} //////////////////////////////////////////////////////////// @@ -329,15 +325,17 @@ int main(int _argc, const char** _argv){ exeunt(ex_spam); // disallow mail with no date } - string contype; - int textlines(0); - parse_content(content_type, contype, boundary); - int is_text = contype == "text"; + string main_contype; + parse_content(content_type, main_contype, boundary); + int currently_text = main_contype == "text"; + int main_multipart = main_contype == "multipart"; // early-stage thinking has been done. // Now spew the rest of the message - cerr << "body begins: " << contype << " " << is_text << endl; - int inheads(0); + cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl; + int in_subheads(0); + int textlines(0); + for (;;){ // outer loop over all lines in the body if (cin.eof()) break; if (cin.bad()) return 1; @@ -351,19 +349,41 @@ int main(int _argc, const char** _argv){ } bigbuf.push_back(line); cout << line << endl; - if (line == "--" + boundary) { - inheads = 1; + if (in_subheads){ + if (line == "" || line == "\r") in_subheads = 0; + } + if (in_subheads){ + string sub_contype; + string junk; +// in principle could worry about folded headers, +// but in this application it doesn't actually matter + string headword; + string rest; + size_t where = line.find(":"); + if (where != string::npos) { + headword = line.substr(0, where); + rest = ltrim(line.substr(1+where)); + } + headword = toLower(headword); + if (headword == "content-type") { + parse_content(rest, sub_contype, junk); + currently_text = sub_contype == "text"; + cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl; + } } else { - if (is_text) { - if (ltrim(line).length()) textlines++; + if (main_multipart && line == "--" + boundary) { + //xxxx cerr << "found subhead boundary" << endl; + in_subheads = 1; + continue; } + if (currently_text) textlines++; } } - if (0) cerr << "textlines: " << textlines << endl; - if (000 && !textlines) { + if (1) cerr << "textlines: " << textlines << endl; + if (1 && !textlines) { cerr << progid << " rejection: no text" << endl; - exeunt(ex_spam); +// exeunt(ex_spam); } cerr << progid << " normal completion" << endl; exit(ex_good); -- cgit v1.2.3 From e2390efc92d66a50c6ce13d027aac8c6ecc02e89 Mon Sep 17 00:00:00 2001 From: John Denker Date: Tue, 24 Jul 2012 11:28:34 -0700 Subject: remove print statements --- tools/skrewt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/skrewt.c') diff --git a/tools/skrewt.c b/tools/skrewt.c index d2e1bbc..ad99e67 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -149,7 +149,7 @@ int mypid; /* Content-Type: multipart/mixed; boundary="1170861315-1262462055-1341954763=:92165" */ // void parse_content(const string type_spec_line, string &maintype, string &boundary) { - cerr << "parser called with: " << type_spec_line << endl; + //xxx cerr << "parser called with: " << type_spec_line << endl; string get_type(type_spec_line); size_t where = get_type.find_first_of(" \t;\n"); @@ -310,7 +310,7 @@ int main(int _argc, const char** _argv){ } //xxxx cout << headrec.length() << " ... "; } - cerr << "headers are done. Delimited: " << saw_blank_line << endl; + //xxx cerr << "headers are done. Delimited: " << saw_blank_line << endl; // Headers are done. // Do some early-stage thinking. @@ -368,7 +368,7 @@ int main(int _argc, const char** _argv){ if (headword == "content-type") { parse_content(rest, sub_contype, junk); currently_text = sub_contype == "text"; - cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl; + //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl; } } else { if (main_multipart && line == "--" + boundary) { -- cgit v1.2.3 From 6e8083ff4ffe3fd2b6d337386637a2b5c1378cf7 Mon Sep 17 00:00:00 2001 From: John Denker Date: Wed, 25 Jul 2012 13:05:03 -0700 Subject: fix a bunch of DOS-CR bugs --- tools/skrewt.c | 121 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 89 insertions(+), 32 deletions(-) (limited to 'tools/skrewt.c') diff --git a/tools/skrewt.c b/tools/skrewt.c index ad99e67..a43fd13 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -1,4 +1,4 @@ -/////////////////// +////////////////// // skrewt.c // // scrutinize email @@ -13,6 +13,7 @@ #include /* perror */ #include #include +#include using namespace std; @@ -28,6 +29,7 @@ void usage(const int sts){ " Options\n" " -help print this msg (and exit immediately).\n" " -maxsize ii msg size in bytes; anything bigger will be rejected.\n" +" -error-exit exit early if errors have been detected.\n" "\n" " Messages containing the string '-please-bounce-this-' will be rejected.\n" " Messages with no date will be rejected.\n" @@ -36,7 +38,7 @@ void usage(const int sts){ } // error exit codes, mostly as stated in qmail.c -#define bar \ +#define ErrorCodes \ foo(good, 0) ;\ foo(spam, 21) ;\ foo(permerr, 31) ;\ @@ -46,7 +48,7 @@ foo(syserr, 71) ;\ foo(comerr, 74) ; #define foo(name, num) const int ex_ ## name = num -bar +ErrorCodes #undef foo @@ -104,12 +106,23 @@ string toLower(const std::string& a){ } //////////////// -string ltrim(string foo){ +string ltrim(const string foo){ size_t where = foo.find_first_not_of(" \t\r\n"); if (where == foo.npos) return foo; return foo.substr(where); } +string noCR(const string bar){ + string foo(bar); + int len = foo.length(); + if (len){ + if (foo[len-1] == '\r') { + foo.erase(len-1); + } + } + return foo; +} + //////////////// // little utility to help with argument parsing: // @@ -117,7 +130,8 @@ int prefix(const string shorter, const string longer){ return shorter == longer.substr(0, shorter.length()); } -void exeunt(const int sts){ +void maybe_exeunt(const int sts, const int really){ + if (!really) return; if (sts == ex_good) exit(sts); const char* foo = getenv("HI_Q_GROUP"); @@ -136,6 +150,10 @@ void exeunt(const int sts){ exit(sts); } +void exeunt(const int sts){ + maybe_exeunt(sts, 1); +} + string basename(const string path){ size_t where = path.rfind("/"); if (where != string::npos) return path.substr(1+where); @@ -192,6 +210,15 @@ void parse_content(const string type_spec_line, string &maintype, string &bounda } } +string join(const string sep, const list stuff){ + string rslt; + for (list::const_iterator ptr = stuff.begin(); + ptr != stuff.end(); ptr++){ + if (rslt.length()) rslt += sep; + rslt += *ptr; + } + return rslt; +} //////////////////////////////////////////////////////////// int main(int _argc, const char** _argv){ @@ -209,6 +236,8 @@ int main(int _argc, const char** _argv){ } int maxsize(1000*1000); + int error_exit(0); + int mid_required(0); while (argc) { string arg(*argv); argv++; argc--; @@ -216,14 +245,18 @@ int main(int _argc, const char** _argv){ if (prefix(arg, "-help")) { usage(0); } - if (prefix(arg, "-maxsize")) { + if (0) { + } else if (prefix(arg, "-mid-required")) { + mid_required++; + } else if (prefix(arg, "-error-exit")) { + error_exit++; + } else if (prefix(arg, "-maxsize")) { if (!argc) { cerr << "Option -maxsize requires an argument" << endl; exit(ex_usage); } maxsize = atoi(*argv); argv++; argc--; - } - if (arg.substr(0,1) == "-") { + } else if (arg.substr(0,1) == "-") { cerr << "Unrecognized option '" << arg << "'" << endl; cerr << "For help, try: " << progname << " -help" << endl; exit(ex_usage); @@ -236,28 +269,32 @@ int main(int _argc, const char** _argv){ int saw_blank_line(0); string boundary("x-xx-x"); - string date; + string to; + string from; string subject; - string content_type; + string date; string message_id; + string content_type; int msgsize(0); vector bigbuf; - cerr << "hi there" << endl; + int recno(0); + //xxxx cerr << progid << " begins" << endl; for (;;){ // outer loop over all records in the header if (cin.eof()) break; if (cin.bad()) return 1; - string headrec; + string line; // on fail, go back to top of outer loop and check for eof versus bad - if (getline(cin, headrec).fail()) continue; - msgsize += headrec.length()+1; + if (getline(cin, line).fail()) continue; + msgsize += line.length()+1; if (msgsize > maxsize) { cerr << progid << " rejection: bigger than " << maxsize << endl; exeunt(ex_spam); } - cout << headrec << endl; - bigbuf.push_back(headrec); // for a folded record, this is the first line + cout << line << endl; + bigbuf.push_back(line); + string headrec = noCR(line); // for a folded record, this is the first line for (;;) { // inner loop to build a multi-line record e.g. folded record: if (cin.eof()) break; @@ -276,12 +313,7 @@ int main(int _argc, const char** _argv){ } cout << line << endl; bigbuf.push_back(line); - string cooked(line); - if (cooked.length()){ - string::iterator ptr = cooked.end()-1; - if (*ptr == '\r') cooked.erase(ptr); - } - headrec += "\n" + cooked; + headrec += "\n" + noCR(line); } // here with a fully assembled header record int len = headrec.length(); @@ -301,6 +333,12 @@ int main(int _argc, const char** _argv){ } headword = toLower(headword); if (0){ + } else if (headword == "from") { + from = rest; + } else if (headword == "to") { + to = rest; + } else if (headword == "message-id") { + message_id = rest; } else if (headword == "date") { date = rest; } else if (headword == "subject") { @@ -309,30 +347,49 @@ int main(int _argc, const char** _argv){ content_type = rest; } //xxxx cout << headrec.length() << " ... "; + recno++; + if (0) if (recno <= 6) cerr << progid << "#" << recno + << " " << headrec << endl; } - //xxx cerr << "headers are done. Delimited: " << saw_blank_line << endl; + cerr << progid <<" Mid '" << message_id << "'" << endl; // Headers are done. // Do some early-stage thinking. + list badnews; + if (subject.find("-please-bounce-this-") != string::npos) { - cerr << progid << " rejection: by request" << endl; - exeunt(ex_spam); + badnews.push_back("by request"); } if (!date.length()) { - cerr << progid << " rejection: no date" << endl; - exeunt(ex_spam); // disallow mail with no date + badnews.push_back("no date"); + } + + if (mid_required && !message_id.length()) { + badnews.push_back("no message-id"); + } + + if (badnews.size()){ + cerr << progid << " " << join(", ", badnews) << endl; + if (error_exit){ + cerr << progid << " '" << from + << "' to '" << to + << "'" << endl; + exeunt(ex_spam); + } } string main_contype; - parse_content(content_type, main_contype, boundary); + if (content_type.length()) + parse_content(content_type, main_contype, boundary); +// some slightly-useful booleans: int currently_text = main_contype == "text"; int main_multipart = main_contype == "multipart"; // early-stage thinking has been done. // Now spew the rest of the message - cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl; + //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl; int in_subheads(0); int textlines(0); @@ -345,7 +402,7 @@ int main(int _argc, const char** _argv){ msgsize += line.length()+1; if (msgsize > maxsize) { cerr << progid << " rejection: bigger than " << maxsize << endl; - exeunt(ex_spam); + maybe_exeunt(ex_spam, error_exit); } bigbuf.push_back(line); cout << line << endl; @@ -380,10 +437,10 @@ int main(int _argc, const char** _argv){ } } - if (1) cerr << "textlines: " << textlines << endl; + if (0) cerr << "textlines: " << textlines << endl; if (1 && !textlines) { cerr << progid << " rejection: no text" << endl; -// exeunt(ex_spam); +// maybe_exeunt(ex_spam, error_exit); } cerr << progid << " normal completion" << endl; exit(ex_good); -- cgit v1.2.3 From d2564d25e802d1ee3230cf045c4940e836b5c6a2 Mon Sep 17 00:00:00 2001 From: John Denker Date: Sun, 29 Jul 2012 16:50:11 -0700 Subject: split ltgrey (and libltgrey) off from greylist; put some utility functions into their own file. --- tools/skrewt.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'tools/skrewt.c') diff --git a/tools/skrewt.c b/tools/skrewt.c index a43fd13..3fee644 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -37,19 +37,7 @@ void usage(const int sts){ exit(sts); } -// error exit codes, mostly as stated in qmail.c -#define ErrorCodes \ -foo(good, 0) ;\ -foo(spam, 21) ;\ -foo(permerr, 31) ;\ -foo(usage, 39) ;\ -foo(greylisting, 70) ;\ -foo(syserr, 71) ;\ -foo(comerr, 74) ; - -#define foo(name, num) const int ex_ ## name = num -ErrorCodes -#undef foo +#include "qq_exit_codes.h" ///////////////////////////////////////////////////////// @@ -316,8 +304,8 @@ int main(int _argc, const char** _argv){ headrec += "\n" + noCR(line); } // here with a fully assembled header record +// headrec (unlike line) contains no DOS CR characters int len = headrec.length(); - if (len && headrec[len-1] == '\r') len--; // reduced length, not counting if (len == 0) { saw_blank_line = 1; break; // no more headers in this message @@ -351,6 +339,7 @@ int main(int _argc, const char** _argv){ if (0) if (recno <= 6) cerr << progid << "#" << recno << " " << headrec << endl; } + if (saw_blank_line) {/* ignore */} cerr << progid <<" Mid '" << message_id << "'" << endl; // Headers are done. -- cgit v1.2.3 From 63be414b62f3234ad80607b95e8e71e33bfd8025 Mon Sep 17 00:00:00 2001 From: John Denker Date: Sun, 29 Jul 2012 17:00:15 -0700 Subject: move more stuff to utils.c ... I hate duplication of code --- tools/skrewt.c | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) (limited to 'tools/skrewt.c') diff --git a/tools/skrewt.c b/tools/skrewt.c index 3fee644..6749a01 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -38,7 +38,7 @@ void usage(const int sts){ } #include "qq_exit_codes.h" - +#include "utils.h" ///////////////////////////////////////////////////////// // Case insensitive comparison of strings @@ -83,23 +83,6 @@ int cmp_casefold(const std::string& a, const std::string& b) { return 0; } - -string toLower(const std::string& a){ - string rslt = a; - string::iterator rr; - for (rr = rslt.begin(); rr != rslt.end(); rr++){ - *rr = tolower(*rr); - } - return rslt; -} - -//////////////// -string ltrim(const string foo){ - size_t where = foo.find_first_not_of(" \t\r\n"); - if (where == foo.npos) return foo; - return foo.substr(where); -} - string noCR(const string bar){ string foo(bar); int len = foo.length(); @@ -111,13 +94,6 @@ string noCR(const string bar){ return foo; } -//////////////// -// little utility to help with argument parsing: -// -int prefix(const string shorter, const string longer){ - return shorter == longer.substr(0, shorter.length()); -} - void maybe_exeunt(const int sts, const int really){ if (!really) return; if (sts == ex_good) exit(sts); @@ -142,12 +118,6 @@ void exeunt(const int sts){ maybe_exeunt(sts, 1); } -string basename(const string path){ - size_t where = path.rfind("/"); - if (where != string::npos) return path.substr(1+where); - return path; -} - string progname, progid; int mypid; -- cgit v1.2.3