From a26e789121448d095227583f7a29ae2deac88a33 Mon Sep 17 00:00:00 2001 From: John Denker Date: Fri, 23 Nov 2012 01:06:10 -0800 Subject: mid debug --- tools/libskrewt.c | 251 ++++++++++++++++++++++++++++++++++++------------------ tools/libskrewt.h | 20 ++++- tools/makefile | 2 +- tools/ward.c | 1 + 4 files changed, 185 insertions(+), 89 deletions(-) (limited to 'tools') diff --git a/tools/libskrewt.c b/tools/libskrewt.c index 68fe741..dc131f9 100644 --- a/tools/libskrewt.c +++ b/tools/libskrewt.c @@ -7,7 +7,7 @@ using namespace std; void parse_content(const string type_spec_line, string &maintype, string &boundary) { - //xxx cerr << "parser called with: " << type_spec_line << endl; + cerr << "parser called with: " << type_spec_line << endl; string get_type(type_spec_line); size_t where = get_type.find_first_of(" \t;\n"); @@ -61,65 +61,96 @@ int skrewt::krunch_rfrom(){ return ex_syserr; } parse >> proximta_rDNS; - parse >> word; - if (word == "(HELO" /*)*/) { - parse >> proximta_HELO; - proximta_HELO = rtrim(proximta_HELO, "()"); + for (;;) { parse >> word; - } else { - proximta_HELO = proximta_rDNS; - } - size_t len = word.length(); - if (len<2 || word[0] != '(' || word[len-1] != ')') { - cerr << progid << " bad 'Received: from' line ;;; '" - << word << "'" << endl; - return ex_syserr; + size_t len = word.length(); + if (len < 2) { + cerr << progid << " bad 'Received: from' line ;;; short '" + << word << "'" << endl; + return ex_syserr; + } + if (word == "by") break; + if (word == "(HELO" /*)*/) { + parse >> proximta_HELO; + proximta_HELO = rtrim(proximta_HELO, "()"); + continue; + } + if (word[0] != '(' || word[len-1] != ')') { + cerr << progid << " bad 'Received: from' line ;;; no parens: '" + << word << "'" << endl; + return ex_syserr; + } + proximta_IP = word.substr(1, len-2); + size_t where = proximta_IP.find("@"); + if (where != string::npos){ + proximta_AuthUser = proximta_IP.substr(0, where); + proximta_IP = proximta_IP.substr(1+where); + } } - proximta_IP = 
word.substr(1, len-2); - size_t where = proximta_IP.find("@"); - if (where != string::npos){ - proximta_AuthUser = proximta_IP.substr(0, where); - proximta_IP = proximta_IP.substr(1+where); +// provide some kind of default? maybe not. + if (0) if (proximta_HELO == "") { + proximta_HELO = proximta_rDNS + " (+-)"; } return 0; } -int skrewt::headers(istream& xin){ - //xxxx cerr << progid << " begins" << endl; - for (;;){ // outer loop over all records in the header - if (xin.eof()) break; - if (xin.bad()) return 1; +xstr skrewt::getLine(istream& xin){ + string rslt; + if (lookahead.err == 0) { + rslt = lookahead.str; + lookahead.err = 1; + return {0, rslt}; + } + if (getline(xin, rslt).fail()) return {1, ""}; + else return {0, rslt}; +} - string line; -// on fail, go back to top of outer loop and check for eof versus bad - if (getline(xin, line).fail()) continue; - msgsize += line.length()+1; +xstr skrewt::getRecord(istream& xin){ + xstr line = getLine(xin); + if (line.err) return line; + msgsize += line.str.length()+1; if (msgsize > maxsize) { - cerr << progid << " rejection: bigger than " << maxsize << endl; + cerr << progid << " header rejection: bigger than " << maxsize << endl; exeunt(ex_spam); } - bigbuf.push_back(line); - string headrec = noCR(line); // for a folded record, this is the first line + bigbuf.push_back(line.str); +// for a folded record, this is the first line: + string headrec = noCR(line.str); - for (;;) { // inner loop to build a multi-line record e.g. folded record: +// inner loop to build a multi-line record e.g. 
folded record: + for (;;) { if (xin.eof()) break; - if (xin.bad()) return 1; - char ch; - if (xin.get(ch).fail()) continue; - xin.putback(ch); - if (ch != ' ' && ch != '\t') break; - string line; -// on fail, go back to top of inner loop and check for eof versus bad - if (getline(xin, line).fail()) continue; - msgsize += line.length()+1; + if (xin.bad()) return {1, ""}; + xstr more = getLine(xin); + if (more.err) continue; + char ch(0); + if (more.str != "") ch = more.str[0]; + if (ch != ' ' && ch != '\t') { + lookahead = more; + break; + } + msgsize += line.str.length()+1; if (msgsize > maxsize) { - cerr << progid << " rejection: bigger than " << maxsize << endl; + cerr << progid + << " folded header rejection: bigger than " << maxsize << endl; exeunt(ex_spam); } - bigbuf.push_back(line); - headrec += "\n" + noCR(line); + bigbuf.push_back(line.str); + headrec += "\n" + noCR(line.str); } + return {0, headrec}; +} + +int skrewt::headers(istream& xin){ + //xxxx cerr << progid << " begins" << endl; + for (;;){ // outer loop over all records in the header + if (xin.eof()) break; + if (xin.bad()) return 1; + + xstr rec = getRecord(xin); + if (rec.err) break; + string headrec = rec.str; // here with a fully assembled header record // headrec (unlike line) contains no DOS CR characters int len = headrec.length(); @@ -146,7 +177,12 @@ int skrewt::headers(istream& xin){ return_path = rest; } else if (headword == "message-id") { message_id = rest; - } else if (headword == "received") { + } +// Now pick up the proximal "received: from" line, +// i.e. the latest in time, +// i.e. the topmost in the file, +// i.e. 
the one that was just now put there by qmail-smtpd: + else if (headword == "received") { if (!received_from.length() && prefix("from ", rest)){ received_from = rest; } @@ -157,7 +193,7 @@ int skrewt::headers(istream& xin){ } else if (headword == "content-type") { content_type = rest; } else if (headword == "delivered-to") { - delivered_to = rest; + delivered_to.push_back(rest); } recno++; if (0) if (recno <= 6) cerr << progid << "#" << recno @@ -176,30 +212,39 @@ int skrewt::dump_headers(std::ostream& xout){ int skrewt::interstage(){ if (saw_blank_line) {/* ignore */} -// Note that the headers are in reverse-chronological order: - cerr << progid <<" Return-path: " << return_path <>>" << endl; - } - if (toLower(trim(delivered_to)) == "jean@av8n.com") { - badnews.push_back("Looping Delivered-to: " + delivered_to); + for (list<string>::const_iterator foo = delivered_to.begin(); + foo != delivered_to.end(); foo++){ + cerr << progid << " Delivered-to: <<<" << *foo << ">>>" << endl; + if (toLower(trim(*foo)) == "jean@av8n.com") { + badnews.push_back("Looping Delivered-to: " + *foo); + } } if (subject.find("-please-bounce-this-") != string::npos) { @@ -333,7 +379,7 @@ int skrewt::interstage(){ if (badnews.size() && !whitelisted){ cerr << progid << " " << join(", ", badnews) << endl; if (error_exit){ - cerr << progid << " '" << from + cerr << progid << " ... 
badnews from '" << from << "' to '" << to << "'" << endl; exeunt(ex_spam); @@ -342,22 +388,35 @@ int skrewt::interstage(){ return 0; } +class conner{ +public: + string tp; + string bdy; +}; + int skrewt::body(std::istream& xin, std::ostream& xout){ - string main_contype; - if (content_type.length()) - parse_content(content_type, main_contype, boundary); -// some slightly-useful booleans: - int currently_text = main_contype == "text"; - int main_multipart = main_contype == "multipart"; + list<conner> sitch; + if (content_type.length()) { + string tp, bdy; + parse_content(content_type, tp, bdy); + sitch.push_front({tp, bdy}); + } else { + // assume single-part, all text if not otherwise specified: + sitch.push_front({"text", ""}); + } // early-stage thinking has been done. // Now spew the rest of the message // Note that multipart messages can have sub-headers. - //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl; + cerr << "body begins: tp: " << sitch.front().tp + << " bdy: " << sitch.front().bdy + << " depth: " << sitch.size() + << endl; int in_subheads(0); int textlines(0); + string sub_conntype(""); for (;;){ // outer loop over all lines in the body if (xin.eof()) break; if (xin.bad()) return 1; @@ -371,14 +430,27 @@ int skrewt::body(std::istream& xin, std::ostream& xout){ } bigbuf.push_back(line); xout << line << endl; + if (0) cerr << "+++" << line << endl; if (in_subheads){ - if (line == "" || line == "\r") in_subheads = 0; + if (line == "" || line == "\r") { + in_subheads = 0; + cerr << "+++ end of subhead" << endl; + string tp, bdy; + parse_content(sub_conntype, tp, bdy); + cerr << tp << " '" << bdy << "'" << endl; + sitch.push_front({tp, bdy}); + cerr << "payload mode: tp: " << sitch.front().tp + << " bdy: " << sitch.front().bdy + << " depth: " << sitch.size() + << endl; + } } - if (in_subheads){ - string sub_contype; + if (/* still */in_subheads){ string junk; +// FIXME // in principle could worry about folded 
headers, // but in this application it doesn't actually matter + string headword; string rest; size_t where = line.find(":"); @@ -388,17 +460,26 @@ int skrewt::body(std::istream& xin, std::ostream& xout){ } headword = toLower(headword); if (headword == "content-type") { - parse_content(rest, sub_contype, junk); - currently_text = sub_contype == "text"; - //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl; + sub_conntype = rest; } } else { - if (main_multipart && line == "--" + boundary) { - //xxxx cerr << "found subhead boundary" << endl; + if (sitch.front().tp == "multipart" + && line == "--" + sitch.front().bdy) { + cerr << "boundary: begin subhead block" << endl; + in_subheads = 1; + sub_conntype = ""; + continue; + } + if (sitch.front().tp == "multipart" + && line == "--" + sitch.front().bdy + "--") { + sitch.pop_front(); + cerr << "found subhead termination --> " + << sitch.front().tp + << endl; in_subheads = 1; continue; } - if (currently_text) textlines++; + if (sitch.front().tp == "text") textlines++; } } diff --git a/tools/libskrewt.h b/tools/libskrewt.h index cfbbb79..5ae6ece 100644 --- a/tools/libskrewt.h +++ b/tools/libskrewt.h @@ -5,6 +5,14 @@ #include "qq_exit_codes.h" // a bit of a kludge extern std::string progid; +class xstr { +public: + int err; + std::string str; + xstr() : err(1), str("") {} + xstr(const int foo, const std::string bar) : err(foo), str(bar) {} +}; + class skrewt{ public: std::string received_from; // envelope HELO among other things @@ -12,6 +20,7 @@ public: std::string proximta_rDNS; std::string proximta_IP; std::string proximta_AuthUser; + SPF_result_t spf_result; std::string return_path; // envelope MAIL FROM std::string boundary; std::string to; @@ -20,7 +29,7 @@ public: std::string date; std::string message_id; std::string content_type; - std::string delivered_to; + std::list<std::string> delivered_to; int msgsize; std::vector<std::string> bigbuf; int saw_blank_line; @@ -32,15 +41,20 @@ public: 
// constructor skrewt() - : boundary("x-xx-x"), msgsize(0), saw_blank_line(0), recno(0), - maxsize(1000*1000), error_exit(0), mid_required(0) + : spf_result(SPF_RESULT_INVALID), + boundary("x-xx-x"), msgsize(0), saw_blank_line(0), recno(0), + maxsize(1000*1000), error_exit(0), mid_required(0), + lookahead(1, "") {} + xstr getRecord(std::istream& xin); + xstr getLine(std::istream& xin); int headers(std::istream& xin); int dump_headers(std::ostream& xout); int interstage(); int body(std::istream& xin, std::ostream& xout); int krunch_rfrom(); + xstr lookahead; }; void parse_content(const std::string type_spec_line, diff --git a/tools/makefile b/tools/makefile index 1cb1425..97a39b4 100644 --- a/tools/makefile +++ b/tools/makefile @@ -15,7 +15,7 @@ qmain = pido.c hi-q.c skrewt.c hi-test.c mail-scan.c greylist.c wripper.c qprogs = $(qmain:%.c=%) # sources for other main programs: -moremain = wripper.c bash-c.c ltgrey.c fixown.c +moremain = wripper.c bash-c.c ltgrey.c fixown.c ward.c moreprogs = $(moremain:%.c=%) nonmain = libltgrey.c diff --git a/tools/ward.c b/tools/ward.c index b6f5a15..b8bdee4 100644 --- a/tools/ward.c +++ b/tools/ward.c @@ -85,6 +85,7 @@ int main(int _argc, const char** _argv){ } skrewt mysk; +// cerr << "maxsize: " << mysk.maxsize << endl; argParser ARGS(argc, argv); try {while (ARGS.size()) { -- cgit v1.2.3