From 9b5fbc11bb1d96dd598ebfad5539660f75571835 Mon Sep 17 00:00:00 2001 From: John Denker Date: Mon, 23 Jul 2012 18:01:34 -0700 Subject: progress toward cleaning up skrewt --- tools/skrewt.c | 265 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 163 insertions(+), 102 deletions(-) diff --git a/tools/skrewt.c b/tools/skrewt.c index 6de3dd9..ed0e627 100644 --- a/tools/skrewt.c +++ b/tools/skrewt.c @@ -12,6 +12,7 @@ #include /* perror */ #include +#include using namespace std; @@ -144,6 +145,58 @@ string basename(const string path){ string progname, progid; int mypid; + +/* Content-Type: text/plain; charset="us-ascii" */ +/* Content-Type: multipart/mixed; boundary="1170861315-1262462055-1341954763=:92165" */ +void parse_content(const string type_spec_line, string &maintype, string &boundary) { + string mainline(type_spec_line); + + string get_type(toLower(mainline)); + size_t where = get_type.find_first_of(" \t;\n"); + if (where == string::npos) { + // keep whole string + } + else { + get_type = get_type.substr(0,where); + } + where = get_type.find("/"); + if (where == string::npos){ + maintype = ""; + } else { + maintype = get_type.substr(0, where); + } +} + +#ifdef xxxxxxxxxxxxxxxxxxxx + if + if (0) cerr << "type (" << get_type << ") " + << (text_type?"text":"nope") << endl; + + + string srch = "boundary="; + where = headrec.find(srch); + if (where != string::npos) { + where += srch.length(); + boundary = headrec.substr(where); + if (boundary[0] == '"') { + boundary = boundary.substr(1); + where = boundary.find_first_of("\""); + } else { + where = boundary.find_first_of(" \t;\n"); + } + if (where == string::npos) { + /* do nothing, boundary=boundary as a whole */ + } else { + boundary = boundary.substr(0, where); + } + } + } +#endif + + + + + //////////////////////////////////////////////////////////// int main(int _argc, const char** _argv){ //// pid_t pid = getpid(); @@ -159,8 +212,7 @@ int main(int _argc, const char** _argv){ progid = binder.str(); } - - int maxsize(1000000); + int maxsize(1000*1000); while (argc) { string arg(*argv); argv++; argc--; @@ -186,121 +238,130 @@ int main(int _argc, const char** _argv){ } } - int inheads(1); + int saw_blank_line(0); string boundary("x-xx-x"); - int text_type(1); - int textlines(0); - int gotdate(0); + string date; + string subject; + string content_type; + string message_id; int msgsize(0); - for (;;){ + vector bigbuf; + cerr << "hi there" << endl; + + for (;;){ // outer loop over all records in the header if (cin.eof()) break; if (cin.bad()) return 1; - if (inheads) { - string header; - if (getline(cin, header).fail()) continue; - msgsize += header.length()+1; + + string headrec; +// on fail, go back to top of outer loop and check for eof versus bad + if (getline(cin, headrec).fail()) continue; + msgsize += headrec.length()+1; + if (msgsize > maxsize) { + cerr << progid << " rejection: bigger than " << maxsize << endl; + exeunt(ex_spam); + } + cout << headrec << endl; + bigbuf.push_back(headrec); // for a folded record, this is the first line + + for (;;) { // inner loop to build a multi-line record e.g. folded record: + if (cin.eof()) break; + if (cin.bad()) return 1; + char ch; + if (cin.get(ch).fail()) continue; + cin.putback(ch); + if (ch != ' ' && ch != '\t') break; + string line; +// on fail, go back to top of inner loop and check for eof versus bad + if (getline(cin, line).fail()) continue; + msgsize += line.length()+1; if (msgsize > maxsize) { cerr << progid << " rejection: bigger than " << maxsize << endl; exeunt(ex_spam); } - for (;;) { - if (cin.eof()) break; - if (cin.bad()) return 1; - char ch; - if (cin.get(ch).fail()) continue; - cin.putback(ch); - if (ch != ' ' && ch != '\t') break; - string line; - if (getline(cin, line).fail()) continue; - msgsize += line.length()+1; - if (msgsize > maxsize) { - cerr << progid << " rejection: bigger than " << maxsize << endl; - exeunt(ex_spam); - } - header += "\n" + line; - } - int len = header.length(); - if (len && header[len-1] == '\r') len--; // reduced length, not counting - if (len == 0) { - if (!gotdate) { - cerr << progid << " rejection: no date" << endl; - exeunt(ex_spam); // disallow mail with no date - } - inheads = 0; - //cerr << "end of headers" << endl; + cout << line << endl; + bigbuf.push_back(line); + string cooked(line); + if (cooked.length()){ + string::iterator ptr = cooked.end()-1; + if (*ptr == '\r') cooked.erase(ptr); } - else { + headrec += "\n" + cooked; + } +// here with a fully assembled header record + int len = headrec.length(); + if (len && headrec[len-1] == '\r') len--; // reduced length, not counting + if (len == 0) { + saw_blank_line = 1; + break; // no more headers in this message + } + // here if it's a header line - string headword; - string rest; - size_t where = header.find(":"); - if (where != string::npos) { - headword = header.substr(0, where); - rest = ltrim(header.substr(1+where)); - } - headword = toLower(headword); - if (headword == "content-type") { - string the_type = rest; - size_t where = the_type.find_first_of(" \t;\n"); - if (where == string::npos) { - /* do nothing */ - } - else { - the_type = the_type.substr(0,where); - } - the_type = toLower(the_type); - text_type = (the_type.find("text/") == 0); - if (0) cerr << "type (" << the_type << ") " - << (text_type?"text":"nope") << endl; - string srch = "boundary="; - where = header.find(srch); - if (where != string::npos) { - where += srch.length(); - boundary = header.substr(where); - if (boundary[0] == '"') { - boundary = boundary.substr(1); - where = boundary.find_first_of("\""); - } else { - where = boundary.find_first_of(" \t;\n"); - } - if (where == string::npos) { - /* do nothing, boundary=boundary as a whole */ - } else { - boundary = boundary.substr(0, where); - } - } - } else if (headword == "date") { - gotdate++; - } else if (headword == "subject") { - if (rest.find("-please-bounce-this-") != string::npos) { - cerr << progid << " rejection: by request" << endl; - exeunt(ex_spam); - } - } - } - //xxxx cout << header.length() << " ... "; - cout << header << endl; + string headword; + string rest; + size_t where = headrec.find(":"); + if (where != string::npos) { + headword = headrec.substr(0, where); + rest = ltrim(headrec.substr(1+where)); + } + headword = toLower(headword); + if (0){ + } else if (headword == "date") { + date = rest; + } else if (headword == "subject") { + subject = rest; + } else if (headword == "content-type") { + content_type = rest; + } + //xxxx cout << headrec.length() << " ... "; + } + cerr << "headers are done. Delimited: " << saw_blank_line << endl; + +// Headers are done. +// Do some early-stage thinking. + + if (subject.find("-please-bounce-this-") != string::npos) { + cerr << progid << " rejection: by request" << endl; + exeunt(ex_spam); + } + + if (!date.length()) { + cerr << progid << " rejection: no date" << endl; + exeunt(ex_spam); // disallow mail with no date + } + + string contype; + int textlines(0); + parse_content(content_type, contype, boundary); + int is_text = contype == "text"; + +// early-stage thinking has been done. +// Now spew the rest of the message + cerr << "body begins: " << contype << " " << is_text << endl; + int inheads(0); + for (;;){ // outer loop over all lines in the body + if (cin.eof()) break; + if (cin.bad()) return 1; + string line; +// on fail, go back to top of outer loop and check for eof versus bad + if (getline(cin, line).fail()) continue; + msgsize += line.length()+1; + if (msgsize > maxsize) { + cerr << progid << " rejection: bigger than " << maxsize << endl; + exeunt(ex_spam); + } + bigbuf.push_back(line); + cout << line << endl; + if (line == "--" + boundary) { + inheads = 1; } else { - string line; - if (!getline(cin, line).fail()) { - msgsize += line.length()+1; - if (msgsize > maxsize) { - cerr << progid << " rejection: bigger than " << maxsize << endl; - exeunt(ex_spam); - } - if (line == "--" + boundary) { - inheads = 1; - } else { - if (text_type) { - if (ltrim(line).length()) textlines++; - } - } - cout << line << endl; + if (is_text) { + if (ltrim(line).length()) textlines++; } } } + if (0) cerr << "textlines: " << textlines << endl; - if (!textlines) { + if (000 && !textlines) { cerr << progid << " rejection: no text" << endl; exeunt(ex_spam); } -- cgit v1.2.3