////////////////// // skrewt.c // // scrutinize email // #include #include /* for exit() */ #include /* for strcmp() */ #include /* toupper */ #include #include /* perror */ #include #include #include using namespace std; void usage(const int sts){ (sts ? cerr : cout) << "Usage: skrewt [options]\n" "\n" " Scrutinizes email. Reads stdin, copies it to stdout.\n" " Exit result 0 means good, 21 means rejection (spam).\n" " Writes reason for rejection to stderr.\n" "\n" " Typically used as a filter in a pipeline, along with spamc -E\n" " Options\n" " -help print this msg (and exit immediately).\n" " -maxsize ii msg size in bytes; anything bigger will be rejected.\n" " -error-exit exit early if errors have been detected.\n" "\n" " Messages containing the string '-please-bounce-this-' will be rejected.\n" " Messages with no date will be rejected.\n" ; exit(sts); } #include "qq_exit_codes.h" #include "utils.h" ///////////////////////////////////////////////////////// // Case insensitive comparison of strings class lessthan_foldcase{ public: bool operator() (const std::string& a, const std::string& b) const { size_t a_len = a.length(); size_t b_len = b.length(); size_t lim = a_len < b_len ? a_len : b_len; for (size_t i=0; i chb) return false; } // here if one is an extension of the other if ( a_len < b_len ) return true; return false; } }; // Returns negative if a is less than b in alphabetical order // returns 0 if they are the same, or positive if a is greater. // Like perl cmp operator, but ignores case. int cmp_casefold(const std::string& a, const std::string& b) { string::const_iterator aa, bb; aa = a.begin(); bb = b.begin(); while (aa != a.end() && bb != b.end()){ char ca = tolower(*aa++); char cb = tolower(*bb++); if (ca != cb) return ca < cb ? -2 : 2; } if (aa != a.end()) return 1; // a is longer if (bb != b.end()) return -1; // b is longer return 0; } string noCR(const string bar){ string foo(bar); int len = foo.length(); if (len){ if (foo[len-1] == '\r') { foo.erase(len-1); } } return foo; } void maybe_exeunt(const int sts, const int really){ if (!really) return; if (sts == ex_good) exit(sts); const char* foo = getenv("HI_Q_GROUP"); if (!foo) exit(sts); // No point in signalling ourself: sighandler_t rslt = signal(SIGUSR1, SIG_IGN); if (rslt == SIG_ERR) { cerr << "error setting signal" << endl; } int k = kill(-atoi(foo), SIGUSR1); if (k) { cerr << "kill failed on group " << atoi(foo) << " ... "; perror(0); } exit(sts); } void exeunt(const int sts){ maybe_exeunt(sts, 1); } string progname, progid; int mypid; /* Content-Type: text/plain; charset="us-ascii" */ /* Content-Type: multipart/mixed; boundary="1170861315-1262462055-1341954763=:92165" */ // void parse_content(const string type_spec_line, string &maintype, string &boundary) { //xxx cerr << "parser called with: " << type_spec_line << endl; string get_type(type_spec_line); size_t where = get_type.find_first_of(" \t;\n"); string rest; if (where == string::npos) { // keep whole string } else { rest = get_type.substr(where+1); get_type = get_type.substr(0,where); } where = get_type.find("/"); if (where == string::npos){ maintype = ""; cerr << "could not find / in " << get_type << endl; } else { maintype = get_type.substr(0, where); } // now need to find boundary string srch = "boundary="; where = rest.find(srch); if (where != string::npos) { where += srch.length(); boundary = rest.substr(where); if (boundary[0] == '"') { boundary = boundary.substr(1); where = boundary.find_first_of("\""); } else { where = boundary.find_first_of(" \t;\n"); } if (where == string::npos) { /* do nothing, boundary=boundary as a whole */ } else { boundary = boundary.substr(0, where); } } else { //xxxxxxx cerr << "boundary= not found in " << type_spec_line << endl; } } string join(const string sep, const list stuff){ string rslt; for (list::const_iterator ptr = stuff.begin(); ptr != stuff.end(); ptr++){ if (rslt.length()) rslt += sep; rslt += *ptr; } return rslt; } class skrewt{ public: string boundary; string to; string from; string subject; string date; string message_id; string content_type; int msgsize; vector bigbuf; int saw_blank_line; int recno; int maxsize; int error_exit; int mid_required; // constructor skrewt() : boundary("x-xx-x"), msgsize(0), saw_blank_line(0), recno(0), maxsize(1000*1000), error_exit(0), mid_required(0) {} int headers(); int interstage(); int body(); }; int skrewt::headers(){ //xxxx cerr << progid << " begins" << endl; for (;;){ // outer loop over all records in the header if (cin.eof()) break; if (cin.bad()) return 1; string line; // on fail, go back to top of outer loop and check for eof versus bad if (getline(cin, line).fail()) continue; msgsize += line.length()+1; if (msgsize > maxsize) { cerr << progid << " rejection: bigger than " << maxsize << endl; exeunt(ex_spam); } cout << line << endl; bigbuf.push_back(line); string headrec = noCR(line); // for a folded record, this is the first line for (;;) { // inner loop to build a multi-line record e.g. folded record: if (cin.eof()) break; if (cin.bad()) return 1; char ch; if (cin.get(ch).fail()) continue; cin.putback(ch); if (ch != ' ' && ch != '\t') break; string line; // on fail, go back to top of inner loop and check for eof versus bad if (getline(cin, line).fail()) continue; msgsize += line.length()+1; if (msgsize > maxsize) { cerr << progid << " rejection: bigger than " << maxsize << endl; exeunt(ex_spam); } cout << line << endl; bigbuf.push_back(line); headrec += "\n" + noCR(line); } // here with a fully assembled header record // headrec (unlike line) contains no DOS CR characters int len = headrec.length(); if (len == 0) { saw_blank_line = 1; break; // no more headers in this message } // here if it's a header line string headword; string rest; size_t where = headrec.find(":"); if (where != string::npos) { headword = headrec.substr(0, where); rest = ltrim(headrec.substr(1+where)); } headword = toLower(headword); if (0){ } else if (headword == "from") { from = rest; } else if (headword == "to") { to = rest; } else if (headword == "message-id") { message_id = rest; } else if (headword == "date") { date = rest; } else if (headword == "subject") { subject = rest; } else if (headword == "content-type") { content_type = rest; } //xxxx cout << headrec.length() << " ... "; recno++; if (0) if (recno <= 6) cerr << progid << "#" << recno << " " << headrec << endl; } return 0; } int skrewt::interstage(){ if (saw_blank_line) {/* ignore */} cerr << progid <<" Mid '" << message_id << "'" << endl; // The logic here is: In order: // 1) If whitelisted, accept. No greylisting, no spam-checking. // 2) If blacklisted, reject. No greylisting, no spam-checking. // 3) If good reputation, spam-check it and send it on its way. // 4) If no reputation, greylist. // 5) If bad reputation, ???? // Expanding item 3 to the next level of detail: // 3a) If some domain vouches for this sender-IP via SPF, // then the reputation is bound to the domain. // 3c) If some domain vouches for the message vie DKIM, // then the reputation is bound to the domain. // 3d) If no SPF or DKIM, then the reputation attaches // to the sender-IP. // Expanding item 4 to the next level of detail: // 4a) If the greylisting database says this message is ripe // spam-check it. If it's OK, use it to count toward reputation. // 4b) If it is previously unseen or too old, start greylisting // timer from scratch. Reject with temporary error. // 4c) If it is in the "green" state, let the timer // continue from where it is. Reject with temporary error. list badnews; if (subject.find("-please-bounce-this-") != string::npos) { badnews.push_back("by request"); } if (!date.length()) { badnews.push_back("no date"); } if (mid_required && !message_id.length()) { badnews.push_back("no message-id"); } if (badnews.size()){ cerr << progid << " " << join(", ", badnews) << endl; if (error_exit){ cerr << progid << " '" << from << "' to '" << to << "'" << endl; exeunt(ex_spam); } } return 0; } int skrewt::body(){ string main_contype; if (content_type.length()) parse_content(content_type, main_contype, boundary); // some slightly-useful booleans: int currently_text = main_contype == "text"; int main_multipart = main_contype == "multipart"; // early-stage thinking has been done. // Now spew the rest of the message //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl; int in_subheads(0); int textlines(0); for (;;){ // outer loop over all lines in the body if (cin.eof()) break; if (cin.bad()) return 1; string line; // on fail, go back to top of outer loop and check for eof versus bad if (getline(cin, line).fail()) continue; msgsize += line.length()+1; if (msgsize > maxsize) { cerr << progid << " rejection: bigger than " << maxsize << endl; maybe_exeunt(ex_spam, error_exit); } bigbuf.push_back(line); cout << line << endl; if (in_subheads){ if (line == "" || line == "\r") in_subheads = 0; } if (in_subheads){ string sub_contype; string junk; // in principle could worry about folded headers, // but in this application it doesn't actually matter string headword; string rest; size_t where = line.find(":"); if (where != string::npos) { headword = line.substr(0, where); rest = ltrim(line.substr(1+where)); } headword = toLower(headword); if (headword == "content-type") { parse_content(rest, sub_contype, junk); currently_text = sub_contype == "text"; //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl; } } else { if (main_multipart && line == "--" + boundary) { //xxxx cerr << "found subhead boundary" << endl; in_subheads = 1; continue; } if (currently_text) textlines++; } } if (0) cerr << "textlines: " << textlines << endl; if (!textlines) { cerr << progid << " rejection: no text: " << error_exit << endl; maybe_exeunt(ex_spam, error_exit); } cerr << progid << " normal completion" << endl; return(ex_good); } //////////////////////////////////////////////////////////// int main(int _argc, const char** _argv){ //// pid_t pid = getpid(); //// cout << pid << endl; //// cout << getpgid(pid) << endl; int argc(_argc); const char **argv(_argv); { progname = *argv++; argc--; mypid = getpid(); stringstream binder; binder << basename(progname) << "[" << mypid << "]"; progid = binder.str(); } skrewt mysk; while (argc) { string arg(*argv); argv++; argc--; if (arg.substr(0,2) == "--") arg = arg.substr(1); if (prefix(arg, "-help")) { usage(0); } if (0) { } else if (prefix(arg, "-mid-required")) { mysk.mid_required++; } else if (prefix(arg, "-error-exit")) { mysk.error_exit++; } else if (prefix(arg, "-maxsize")) { if (!argc) { cerr << "Option -maxsize requires an argument" << endl; exit(ex_usage); } mysk.maxsize = atoi(*argv); argv++; argc--; } else if (arg.substr(0,1) == "-") { cerr << "Unrecognized option '" << arg << "'" << endl; cerr << "For help, try: " << progname << " -help" << endl; exit(ex_usage); } else { cerr << "Extraneous verbiage '" << arg << "'" << endl; cerr << "For help, try: " << progname << " -help" << endl; exit(ex_usage); } } int rslt = mysk.headers(); if (rslt) return rslt; // Headers are done. // Do some early-stage thinking. rslt = mysk.interstage(); if (rslt) return rslt; rslt = mysk.body(); return rslt; }