///////////////////
// skrewt.c
//
// scrutinize email
//
// Filter program: copies a mail message from stdin to stdout line by line
// while inspecting it.  Exit status 0 means the message looked acceptable;
// exit status 21 means it was rejected, with the reason written to stderr.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <ctype.h>      /* toupper, tolower */
#include <errno.h>      /* errno */
#include <limits.h>     /* INT_MAX */
#include <signal.h>     /* signal, kill */
#include <stdio.h>      /* perror */
#include <stdlib.h>     /* for exit(), getenv(), strtol() */
#include <string.h>     /* for strcmp(), strerror() */
#include <sys/types.h>  /* pid_t */
#include <unistd.h>     /* getpid */

using namespace std;

// Print the help text and exit with status sts.
// A nonzero status sends the text to stderr, zero sends it to stdout.
void usage(const int sts){
  (sts ? cerr : cout) <<
"Usage: skrewt [options]\n"
"\n"
" Scrutinizes email. Reads stdin, copies it to stdout.\n"
" Exit result 0 means good, 21 means rejection (spam).\n"
" Writes reason for rejection to stderr.\n"
"\n"
" Typically used as a filter in a pipeline, along with spamc -E\n"
" Options\n"
" -help print this msg (and exit immediately).\n"
" -maxsize ii msg size in bytes; anything bigger will be rejected.\n"
"\n"
" Messages containing the string '-please-bounce-this-' will be rejected.\n"
" Messages with no date will be rejected.\n"
;
  exit(sts);
}

// error exit codes, mostly as stated in qmail.c
// (plain constants rather than a macro table, so no helper macro is left
// defined for the rest of the translation unit)
const int ex_good        = 0;
const int ex_spam        = 21;
const int ex_permerr     = 31;
const int ex_usage       = 39;
const int ex_greylisting = 70;
const int ex_syserr      = 71;
const int ex_comerr      = 74;

// The <ctype.h> functions require an argument representable as
// unsigned char (or EOF); a plain char with the high bit set must be
// converted first or the call is undefined.
static inline char fold_lower(const char c){
  return static_cast<char>(tolower(static_cast<unsigned char>(c)));
}

static inline char fold_upper(const char c){
  return static_cast<char>(toupper(static_cast<unsigned char>(c)));
}

/////////////////////////////////////////////////////////
// Case insensitive comparison of strings
// Strict-weak-ordering functor: true when a sorts before b, ignoring case.
// NOTE(review): the per-character comparison inside the loop was rebuilt as
// an upper-case fold followed by < / > tests; confirm against the upstream
// copy of this file.
class lessthan_foldcase{
public:
  bool operator() (const std::string& a, const std::string& b) const {
    const size_t a_len = a.length();
    const size_t b_len = b.length();
    const size_t lim = a_len < b_len ? a_len : b_len;
    for (size_t i = 0; i < lim; i++){
      const char cha = fold_upper(a[i]);
      const char chb = fold_upper(b[i]);
      if (cha < chb) return true;
      if (cha > chb) return false;
    }
    // here if one is an extension of the other
    return a_len < b_len;
  }
};

// Returns negative if a is less than b in alphabetical order
// returns 0 if they are the same, or positive if a is greater.
// Like perl cmp operator, but ignores case.
// A character mismatch yields -2 or 2; a pure length difference yields
// -1 or 1.
int cmp_casefold(const std::string& a, const std::string& b) {
  string::const_iterator aa = a.begin();
  string::const_iterator bb = b.begin();
  for (; aa != a.end() && bb != b.end(); ++aa, ++bb){
    const char ca = fold_lower(*aa);
    const char cb = fold_lower(*bb);
    if (ca != cb) return ca < cb ? -2 : 2;
  }
  if (aa != a.end()) return 1;          // a is longer
  if (bb != b.end()) return -1;         // b is longer
  return 0;
}

// Returns a lower-cased copy of a.
string toLower(const std::string& a){
  string rslt(a);
  for (string::iterator rr = rslt.begin(); rr != rslt.end(); ++rr){
    *rr = fold_lower(*rr);
  }
  return rslt;
}

////////////////
// Returns foo without its leading blanks, tabs, CRs and LFs.
// A string consisting only of such characters trims to the empty string.
string ltrim(const string& foo){
  const size_t where = foo.find_first_not_of(" \t\r\n");
  if (where == string::npos) return string();
  return foo.substr(where);
}

////////////////
// little utility to help with argument parsing:
// returns nonzero when `shorter` is a leading substring of `longer`.
int prefix(const string& shorter, const string& longer){
  return longer.compare(0, shorter.length(), shorter) == 0;
}

// Exit with status sts.
// For a nonzero status, if the environment variable HI_Q_GROUP names a
// process group, SIGUSR1 is first sent to that group.
void exeunt(const int sts){
  if (sts == ex_good) exit(sts);

  const char* group_env = getenv("HI_Q_GROUP");
  if (!group_env) exit(sts);

  // kill(-1, ...) broadcasts to every process we may signal and kill(0, ...)
  // targets our own group, so a missing, garbled or tiny number must never
  // reach kill().
  char* end = 0;
  errno = 0;
  const long group = strtol(group_env, &end, 10);
  if (errno || end == group_env || group <= 1 || group > INT_MAX) {
    cerr << "ignoring invalid HI_Q_GROUP '" << group_env << "'" << endl;
    exit(sts);
  }

// No point in signalling ourself:
  if (signal(SIGUSR1, SIG_IGN) == SIG_ERR) {
    cerr << "error setting signal" << endl;
  }

  if (kill(static_cast<pid_t>(-group), SIGUSR1)) {
    // Capture errno before any stream output has a chance to disturb it.
    const int why = errno;
    cerr << "kill failed on group " << group << " ... "
         << strerror(why) << endl;
  }
  exit(sts);
}

// Returns the part of path after the last '/', or all of path when it
// contains no '/'.
string basename(const string& path){
  const size_t where = path.rfind('/');
  if (where != string::npos) return path.substr(1+where);
  return path;
}

string progname, progid;
int mypid;

/* Content-Type: text/plain; charset="us-ascii" */
/* Content-Type: multipart/mixed; boundary="1170861315-1262462055-1341954763=:92165" */

// Picks apart the value of a Content-Type header.
//   type_spec_line  the header value (everything after "Content-Type:")
//   maintype        set to the lower-cased major type ("text", "multipart",
//                   ...), or to "" when the value holds no '/'
//   boundary        set to the boundary= parameter when a non-empty one is
//                   present; otherwise left exactly as the caller passed it
void parse_content(const string type_spec_line, string &maintype, string &boundary) {
  const string lowered(toLower(type_spec_line));

  // The type/subtype token ends at the first blank, tab, ';' or newline.
  string get_type(lowered);
  size_t where = get_type.find_first_of(" \t;\n");
  if (where != string::npos) get_type = get_type.substr(0, where);

  where = get_type.find('/');
  if (where == string::npos){
    maintype = "";
  } else {
    maintype = get_type.substr(0, where);
  }

  // The parameter name is matched without regard to case, but the value is
  // taken from the original text; toLower() preserves length, so offsets
  // found in `lowered` are valid in `type_spec_line`.
  const string srch("boundary=");
  where = lowered.find(srch);
  if (where == string::npos) return;

  string value(type_spec_line.substr(where + srch.length()));
  if (!value.empty() && value[0] == '"') {
    value = value.substr(1);
    where = value.find('"');
  } else {
    where = value.find_first_of(" \t;\r\n");
  }
  if (where != string::npos) value = value.substr(0, where);
  if (!value.empty()) boundary = value;
}

// Outcome of an attempt to read one more line from stdin.
enum fetch_result { fetch_ok, fetch_eof, fetch_bad };

// Reads the next line of stdin into `line`.
// Any read failure that is not end-of-file is reported as fetch_bad, so a
// caller looping on this function always terminates.
static fetch_result fetch_line(string& line){
  if (cin.eof()) return fetch_eof;
  if (cin.bad()) return fetch_bad;
  if (getline(cin, line).fail()) return cin.eof() ? fetch_eof : fetch_bad;
  return fetch_ok;
}

// Charges `line` against the size limit, rejecting the message (no return)
// once the running total exceeds maxsize; otherwise copies the line to
// stdout and appends it to bigbuf.
static void accept_line(const string& line, size_t& msgsize,
                        const int maxsize, vector<string>& bigbuf){
  msgsize += line.length() + 1;         // +1 for the line terminator
  if (msgsize > static_cast<size_t>(maxsize)) {
    cerr << progid << " rejection: bigger than " << maxsize << endl;
    exeunt(ex_spam);
  }
  // endl on purpose: everything accepted so far is flushed before any later
  // rejection signals the process group.
  cout << line << endl;
  bigbuf.push_back(line);
}

// Returns `line` without a single trailing carriage return, if it has one.
static string strip_cr(const string& line){
  if (!line.empty() && line[line.length()-1] == '\r') {
    return line.substr(0, line.length()-1);
  }
  return line;
}

////////////////////////////////////////////////////////////
// Parses the options, then streams the message: header records first
// (unfolding continuation lines), then the body.  Returns/exits 0 when the
// message is accepted, ex_spam via exeunt() when rejected, ex_usage on a
// command-line error, and 1 on a stdin read error.
int main(int _argc, const char** _argv){
  int argc(_argc);
  const char **argv(_argv);
  {
    if (argc > 0) {                     // argv[0] may legitimately be absent
      progname = *argv++; argc--;
    }
    mypid = getpid();
    ostringstream binder;
    binder << basename(progname) << "[" << mypid << "]";
    progid = binder.str();
  }

  int maxsize(1000*1000);

  while (argc > 0) {
    string arg(*argv); argv++; argc--;
    if (arg.substr(0,2) == "--") arg = arg.substr(1);

    // Options may be abbreviated, but an empty argument or a bare "-" is
    // not an abbreviation of anything.
    const bool is_option = arg.length() >= 2 && arg[0] == '-';

    if (is_option && prefix(arg, "-help")) {
      usage(0);
    }
    if (is_option && prefix(arg, "-maxsize")) {
      if (!argc) {
        cerr << "Option -maxsize requires an argument" << endl;
        exit(ex_usage);
      }
      char* end = 0;
      errno = 0;
      const long wanted = strtol(*argv, &end, 10);
      if (errno || end == *argv || *end != '\0'
          || wanted < 0 || wanted > INT_MAX) {
        cerr << "Option -maxsize requires a non-negative integer, got '"
             << *argv << "'" << endl;
        exit(ex_usage);
      }
      maxsize = static_cast<int>(wanted);
      argv++; argc--;
      continue;                         // option fully handled
    }
    if (arg.substr(0,1) == "-") {
      cerr << "Unrecognized option '" << arg << "'" << endl;
      cerr << "For help, try:  " << progname << " -help" << endl;
      exit(ex_usage);
    } else {
      cerr << "Extraneous verbiage '" << arg << "'" << endl;
      cerr << "For help, try:  " << progname << " -help" << endl;
      exit(ex_usage);
    }
  }

  int saw_blank_line(0);
  string boundary("x-xx-x");
  string date;
  string subject;
  string content_type;
  size_t msgsize(0);
  // Every accepted line is retained here; nothing in this file reads it back.
  vector<string> bigbuf;

  cerr << "hi there" << endl;

  for (;;){             // outer loop over all records in the header
    string headrec;
    const fetch_result got = fetch_line(headrec);
    if (got == fetch_eof) break;
    if (got == fetch_bad) return 1;
    accept_line(headrec, msgsize, maxsize, bigbuf);

    // A blank line ends the header section.  This is decided before looking
    // for continuation lines, so that a body whose first line happens to be
    // indented is not folded into the header.
    if (strip_cr(headrec).empty()) {
      saw_blank_line = 1;
      break;                            // no more headers in this message
    }

    for (;;) {          // inner loop to build a multi-line record e.g. folded record:
      if (cin.eof()) break;
      if (cin.bad()) return 1;
      const int next = cin.peek();
      if (next == istream::traits_type::eof()) {
        if (cin.bad()) return 1;
        break;
      }
      if (next != ' ' && next != '\t') break;   // next line starts a new record

      string line;
      const fetch_result more = fetch_line(line);
      if (more == fetch_eof) break;
      if (more == fetch_bad) return 1;
      accept_line(line, msgsize, maxsize, bigbuf);
      headrec += "\n" + strip_cr(line);
    }

    // here with a fully assembled header record
    const size_t colon = headrec.find(':');
    if (colon == string::npos) continue;        // not a "name: value" record

    const string headword = toLower(headrec.substr(0, colon));
    const string rest = ltrim(headrec.substr(1 + colon));
    if (headword == "date") {
      date = rest;
    } else if (headword == "subject") {
      subject = rest;
    } else if (headword == "content-type") {
      content_type = rest;
    }
  }
  cerr << "headers are done. Delimited: " << saw_blank_line << endl;

  // Headers are done.
  // Do some early-stage thinking.

  // Only the Subject header is examined for the bounce request.
  if (subject.find("-please-bounce-this-") != string::npos) {
    cerr << progid << " rejection: by request" << endl;
    exeunt(ex_spam);
  }

  if (!date.length()) {
    cerr << progid << " rejection: no date" << endl;
    exeunt(ex_spam);            // disallow mail with no date
  }

  string contype;
  int textlines(0);
  parse_content(content_type, contype, boundary);
  int is_text = contype == "text";

  // early-stage thinking has been done.
  // Now spew the rest of the message
  cerr << "body begins: " << contype << " " << is_text << endl;

  const string delimiter("--" + boundary);
  for (;;){             // outer loop over all lines in the body
    string line;
    const fetch_result got = fetch_line(line);
    if (got == fetch_eof) break;
    if (got == fetch_bad) return 1;
    accept_line(line, msgsize, maxsize, bigbuf);

    const string cooked(strip_cr(line));
    if (cooked == delimiter) continue;  // part delimiter, not text
    if (is_text && ltrim(cooked).length()) textlines++;
  }

  // The text-line count is gathered, but both uses of it are switched off.
  if (0) cerr << "textlines: " << textlines << endl;
  if (000 && !textlines) {
    cerr << progid << " rejection: no text" << endl;
    exeunt(ex_spam);
  }
  cerr << progid << " normal completion" << endl;
  exit(ex_good);
}