///////////////////
// skrewt.c
//
// scrutinize email
//
// Hint:
//   mail-scan +from * | iconv -c \...
//     | sed 's/.*@//;s/>$//' | sort | uniq -c | sort -nr > some-junk.from-count
//
// Also:
//   grep score=[34] /home/user/Maildir/new/* -l | xargs mail-scan +From | blacklist-update
// Then:
//   grep score=[34] /home/user/Maildir/new/* -l | xargs mv-to -i /home/user/Maildir/spam/

#include <iostream>
#include <stdlib.h>             /* for exit() */
#include <string>
#include <list>
#include <ctype.h>              /* toupper */
#include <fstream>
#include <signal.h>
#include <sys/types.h>          /* for stat() */
#include <sys/stat.h>           /* stat() */
#include <stdio.h>              /* perror */

using namespace std;

// Print the help text and terminate the process.
//   sts: exit status; zero sends the text to stdout, nonzero to stderr.
// Never returns.
void usage(const int sts){
  (sts ? cerr : cout) <<
"Usage: skrewt [options]\n"
"\n"
" Scrutinizes email. Reads stdin, copies it to stdout.\n"
" Exit result 0 means good, 1 means rejection (spam).\n"
" Writes reason for rejection to stderr.\n"
"\n"
" Typically used as a filter in a pipeline, along with spamc -E\n"
" Options\n"
" -help print this msg (and exit immediately).\n"
" -maxsize ii msg size in bytes; anything bigger will be rejected.\n"
"\n"
" Messages containing the string '-please-bounce-this-' will be rejected.\n"
" Messages with no date will be rejected.\n"
;
  exit(sts);
}

// exit codes, compatible with spamassassin (not with qmail-queue)
const int sa_good(0);
const int sa_spam(1);
const int sa_usage(64);

/////////////////////////////////////////////////////////
// Case insensitive comparison of strings
//
// Strict-weak-ordering functor: returns true when a sorts before b
// once both are folded to lower case.  When one string is a prefix
// of the other, the shorter one sorts first.
class lessthan_foldcase{
public:
  bool operator() (const std::string& a, const std::string& b) const {
    const size_t a_len = a.length();
    const size_t b_len = b.length();
    const size_t lim = a_len < b_len ? a_len : b_len;
    for (size_t i = 0; i < lim; i++){
      // tolower() requires a value representable as unsigned char;
      // a negative plain char would be undefined behavior.
      const int cha = tolower(static_cast<unsigned char>(a[i]));
      const int chb = tolower(static_cast<unsigned char>(b[i]));
      if (cha < chb) return true;
      if (cha > chb) return false;
    }
    // here if one is an extension of the other
    return a_len < b_len;
  }
};

// Returns negative if a is less than b in alphabetical order
// returns 0 if they are the same, or positive if a is greater.
// Like perl cmp operator, but ignores case.
int cmp_casefold(const std::string& a, const std::string& b) { string::const_iterator aa, bb; aa = a.begin(); bb = b.begin(); while (aa != a.end() && bb != b.end()){ char ca = tolower(*aa++); char cb = tolower(*bb++); if (ca != cb) return ca < cb ? -2 : 2; } if (aa != a.end()) return 1; // a is longer if (bb != b.end()) return -1; // b is longer return 0; } string toLower(const std::string& a){ string rslt = a; string::iterator rr; for (rr = rslt.begin(); rr != rslt.end(); rr++){ *rr = tolower(*rr); } return rslt; } //////////////// string ltrim(string foo){ size_t where = foo.find_first_not_of(" \t\r\n"); if (where == foo.npos) return foo; return foo.substr(where); } //////////////// // little utility to help with argument parsing: // int prefix(const string shorter, const string longer){ return shorter == longer.substr(0, shorter.length()); } void exeunt(const int sts){ if (sts == sa_good) exit(sts); const char* foo = getenv("HI_Q_GROUP"); if (!foo) exit(sts); // No point in signalling ourself: sighandler_t rslt = signal(SIGUSR1, SIG_IGN); if (rslt == SIG_ERR) { cerr << "error setting signal" << endl; } int k = kill(-atoi(foo), SIGUSR1); if (k) { cerr << "kill failed on group " << atoi(foo) << " ... 
"; perror(0); } exit(sts); } string join(const string sep, list stuff){ string rslt; if (!stuff.size()) return rslt; list::const_iterator ptr = stuff.begin(); rslt = *ptr++; for (; ptr != stuff.end(); ptr++){ rslt += sep; rslt += *ptr; } return rslt; } //////////////////////////////////////////////////////////// int main(int _argc, const char** _argv){ //// pid_t pid = getpid(); //// cout << pid << endl; //// cout << getpgid(pid) << endl; int argc(_argc); const char **argv(_argv); string progname(*argv); argv++; argc--; int maxsize(1000000); list watchword; list dofile; int vflag(0); int multi(0); while (argc) { string arg(*argv); argv++; argc--; if (arg.substr(0,2) == "--") arg = arg.substr(1); if (prefix(arg, "-help")) { usage(0); } if (prefix(arg, "-verify")){ vflag++; continue; } else if (prefix(arg, "-multi")){ multi++; continue; } else if (arg.substr(0,1) == "-") { cerr << "Unrecognized option '" << arg << "'" << endl; cerr << "For help, try: " << progname << " -help" << endl; exit(sa_usage); } if (arg.substr(0,1) == "+") { watchword.push_back(arg.substr(1)); } else { dofile.push_back(arg); } } for (list::const_iterator file = dofile.begin(); file != dofile.end(); file++) { struct stat filestatus; stat(file->c_str(), &filestatus ); if (S_ISDIR(filestatus.st_mode)) { cerr << "is directory: " << *file << endl; continue; } ifstream infile; infile.open(file->c_str()); if (!infile.good()) { cerr << "Failed to open file: " << *file << endl; exit(1); } int inheads(1); string boundary("x-xx-x"); int msgsize(0); int foundsome_infile(0); for (;;){ // loop over all records in file if (inheads) { list Header; string line; for (;;) { // loop over all lines in this record if (infile.eof()) break; if (infile.bad()) { cerr << "mail-scan: read error on file '" << *file << "'" << endl; return 1; } if (getline(infile, line).fail()) continue; Header.push_back(line); msgsize += line.length()+1; if (msgsize > maxsize) { cerr << "skrewt rejection: bigger than " << maxsize << endl; 
exeunt(sa_spam); } char ch; if (infile.get(ch).fail()) continue; infile.putback(ch); if (ch != ' ' && ch != '\t') break; } if (Header.front().length() == 0) { inheads = 0; } else { string headword; string rest; string header; if (!multi) header = join(" ", Header); else header = join("\n", Header); size_t where = header.find(":"); if (where != string::npos) { headword = header.substr(0, where); rest = ltrim(header.substr(1+where)); } headword = toLower(headword); for (list::const_iterator ptr = watchword.begin(); ptr != watchword.end(); ptr++) { if (headword == toLower(*ptr)) { foundsome_infile++; if (!vflag) cout // << foundsome_infile << " " << *file << " :: " << header << endl; } } } // cout << header << endl; } else { // not in header break; } } if (vflag && !foundsome_infile) cout << foundsome_infile << " ... " << *file << endl; } }