/////////////////// // mail-scan.c #include #include /* for exit() */ #include #include #include /* toupper */ #include #include #include /* for stat() */ #include /* stat() */ #include /* perror */ #include #include "utils.h" using namespace std; void usage(const int sts){ (sts ? cerr : cout) << "Usage: mail-scan [options] filename [more filenames]\n" "\n" " Options\n" " -help print this msg (and exit immediately).\n" " -vert invert: print only if *no* match.\n" " -l print filename only, not matching text.\n" " -group print a blank line after every match.\n" " -max nn print at most nn records per file.\n" " -multi print multi-line records on multiple lines\n" " (as opposed to smashing them all onto one long line\n" " -addr assume field contains somebody ; print just foo@bar.com\n" "\n" "\n" " Hint:\n" " mail-scan +from * | iconv -c ...\n" " | sed 's/.*@//;s/>$//' | sort | uniq -c | sort -nr > some-junk.from-count\n" "\n" " Also:\n" " grep score=[34] /home/user/Maildir/new/* -l | \\\n" " xargs mail-scan +From | blacklist-update\n" " Then:\n" " grep score=[34] /home/user/Maildir/new/* -l | \\\n" " xargs mv-to -i /home/user/Maildir/spam/\n" " Another hint: using the '-addr' feature:\n" " mail-scan +x-spam.*:score=[234] /home/jean/Maildir/spam/* -l | \\\n" " xargs mail-scan +from -addr | sort | uniq -c | sort -nr | head -20\n" ; exit(sts); } // exit codes, compatible with spamassassin (not with qmail-queue) const int sa_good(0); const int sa_spam(1); const int sa_usage(64); void exeunt(const int sts){ if (sts == sa_good) exit(sts); const char* foo = getenv("HI_Q_GROUP"); if (!foo) exit(sts); // No point in signalling ourself: sighandler_t rslt = signal(SIGUSR1, SIG_IGN); if (rslt == SIG_ERR) { cerr << "error setting signal" << endl; } int k = kill(-atoi(foo), SIGUSR1); if (k) { cerr << "kill failed on group " << atoi(foo) << " ... "; perror(0); } exit(sts); } class watcher { public: string key; string val; boost::regex valrx; boost::regex keyrx; watcher(const string init) : key(""), val("") { size_t where = init.find(":"); if (where != string::npos) { key = init.substr(0, where); val = init.substr(1+where); } else { key = init; } keyrx = boost::regex(key, boost::regex_constants::icase); valrx = boost::regex(val, boost::regex_constants::icase); //xx cerr << "watcher key: " << key << " val: " << val << endl; } }; //////////////////////////////////////////////////////////// int main(int _argc, const char** _argv){ //// pid_t pid = getpid(); //// cout << pid << endl; //// cout << getpgid(pid) << endl; int argc(_argc); const char **argv(_argv); string progname(*argv); argv++; argc--; int maxsize(1000000); list watchword; list dofile; int vflag(0); int group_flag(0); int multi(0); int maxlines(0); int fname_only(0); int addr_mode(0); boost::regex host_filter(string("<.*@(.*)>"), boost::regex_constants::icase); boost::regex addr_filter(string("<(.*@.*)>"), boost::regex_constants::icase); while (argc) { string arg(*argv); argv++; argc--; if (arg.substr(0,2) == "--") arg = arg.substr(1); if (prefix(arg, "-help")) { usage(0); } if (prefix(arg, "-vert" /* short */)){ vflag++; continue; } if (prefix(arg, "-group" /* short */)){ group_flag++; continue; } else if (prefix(arg, "-l" /* short */)){ fname_only++; continue; } else if (prefix(arg, "-address" /* long */)){ addr_mode++; continue; } else if (prefix(arg, "-max" /* long */)){ if (!argc){ cerr << "Option '" << arg << "' requires an argument" << endl; cerr << "For help, try: " << progname << " -help" << endl; exit(sa_usage); } maxlines = atoi(*argv++); argc--; continue; } else if (prefix(arg, "-multi" /* long */)){ multi++; continue; } else if (arg.substr(0,1) == "-") { cerr << "Unrecognized option '" << arg << "'" << endl; cerr << "For help, try: " << progname << " -help" << endl; exit(sa_usage); } if (arg.substr(0,1) == "+") { watchword.push_back(arg.substr(1)); } else { dofile.push_back(arg); } } // loop over all files for (list::const_iterator file = dofile.begin(); file != dofile.end(); file++) { int didprint(0); struct stat filestatus; stat(file->c_str(), &filestatus ); if (S_ISDIR(filestatus.st_mode)) { cerr << "is directory: " << *file << endl; continue; } ifstream infile; infile.open(file->c_str()); if (!infile.good()) { cerr << "Failed to open file: " << *file << endl; // missing file is non-fatal; go on to next file continue; } int inheads(1); string boundary("x-xx-x"); int msgsize(0); int foundsome_infile(0); for (;;){ // loop over all records in this file if (inheads) { list Header; string line; for (;;) { // loop over all lines in this record if (infile.eof()) break; if (infile.bad()) { cerr << "mail-scan: read error on file '" << *file << "'" << endl; return 1; } if (getline(infile, line).fail()) continue; line = noCR(line); Header.push_back(line); msgsize += line.length()+1; if (msgsize > maxsize) { cerr << "skrewt rejection: bigger than " << maxsize << endl; exeunt(sa_spam); } char ch; if (infile.get(ch).fail()) continue; infile.putback(ch); if (ch != ' ' && ch != '\t') break; } if (Header.front().length() == 0) { inheads = 0; continue; // blank line needs no further processing } string headword; // the first thing on the line, e.g. "Subject" string rest; string header; if (!multi) header = join(" ", Header); else header = join("\n", Header); size_t where = header.find(":"); if (where != string::npos) { headword = header.substr(0, where); rest = ltrim(header.substr(1+where)); } for (list::const_iterator ptr = watchword.begin(); ptr != watchword.end(); ptr++) { // regex_match not regex_search ... keyrx must match *whole* headword if (boost::regex_match(headword, ptr->keyrx)){ // here if match as to keyword; check for match as to value if (ptr->val.length()==0 || boost::regex_search(rest, ptr->valrx)){ foundsome_infile++; if (!vflag) { if (!addr_mode){ // << foundsome_infile << " " ; (number of occurrences) cout << *file; if (!fname_only) { cout << " :: " << header; } cout << endl; didprint++; if (maxlines && didprint >= maxlines) goto endfile; } else /* addr_mode */{ boost::smatch matches; if (boost::regex_search(header, matches, addr_filter)){ cout << string(matches[1].first, matches[1].second) << endl; } else { string hdr(header); size_t where = hdr.find(':'); if (where != string::npos) { hdr = ltrim(hdr.substr(1+where)); } else { // DUBIOUS: print the whole thing cerr << "Warning, expected a ':', didn't see it" << endl; } cout << hdr << endl; } didprint++; if (maxlines && didprint >= maxlines) goto endfile; } } } } } // only show file once, even if there might have been multiple matches: if (fname_only && didprint) break; } else { // not in header break; } } // end loop over matching records in this file endfile:;;;; if (vflag && !foundsome_infile) { cout << *file << endl; didprint++; } if (group_flag && didprint) cout << endl; } }