From 6cdc5c9d9caecba0e13264931824a9ac3cd3f738 Mon Sep 17 00:00:00 2001 From: John Denker Date: Sun, 29 Jul 2012 15:26:53 -0700 Subject: nuisance --- tools/mail-scan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools/mail-scan.c') diff --git a/tools/mail-scan.c b/tools/mail-scan.c index 3749945..bb8605f 100644 --- a/tools/mail-scan.c +++ b/tools/mail-scan.c @@ -303,8 +303,7 @@ int main(int _argc, const char** _argv){ } } if (vflag && !foundsome_infile) { - cout << foundsome_infile - << " ... " << *file << endl; + cout << *file << endl; didprint++; } if (group_flag && didprint) cout << endl; -- cgit v1.2.3 From d074f40718110a3b289dff5fb594425431805df8 Mon Sep 17 00:00:00 2001 From: John Denker Date: Tue, 17 Jul 2012 07:04:19 -0700 Subject: show filename only once in "-l" mode --- tools/mail-scan.c | 60 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) (limited to 'tools/mail-scan.c') diff --git a/tools/mail-scan.c b/tools/mail-scan.c index bb8605f..0d7f722 100644 --- a/tools/mail-scan.c +++ b/tools/mail-scan.c @@ -168,6 +168,7 @@ public: } else { key = init; } + //xx cerr << "watcher key: " << key << " val: " << val << endl; } }; @@ -240,7 +241,7 @@ int main(int _argc, const char** _argv){ string boundary("x-xx-x"); int msgsize(0); int foundsome_infile(0); - for (;;){ // loop over all records in file + for (;;){ // loop over all records in this file if (inheads) { list Header; string line; @@ -264,44 +265,45 @@ int main(int _argc, const char** _argv){ } if (Header.front().length() == 0) { inheads = 0; + continue; // blank line needs no further processing + } + string headword; // the first thing on the line, e.g. "Subject" + string rest; + string header; + if (!multi) header = join(" ", Header); + else header = join("\n", Header); + size_t where = header.find(":"); + if (where != string::npos) { + headword = header.substr(0, where); + rest = ltrim(header.substr(1+where)); } - else { - string headword; // the first thing on the line, e.g. "Subject" - string rest; - string header; - if (!multi) header = join(" ", Header); - else header = join("\n", Header); - size_t where = header.find(":"); - if (where != string::npos) { - headword = header.substr(0, where); - rest = ltrim(header.substr(1+where)); - } // temporary? FIXME? lowercase - headword = toLower(headword); - rest = toLower(rest); - for (list::const_iterator ptr = watchword.begin(); - ptr != watchword.end(); ptr++) { - if (headword == toLower(ptr->key)) { - // here if match as to keyword; check for match as to value - if (ptr->val.length()==0 - || rest.find(toLower(ptr->val)) != string::npos) { - foundsome_infile++; - if (!vflag) { - // << foundsome_infile << " " ; (number of occurrences) - cout << *file; - if (!fname_only) cout << " :: " << header; - cout << endl; - didprint++; - } + headword = toLower(headword); + rest = toLower(rest); + for (list::const_iterator ptr = watchword.begin(); + ptr != watchword.end(); ptr++) { + if (headword == toLower(ptr->key)) { + // here if match as to keyword; check for match as to value + if (ptr->val.length()==0 + || rest.find(toLower(ptr->val)) != string::npos) { + foundsome_infile++; + if (!vflag) { + // << foundsome_infile << " " ; (number of occurrences) + cout << *file; + if (!fname_only) cout << " :: " << header; + cout << endl; + didprint++; } } } } +// only show file once, even if there might have been multiple matches: + if (fname_only && didprint) break; } else { // not in header break; } - } + } // end loop over matching records in this file if (vflag && !foundsome_infile) { cout << *file << endl; didprint++; -- cgit v1.2.3 From cbbd45a9700a660bcc4e2f762f5004c2aa2b1078 Mon Sep 17 00:00:00 2001 From: John Denker Date: Tue, 17 Jul 2012 14:33:26 -0700 Subject: implement "-addr" option in mail-scan --- tools/mail-scan.c | 51 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 9 deletions(-) (limited to 'tools/mail-scan.c') diff --git a/tools/mail-scan.c b/tools/mail-scan.c index 0d7f722..9e79ff9 100644 --- a/tools/mail-scan.c +++ b/tools/mail-scan.c @@ -13,6 +13,12 @@ // Then: // grep score=[34] /home/user/Maildir/new/* -l | xargs mv-to -i /home/user/Maildir/spam/ +///////////// +// Another hint: using the "-addr" feature: +// grep score=[234] /home/user/Maildir/spam/* -l | \... +// xargs mail-scan +from -addr | sort | uniq -c | sort -nr | head | blacklist-update -bare + + #include #include /* for exit() */ #include @@ -25,6 +31,8 @@ #include /* stat() */ #include /* perror */ +#include +////#include using namespace std; @@ -157,6 +165,8 @@ class watcher { public: string key; string val; + boost::regex valrx; + boost::regex keyrx; watcher(const string init) : key(""), val("") @@ -168,6 +178,8 @@ public: } else { key = init; } + keyrx = boost::regex(key, boost::regex_constants::icase); + valrx = boost::regex(val, boost::regex_constants::icase); //xx cerr << "watcher key: " << key << " val: " << val << endl; } }; @@ -189,6 +201,8 @@ int main(int _argc, const char** _argv){ int group_flag(0); int multi(0); int fname_only(0); + int addr_mode(0); + boost::regex addr_filter(string("<.*@(.*)>"), boost::regex_constants::icase); while (argc) { string arg(*argv); argv++; argc--; @@ -196,16 +210,19 @@ int main(int _argc, const char** _argv){ if (prefix(arg, "-help")) { usage(0); } - if (prefix(arg, "-vert")){ + if (prefix(arg, "-vert" /* short */)){ vflag++; continue; - } if (prefix(arg, "-group")){ + } if (prefix(arg, "-group" /* short */)){ group_flag++; continue; - } else if (prefix(arg, "-l")){ + } else if (prefix(arg, "-l" /* short */)){ fname_only++; continue; - } else if (prefix(arg, "-multi")){ + } else if (prefix(arg, "-address" /* long */)){ + addr_mode++; + continue; + } else if (prefix(arg, "-multi" /* long */)){ multi++; continue; } else if (arg.substr(0,1) == "-") { @@ -288,11 +305,27 @@ int main(int _argc, const char** _argv){ || rest.find(toLower(ptr->val)) != string::npos) { foundsome_infile++; if (!vflag) { - // << foundsome_infile << " " ; (number of occurrences) - cout << *file; - if (!fname_only) cout << " :: " << header; - cout << endl; - didprint++; + if (!addr_mode){ + // << foundsome_infile << " " ; (number of occurrences) + cout << *file; + if (!fname_only) { + cout << " :: " << header; + } + cout << endl; + didprint++; + } else /* addr_mode */{ + boost::smatch matches; +//// boost::match_flag_type flg; +//// boost::regex_search(header, matches, addr_filter); +//// boost::regex_search(header.begin(), header.end(), matches, addr_filter, flg); +/// boost::regex_search(header, matches, addr_filter); + if (boost::regex_search(header, matches, addr_filter)){ + cout << string(matches[1].first, matches[1].second) << endl; + } else { + cerr << "no match, ignoring: " << header << endl; + } + didprint++; + } } } } -- cgit v1.2.3 From 58b88adfe892e5a327148ad7a991d453ba231607 Mon Sep 17 00:00:00 2001 From: John Denker Date: Tue, 17 Jul 2012 15:17:17 -0700 Subject: implement regular expressions --- tools/mail-scan.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'tools/mail-scan.c') diff --git a/tools/mail-scan.c b/tools/mail-scan.c index 9e79ff9..1ea245f 100644 --- a/tools/mail-scan.c +++ b/tools/mail-scan.c @@ -15,9 +15,8 @@ ///////////// // Another hint: using the "-addr" feature: -// grep score=[234] /home/user/Maildir/spam/* -l | \... -// xargs mail-scan +from -addr | sort | uniq -c | sort -nr | head | blacklist-update -bare - +// mail-scan +x-spam.*:score=[234] /home/jean/Maildir/spam/* -l | \.... +// xargs mail-scan +from -addr | sort | uniq -c | sort -nr | head -20 #include #include /* for exit() */ @@ -294,15 +293,14 @@ int main(int _argc, const char** _argv){ headword = header.substr(0, where); rest = ltrim(header.substr(1+where)); } -// temporary? FIXME? lowercase - headword = toLower(headword); - rest = toLower(rest); + for (list::const_iterator ptr = watchword.begin(); ptr != watchword.end(); ptr++) { - if (headword == toLower(ptr->key)) { +// regex_match not regex_search ... keyrx must match *whole* headword + if (boost::regex_match(headword, ptr->keyrx)){ // here if match as to keyword; check for match as to value if (ptr->val.length()==0 - || rest.find(toLower(ptr->val)) != string::npos) { + || boost::regex_search(rest, ptr->valrx)){ foundsome_infile++; if (!vflag) { if (!addr_mode){ -- cgit v1.2.3 From 6e8083ff4ffe3fd2b6d337386637a2b5c1378cf7 Mon Sep 17 00:00:00 2001 From: John Denker Date: Wed, 25 Jul 2012 13:05:03 -0700 Subject: fix a bunch of DOS-CR bugs --- tools/mail-scan.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools/mail-scan.c') diff --git a/tools/mail-scan.c b/tools/mail-scan.c index 1ea245f..dc8aa5c 100644 --- a/tools/mail-scan.c +++ b/tools/mail-scan.c @@ -183,6 +183,16 @@ public: } }; +string noCR(const string bar){ + string foo(bar); + int len = foo.length(); + if (len){ + if (foo[len-1] == '\r') { + foo.erase(len-1); + } + } + return foo; +} //////////////////////////////////////////////////////////// int main(int _argc, const char** _argv){ @@ -268,6 +278,7 @@ int main(int _argc, const char** _argv){ return 1; } if (getline(infile, line).fail()) continue; + line = noCR(line); Header.push_back(line); msgsize += line.length()+1; if (msgsize > maxsize) { @@ -335,6 +346,7 @@ int main(int _argc, const char** _argv){ break; } } // end loop over matching records in this file + if (vflag && !foundsome_infile) { cout << *file << endl; didprint++; -- cgit v1.2.3 From 63be414b62f3234ad80607b95e8e71e33bfd8025 Mon Sep 17 00:00:00 2001 From: John Denker Date: Sun, 29 Jul 2012 17:00:15 -0700 Subject: move more stuff to utils.c ... I hate duplication of code --- tools/mail-scan.c | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) (limited to 'tools/mail-scan.c') diff --git a/tools/mail-scan.c b/tools/mail-scan.c index dc8aa5c..b0c4137 100644 --- a/tools/mail-scan.c +++ b/tools/mail-scan.c @@ -31,7 +31,7 @@ #include /* perror */ #include -////#include +#include "utils.h" using namespace std; @@ -105,30 +105,6 @@ int cmp_casefold(const std::string& a, const std::string& b) { return 0; } - -string toLower(const std::string& a){ - string rslt = a; - string::iterator rr; - for (rr = rslt.begin(); rr != rslt.end(); rr++){ - *rr = tolower(*rr); - } - return rslt; -} - -//////////////// -string ltrim(string foo){ - size_t where = foo.find_first_not_of(" \t\r\n"); - if (where == foo.npos) return foo; - return foo.substr(where); -} - -//////////////// -// little utility to help with argument parsing: -// -int prefix(const string shorter, const string longer){ - return shorter == longer.substr(0, shorter.length()); -} - void exeunt(const int sts){ if (sts == sa_good) exit(sts); -- cgit v1.2.3