#include "libskrewt.h"
#include "utils.h"
// NOTE(review): the names of the five system headers that followed were lost
// from this copy of the file (only bare "#include" survived).  The headers
// below are the ones the code in this file visibly needs; confirm against
// the original source.
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>         /* IPPROTO_TCP */
#include <netdb.h>              /* getaddrinfo, freeaddrinfo, gai_strerror */
#include <signal.h>             /* signal, kill, sighandler_t */
#include <stdio.h>              /* perror */
#include <stdlib.h>             /* getenv, atoi, exit */
#include <string.h>             /* memcmp, memset */
#include <iostream>
#include <list>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

static int verbosity(0);

// Split a Content-Type value into its main type and its boundary parameter.
//
//   type_spec_line  the header value, e.g.  multipart/mixed; boundary="xyz"
//   maintype        (out) lower-cased text before the '/', or "" if the
//                   type contains no '/'
//   boundary        (out) value of the boundary= parameter (surrounding
//                   double quotes removed); old_bdy if there is no such
//                   parameter
//   old_bdy         boundary to fall back on
void parse_content(const string type_spec_line, string &maintype,
                   string &boundary, const string old_bdy) {
  if (verbosity > 1) cerr << "parser called with: " << type_spec_line << endl;

  string get_type(type_spec_line);
  size_t where = get_type.find_first_of(" \t;\n");
  string rest;
  if (where != string::npos) {
    rest = get_type.substr(where+1);
    get_type = get_type.substr(0, where);
  }
  // Lower-case the type in every case.  Previously a type with no
  // parameters after it (e.g. "TEXT/PLAIN") kept its original case, so the
  // result depended on whether a parameter happened to follow.
  get_type = toLower(get_type);

  where = get_type.find("/");
  if (where == string::npos){
    maintype = "";
    cerr << "could not find / in " << get_type << endl;
  } else {
    maintype = get_type.substr(0, where);
  }

  // now need to find boundary
  string srch = "boundary=";
  where = toLower(rest).find(srch);
  if (where == string::npos) {
    boundary = old_bdy;
    return;
  }
  where += srch.length();
  boundary = rest.substr(where);
  if (boundary[0] == '"') {
    boundary = boundary.substr(1);
    where = boundary.find_first_of("\"");
  } else {
    where = boundary.find_first_of(" \t;\n");
  }
  // if no terminator was found, the boundary is the whole remainder
  if (where != string::npos) boundary = boundary.substr(0, where);
}

// Pick apart the saved "Received: from" text.  The layout handled is
//     from <rDNS-name> [(HELO <helo-name>)] (<[user@]ip>) by ...
// Fills in proximta_rDNS.name, proximta_HELO.name, proximta_IP and
// proximta_AuthUser.
// Returns 0 on success (or when there is nothing to parse), ex_syserr when
// the text does not have the expected shape.
int skrewt::krunch_rfrom(){
  if (received_from == "") return 0;    // probably a bounce message, no rfrom
  stringstream parse;
  parse.str(received_from);
  string word;
  parse >> word;
  if (word != "from") {
    cerr << progid << " bad 'Received: from' line; no from, instead '"
         << word << "'" << endl;
    return ex_syserr;
  }
  parse >> proximta_rDNS.name;
  for (;;) {            // loop over words in this record
    // A failed extraction leaves 'word' unchanged, so without this check a
    // line that never reaches "by" would reprocess the last word forever.
    if (!(parse >> word)) {
      cerr << progid << " bad 'Received: from' line ;;; no 'by'" << endl;
      return ex_syserr;
    }
    size_t len = word.length();
    if (len < 2) {
      cerr << progid << " bad 'Received: from' line ;;; short '"
           << word << "'" << endl;
      return ex_syserr;
    }
    if (word == "by") break;
    if (word == "(HELO" /*)*/) {
      parse >> proximta_HELO.name;
      proximta_HELO.name = rtrim(proximta_HELO.name, "()");
      continue;
    }
    if (word[0] != '(' || word[len-1] != ')') {
      cerr << progid << " bad 'Received: from' line ;;; no parens: '"
           << word << "'" << endl;
      return ex_syserr;
    }
    proximta_IP = word.substr(1, len-2);
    size_t where = proximta_IP.find("@");
    if (where != string::npos){
      proximta_AuthUser = proximta_IP.substr(0, where);
      proximta_IP = proximta_IP.substr(1+where);
    }
  }
  // provide some kind of default?  maybe not.
  if (0) if (proximta_HELO.name == "") {
    proximta_HELO.name = proximta_rDNS.name + " (+-)";
  }
  return 0;
}

// Return the next physical line: the pending lookahead line if there is one
// (lookahead.err == 0), otherwise a fresh line from xin.
// The result has err == 1 and an empty string when the read fails.
xstr skrewt::getLine(istream& xin){
  if (lookahead.err == 0) {
    string pending = lookahead.str;
    lookahead.err = 1;          // mark the lookahead slot as consumed
    return {0, pending};
  }
  string rslt;
  if (getline(xin, rslt).fail()) return {1, ""};
  return {0, rslt};
}

// Return the next logical record: one line plus any continuation lines
// (lines starting with space or tab), joined with "\n" and passed through
// noCR().  Every physical line consumed is appended to bigbuf and counted
// in msgsize; exceeding maxsize calls exeunt(ex_spam).
// The first non-continuation line read is parked in 'lookahead'.
xstr skrewt::getRecord(istream& xin){
  xstr line = getLine(xin);
  if (line.err) return line;
  msgsize += line.str.length()+1;
  if (msgsize > maxsize) {
    cerr << progid << " header rejection: bigger than " << maxsize << endl;
    exeunt(ex_spam);
  }
  bigbuf.push_back(line.str);
  // for a folded record, this is the first line:
  string headrec = noCR(line.str);

  // An empty line cannot be the start of a folded record.  Without this
  // check, the blank line that ends the headers was glued onto a body whose
  // first line is indented, and the end of the headers went unnoticed.
  if (headrec.empty()) return {0, headrec};

  // inner loop to build a multi-line record e.g. folded record:
  for (;;) {
    if (xin.eof()) break;
    if (xin.bad()) return {1, ""};
    xstr more = getLine(xin);
    // A failed read normally means EOF (handled identically by the check
    // above); breaking also avoids spinning if failbit is set without eofbit.
    if (more.err) break;
    char ch(0);
    if (more.str != "") ch = more.str[0];
    if (ch != ' ' && ch != '\t') {
      lookahead = more;
      break;
    }
    // count the continuation line itself (this used to re-add the length
    // of the first line for every continuation line)
    msgsize += more.str.length()+1;
    if (msgsize > maxsize) {
      cerr << progid << " folded header rejection: bigger than "
           << maxsize << endl;
      exeunt(ex_spam);
    }
    bigbuf.push_back(more.str);
    headrec += "\n" + noCR(more.str);
  }
  return {0, headrec};
}

// Read header records from xin until the blank line that ends the headers
// (sets saw_blank_line) or until input runs out, remembering the fields of
// interest.  Returns 1 if the stream goes bad, 0 otherwise.
int skrewt::headers(istream& xin){
  //xxxx cerr << progid << " begins" << endl;

  for (;;){     // outer loop over all records in the header
    // A line parked in 'lookahead' must still be delivered even though the
    // stream itself has already hit EOF (final line without a newline).
    if (xin.eof() && lookahead.err) break;
    if (xin.bad()) return 1;
    xstr rec = getRecord(xin);
    if (rec.err) break;
    string headrec = rec.str;
    // here with a fully assembled header record
    // headrec (unlike line) contains no DOS CR characters
    if (headrec.empty()) {
      saw_blank_line = 1;
      break;                    // no more headers in this message
    }

    // here if it's a header line
    string headword;
    string rest;
    size_t where = headrec.find(":");
    if (where != string::npos) {
      headword = headrec.substr(0, where);
      rest = ltrim(headrec.substr(1+where));
    }
    headword = toLower(headword);

    if (headword == "from") {
      from = rest;
    } else if (headword == "to") {
      to = rest;
    } else if (headword == "return-path") {
      return_path.name = rest;
    } else if (headword == "message-id") {
      message_id = rest;
    }
    // Now pick up the proximal "received: from" line,
    // i.e. the latest in time,
    // i.e. the topmost in the file,
    // i.e. the one that was just now put there by qmail-smtpd:
    else if (headword == "received") {
      if (!received_from.length() && prefix("from ", rest)){
        received_from = rest;
      }
    } else if (headword == "date") {
      date = rest;
    } else if (headword == "subject") {
      subject = rest;
    } else if (headword == "content-type") {
      content_type = rest;
    } else if (headword == "delivered-to") {
      delivered_to.push_back(rest);
    }
    recno++;
    if (0) if (recno <= 6) cerr << progid << "#" << recno
        << " " << headrec << endl;
  }
  return 0;
}

// Write every line saved in bigbuf to xout, one per line.  Always returns 0.
int skrewt::dump_bigbuf(std::ostream& xout){
  for (const auto& saved : bigbuf) xout << saved << '\n';
  xout.flush();         // one flush at the end instead of one per line
  return 0;
}

// Run an SPF check of fqdn.name (used as the HELO name) against ip and
// store the verdict in fqdn.spf: pass, fail, or neutral for anything else.
// If the checker throws bad_thing, the error is logged and fqdn.spf is left
// as it was.
void check_spf(name_tester& fqdn, const string ip) {
  sepofra my_spf;
  try {
    my_spf.check(ip,            /* IP */
                 fqdn.name,     /* HELO */
                 "",            /* mail_from */
                 "",            /* rcpt_to */
                 0              /* verbosity */
    );
    string exp = my_spf.explain();
    if (exp != "") cerr << progid << " " << exp << endl;
    // keep a copy of the result:
    fqdn.spf = neutral;
    if (my_spf.result == SPF_RESULT_PASS) fqdn.spf = pass;
    if (my_spf.result == SPF_RESULT_FAIL) fqdn.spf = fail;
    /* anything else, such as soft_fail, is treated as neutral */
  } catch (bad_thing& foo) {    // by reference: no copy, no slicing
    cerr << "Caught bad thing: " << foo.what() << endl;
    return;
  }
}

// Two addrinfo records are equal when they have the same family and the
// same socket-address bytes.
bool operator==(const struct addrinfo& aaa, const struct addrinfo& bbb) {
  if (aaa.ai_family != bbb.ai_family) return 0;
  // never compare more bytes than the shorter address actually holds
  if (aaa.ai_addrlen != bbb.ai_addrlen) return 0;
  if (memcmp(aaa.ai_addr, bbb.ai_addr, aaa.ai_addrlen) != 0) return 0;
  return 1;
}

// loop over all IP addresses in linked list, checking for a match
bool matchany(const struct addrinfo& aaa, const struct addrinfo* blist){
  for (const struct addrinfo *bbb = blist; bbb != NULL; bbb = bbb->ai_next){
    if (aaa == *bbb) return 1;          // match
  }
  return 0;
}

namespace {
// Owns a list returned by a successful getaddrinfo() call and hands it to
// freeaddrinfo() when it goes out of scope.
struct addrinfo_list {
  struct addrinfo* head;
  explicit addrinfo_list(struct addrinfo* h) : head(h) {}
  ~addrinfo_list() { if (head) freeaddrinfo(head); }
  addrinfo_list(const addrinfo_list&) = delete;
  addrinfo_list& operator=(const addrinfo_list&) = delete;
};
}

// Check that the name fqdn.name maps (by forward lookup) to the address
// ipstr.  Stores pass or fail in fqdn.map2ip.
void check_map2ip(name_tester& fqdn, const string ipstr) {
  if (ipstr.length() == 0) {
    cerr << progid << " check_map2ip: no addr specified." << endl;
    fqdn.map2ip = fail;
    return;
  }

  // Convert address-as-string to address-as-bits.
  // Also get information about family.
  // The trick is, we will get address info about an /address/.
  // (This is in contrast to a normal fwd dns lookup,
  // which gets address info about a name.)
  addrinfo hints;
  memset(&hints, 0, sizeof(struct addrinfo));
  // restrict to TCP only; otherwise we get N records per address
  hints.ai_protocol = IPPROTO_TCP;

  struct addrinfo *raw = NULL;
  int error = getaddrinfo(ipstr.c_str(), NULL, &hints, &raw);
  // EAI_NONAME covers the case of malformed IP address
  // e.g. 1.2.3.4.5
  if (error == EAI_NONAME) {
    fqdn.map2ip = fail;
    return;
  }
  if (error) {          // some unexpected error
    cerr << progid << " odd error " << error
         << " in getaddrinfo for " << ipstr
         << " : " << gai_strerror(error) << endl;
    fqdn.map2ip = fail;
    return;
  }
  if (!raw) {
    cerr << progid << " ??? should never happen (ipstr with no ipbits?)"
         << endl;
    fqdn.map2ip = fail;
    return;
  }
  // only a successful result is handed to the owner; it was never freed before
  addrinfo_list ip_bits(raw);

  // do the forward dns lookup
  // result is a list of ip addresses
  raw = NULL;
  error = getaddrinfo(fqdn.name.c_str(), NULL, &hints, &raw);
  if (error == EAI_NONAME) {
    // malformed name, or no address for name
    fqdn.map2ip = fail;
    return;
  }
  if (error) {
    cerr << progid << " getaddrinfo for " << fqdn.name
         << " error " << error
         << " i.e. " << gai_strerror(error) << endl;
    fqdn.map2ip = fail;
    return;
  }
  addrinfo_list fwd_rslt(raw);

  if (!matchany(*ip_bits.head, fwd_rslt.head)) {
    fqdn.map2ip = fail;
    return;
  }
  // here if all checks have been passed
  fqdn.map2ip = pass;
}

// NOTE(review): this copy of the file is damaged from the middle of
// check_name_ip() down to the first "#endif" below: the rest of
// check_name_ip(), the opening of the function that follows it, and the
// "#if" matching that "#endif" are missing.  The surviving text is kept
// verbatim; restore the span from the original source.
void check_name_ip(name_tester& fqdn, const string ip) {
  if (ip == "") {
    cerr << "check_name_ip: should never happen: email with no IP?" <"}' | sort | uniq -c | sort -nr 81 17 8 8 7 6 5 4 4 4 3 2 2 2 1 1 1 1 1 1 1 ?
#endif

#if 0
/* DKIM users */
     52 d=googlegroups.com;
     27 d=barackobama.com;
     10 d=gmail.com;
      5 d=bronto.com;
      5 d=bluehornet.com;
      4 d=news.abebooks.com;
      2 d=yahoo.co.uk;
      2 d=sbcglobal.net;
      2 d=embarqmail.com;
      2 d=emailms.angieslist.com;
      1 d=newsletters.sourceforge.net;
      1 d=members.ebay.com;
      1 d=info.citibank.com;
      1 d=ebay.com;
      1 d=commail1.co.za;
#endif

  // Reasons to reject this message, collected below.
  // (element type restored from usage: strings are pushed and joined)
  list<string> badnews;
  int whitelisted(0);
  if (subject.find("sesame") != string::npos
      && subject.find("swordfish") != string::npos) {
    whitelisted++;
  }

  for (const auto& dto : delivered_to){
    cerr << progid << " Delivered-to: <<<" << dto << ">>>" << endl;
    if (toLower(trim(dto)) == "jean@av8n.com") {
      badnews.push_back("Looping Delivered-to: " + dto);
    }
  }

  if (subject.find("-please-bounce-this-") != string::npos) {
    badnews.push_back("by request");
  }
  if (!date.length()) {
    badnews.push_back("no date");
  }
  if (proximta_HELO.spf == fail){
    badnews.push_back("repudiated by spf (HELO)");
  }
  if (proximta_rDNS.spf == fail){
    badnews.push_back("repudiated by spf (rDNS)");
  }
  cerr << "check: return_path: " << return_path.name << endl;
  if (return_path.name.find("@aexp.com") != string::npos) {
    badnews.push_back("long-running phishing pest: '" + received_from + "'");
  }
  if (mid_required && !message_id.length()) {
    badnews.push_back("no message-id");
  }
  if (badnews.size() && !whitelisted){
    cerr << progid << " " << join(", ", badnews) << endl;
    if (strictness){
      cerr << progid << " ... badnews from '" << from
           << "' to '" << to << "'" << endl;
      exeunt(ex_spam);
    }
  }
  return 0;
}

// One level of MIME nesting: the main content type of the enclosing part
// and the boundary string in force there.
class conner{
public:
  string tp;
  string bdy;
};

// Debug helper: print the depth of the nesting stack and its top entry.
void dump(const list<conner> sitch){
  int depth = sitch.size();
  cerr << " depth: " << depth;
  if (depth){
    cerr << " tp: " << sitch.front().tp
         << " bdy: " << sitch.front().bdy;
  }
  cerr << endl;
}

// constructor
skrewt::skrewt()
  : proximta_rDNS_flagged(0),
    spf_result(SPF_RESULT_INVALID),
    boundary("x-xx-x"),
    msgsize(0),
    saw_blank_line(0),
    recno(0),
    maxsize(1000*1000),
    strictness(0),
    note(""),
    mid_required(0),
    headerbuf(0),
    bigbuf(0),
    lookahead(1, "")            // err=1: no line pending
{
  // expand the macro in a way that will initialize the decoder table:
# define foo(name) decode_test_state[name] = #name;
  test_state_macro
# undef foo
}

// Scan the message body from xin, tracking multipart boundaries and
// sub-headers, and count the lines that sit inside a "text" part.
// A body with no text lines is rejected via maybe_exeunt(ex_spam, strictness).
// Returns 1 if the stream goes bad, ex_good otherwise.
int skrewt::body(std::istream& xin, std::ostream& xout){
  // Stack of nesting levels, innermost first.
  list<conner> sitch;
  if (content_type.length()) {
    string tp, bdy;
    parse_content(content_type, tp, bdy, "");
    sitch.push_front({tp, bdy});
  } else {
    // assume single-part, all text if not otherwise specified:
    sitch.push_front({"text", ""});
  }

  // The stack becomes empty once the outermost closing boundary has been
  // seen.  Calling front() on an empty list is undefined, so every read
  // goes through these accessors, which treat an empty stack as
  // "no type, no boundary".
  const auto cur_tp  = [&sitch]() {
    return sitch.empty() ? string() : sitch.front().tp;
  };
  const auto cur_bdy = [&sitch]() {
    return sitch.empty() ? string() : sitch.front().bdy;
  };
  const auto pop = [&sitch]() {
    if (!sitch.empty()) sitch.pop_front();
  };

  // early-stage thinking has been done.
  // Now spew the rest of the message
  // Note that multipart messages can have sub-headers.
  if (verbosity) {
    cerr << "body begins: ";
    dump(sitch);
  }
  int in_subheads(0);
  int textlines(0);
  string sub_conntype("");
  for (;;){     // outer loop over all lines in the body
    // deliver a parked lookahead line even if the stream already hit EOF
    if (xin.eof() && lookahead.err) break;
    if (xin.bad()) return 1;
    xstr rec = getRecord(xin);
    if (rec.err) break;
    string line = rec.str;
    if (verbosity) cerr << "+++" << line << endl;

    if (in_subheads){
      if (line == "" || line == "\r") {
        in_subheads = 0;
        if (verbosity) cerr << "+++ end of subhead" << endl;
        string tp, bdy;
        // a part with no boundary of its own inherits the current one
        parse_content(sub_conntype, tp, bdy, cur_bdy());
        sitch.push_front({tp, bdy});
        if (verbosity) {
          cerr << "payload mode: ";
          dump(sitch);
        }
      }
    }

    if (/* still */in_subheads){
      // in principle could worry about folded headers,
      // but in this application it doesn't actually matter
      string headword;
      string rest;
      size_t where = line.find(":");
      if (where != string::npos) {
        headword = line.substr(0, where);
        rest = ltrim(line.substr(1+where));
      }
      headword = toLower(headword);
      if (headword == "content-type") {
        sub_conntype = rest;
      }
      continue;
    }

    const string bdy_now = cur_bdy();
    if (bdy_now != "" && line == "--" + bdy_now) {
      // a new part begins; leave the previous (non-multipart) part
      if (cur_tp() != "multipart") pop();
      if (verbosity) {
        cerr << "boundary: begin subhead block ";
        dump(sitch);
      }
      in_subheads = 1;
      sub_conntype = "";
      continue;
    }
    if (bdy_now != "" && line == "--" + bdy_now + "--") {
      // closing boundary: leave the last part and, if the level beneath
      // it carries the same boundary, that level as well
      pop();
      if (cur_bdy() == bdy_now) pop();
      if (verbosity) {
        cerr << "boundary: termination ";
        dump(sitch);
      }
      in_subheads = 0;
      continue;
    }
    if (cur_tp() == "text") textlines++;
  }
  if (verbosity) cerr << "textlines: " << textlines << endl;
  if (!textlines) {
    cerr << progid << " rejection: no text: " << strictness << endl;
    maybe_exeunt(ex_spam, strictness);
  }
  cerr << progid << " normal completion" << endl;
  return(ex_good);
}

// Exit with status sts, unless 'really' is zero (then just return).
// For a non-good status, first send SIGUSR1 to the process group named by
// the HI_Q_GROUP environment variable, if it is set.
void maybe_exeunt(const int sts, const int really){
  if (!really) return;
  if (sts == ex_good) exit(sts);
  const char* foo = getenv("HI_Q_GROUP");
  if (!foo) exit(sts);

  // No point in signalling ourself:
  sighandler_t rslt = signal(SIGUSR1, SIG_IGN);
  if (rslt == SIG_ERR) {
    cerr << "error setting signal" << endl;
  }
  const int pgid = atoi(foo);
  // kill(0, ...) signals the caller's own process group and kill(-1, ...)
  // signals every process the caller may signal; a non-numeric or bogus
  // HI_Q_GROUP must not trigger either of those.
  if (pgid <= 1) {
    cerr << "bad HI_Q_GROUP '" << foo << "'; not signalling" << endl;
    exit(sts);
  }
  int k = kill(-pgid, SIGUSR1);
  if (k) {
    cerr << "kill failed on group " << pgid << " ... ";
    perror(0);
  }
  exit(sts);
}

// Unconditional form of maybe_exeunt().
void exeunt(const int sts){
  maybe_exeunt(sts, 1);
}