#include "libskrewt.h"
#include "utils.h"
// NOTE(review): the names of the two angle-bracket includes that followed
// were lost when this file was extracted.  The standard headers below are
// the ones this file visibly uses; confirm against version control.
#include <signal.h>     /* signal(), kill(), SIGUSR1 */
#include <stdio.h>      /* perror() */
#include <stdlib.h>     /* getenv(), exit(), strtol() */
#include <sys/types.h>  /* pid_t */
#include <iostream>
#include <list>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

// Pick apart the value of a Content-Type header.
//
//   type_spec_line  header value, e.g.  multipart/mixed; boundary="xyz"
//   maintype        (out) the text before the "/", lower-cased;
//                   set to "" (with a diagnostic on cerr) if there is no "/"
//   boundary        (out) value of the boundary= parameter;
//                   left untouched when no such parameter is present
//
// Media types and parameter names are case-insensitive, so both are
// matched without regard to case.  The boundary value itself is
// case-sensitive and is returned exactly as written.
void parse_content(const string type_spec_line,
                   string &maintype, string &boundary) {
//xxx  cerr << "parser called with: " << type_spec_line << endl;
  const char* const delimiters = " \t;\n";

  // Split into "type/subtype" and whatever follows the first delimiter.
  string get_type(type_spec_line);
  string rest;
  size_t where = get_type.find_first_of(delimiters);
  if (where != string::npos) {
    rest = get_type.substr(where+1);
    get_type = get_type.substr(0, where);
  }                             // else: keep whole string, rest stays empty

  where = get_type.find("/");
  if (where == string::npos) {
    maintype = "";
    cerr << "could not find / in " << get_type << endl;
  } else {
    maintype = toLower(get_type.substr(0, where));
  }

  // now need to find boundary
  const string srch = "boundary=";
  // Search a lower-cased copy, but cut the value out of the original.
  // Assumes toLower() does not change the length of its argument
  // -- TODO confirm against utils.
  where = toLower(rest).find(srch);
  if (where == string::npos) {
//xxxxxxx    cerr << "boundary= not found in " << type_spec_line << endl;
    return;
  }

  boundary = rest.substr(where + srch.length());
  if (!boundary.empty() && boundary[0] == '"') {
    // quoted value: runs up to the closing quote
    boundary = boundary.substr(1);
    where = boundary.find_first_of("\"");
  } else {
    // bare value: runs up to the next delimiter
    where = boundary.find_first_of(delimiters);
  }
  if (where != string::npos) {
    boundary = boundary.substr(0, where);
  }                             // else: the value is everything that is left
}

// Parse the saved "Received: from ..." record held in received_from.
//
// Expected shape, word by word:
//      from  rDNS  [ (HELO  name) ]  ( [user@]ip )
// Fills in proximta_rDNS, proximta_HELO (defaults to the rDNS word when
// there is no HELO clause), proximta_IP and, when the parenthesized word
// contains an "@", proximta_AuthUser.
//
// Returns 0 on success; ex_syserr (after a diagnostic on cerr) when the
// record does not have the expected shape.
int skrewt::krunch_rfrom(){
  stringstream parse;
  parse.str(received_from);

  string word;
  parse >> word;
  if (word != "from") {
    cerr << progid << " bad 'Received: from' line ... '"
         << word << "'" << endl;
    return ex_syserr;
  }

  parse >> proximta_rDNS;
  parse >> word;
  if (word == "(HELO" /*)*/) {
    parse >> proximta_HELO;
    proximta_HELO = rtrim(proximta_HELO, "()");
    parse >> word;
  } else {
    proximta_HELO = proximta_rDNS;
  }

  // What remains must be a single parenthesized word.
  size_t len = word.length();
  if (len<2 || word[0] != '(' || word[len-1] != ')') {
    cerr << progid << " bad 'Received: from' line ;;; '"
         << word << "'" << endl;
    return ex_syserr;
  }
  proximta_IP = word.substr(1, len-2);

  // An "@" separates an authenticated user name from the address.
  size_t where = proximta_IP.find("@");
  if (where != string::npos){
    proximta_AuthUser = proximta_IP.substr(0, where);
    proximta_IP = proximta_IP.substr(1+where);
  }
  return 0;
}

// Read the message header from xin, up to and including the blank line
// that ends it (or up to end of input).
//
// Every physical line read is appended to bigbuf and counted in msgsize
// (line length plus one); exceeding maxsize calls exeunt(ex_spam).
// Folded records (continuation lines starting with space or tab) are
// joined with "\n" before being examined.  The values of the From, To,
// Return-path, Message-id, Date, Subject, Content-type and Delivered-to
// headers are stored in the corresponding members, as is the first
// Received header for which prefix("from ", value) holds.
//
// Returns 1 if the stream goes bad, 0 otherwise.
int skrewt::headers(istream& xin){
//xxxx  cerr << progid << " begins" << endl;

  for (;;){             // outer loop over all records in the header
    if (xin.eof()) break;
    if (xin.bad()) return 1;
    string line;
// on fail, go back to top of outer loop and check for eof versus bad
    if (getline(xin, line).fail()) continue;
    msgsize += line.length()+1;
    if (msgsize > maxsize) {
      cerr << progid << " rejection: bigger than " << maxsize << endl;
      exeunt(ex_spam);
    }
    bigbuf.push_back(line);
    string headrec = noCR(line);  // for a folded record, this is the first line

    // The blank line ends the header.  This must be decided *before*
    // looking for continuation lines: otherwise a body whose first line
    // starts with a space or tab would be glued onto the empty record
    // and the end of the header would be missed.
    if (headrec.empty()) {
      saw_blank_line = 1;
      break;                    // no more headers in this message
    }

    for (;;) {          // inner loop to build a multi-line record e.g. folded record:
      if (xin.eof()) break;
      if (xin.bad()) return 1;
      // Peek at the next character; only space or tab continues the record.
      char ch;
      if (xin.get(ch).fail()) continue;
      xin.putback(ch);
      if (ch != ' ' && ch != '\t') break;
      string more;
// on fail, go back to top of inner loop and check for eof versus bad
      if (getline(xin, more).fail()) continue;
      msgsize += more.length()+1;
      if (msgsize > maxsize) {
        cerr << progid << " rejection: bigger than " << maxsize << endl;
        exeunt(ex_spam);
      }
      bigbuf.push_back(more);
      headrec += "\n" + noCR(more);
    }
// here with a fully assembled header record
// headrec (unlike line) contains no DOS CR characters

// here if it's a header line
    string headword;
    string rest;
    size_t where = headrec.find(":");
    if (where != string::npos) {
      headword = headrec.substr(0, where);
      rest = ltrim(headrec.substr(1+where));
    }
    headword = toLower(headword);       // header names are case-insensitive

    if (headword == "from") {
      from = rest;
    } else if (headword == "to") {
      to = rest;
    } else if (headword == "return-path") {
      return_path = rest;
    } else if (headword == "message-id") {
      message_id = rest;
    } else if (headword == "received") {
      // keep only the first one seen
      if (!received_from.length() && prefix("from ", rest)){
        received_from = rest;
      }
    } else if (headword == "date") {
      date = rest;
    } else if (headword == "subject") {
      subject = rest;
    } else if (headword == "content-type") {
      content_type = rest;
    } else if (headword == "delivered-to") {
      delivered_to = rest;
    }
    recno++;
    if (0) if (recno <= 6) cerr << progid << "#" << recno
                << " " << headrec << endl;
  }
  return 0;
}

// Write every line collected so far in bigbuf to xout, one per line.
// Always returns 0.
int skrewt::dump_headers(std::ostream& xout){
  for (vector<string>::const_iterator foo = bigbuf.begin();
        foo != bigbuf.end(); ++foo){
    xout << *foo << endl;
  }
  return 0;
}

// Checks made after the header has been read and before the body.
//
// Logs some header values, collects reasons to reject the message in
// badnews, and -- unless the subject carries both whitelist words --
// reports them; when error_exit is set it then calls exeunt(ex_spam).
// Returns 0 otherwise.
int skrewt::interstage(){

  if (saw_blank_line) {/* ignore */}
// Note that the headers are in reverse-chronological order:
  cerr << progid <<" Return-path: " << return_path << endl;

// NOTE(review): the text that followed the statement above, up to the
// tail of the shell pipeline quoted in the note below, was lost when
// this file was extracted (everything between a '<' and the next '>'
// was stripped).  Any statements that lived there -- presumably more
// header logging, and possibly the call to krunch_rfrom(), which is
// not called anywhere else in this file -- must be restored from
// version control.  TODO confirm.

#if 0
/* (the beginning of this note, and the label that followed each
   count, were lost in extraction)
   ... "}' | sort | uniq -c | sort -nr
     81
     17
      8
      8
      7
      6
      5
      4
      4
      4
      3
      2
      2
      2
      1
      1
      1
      1
      1
      1
      1
*/
#endif

#if 0
/* DKIM users */
     52 d=googlegroups.com;
     27 d=barackobama.com;
     10 d=gmail.com;
      5 d=bronto.com;
      5 d=bluehornet.com;
      4 d=news.abebooks.com;
      2 d=yahoo.co.uk;
      2 d=sbcglobal.net;
      2 d=embarqmail.com;
      2 d=emailms.angieslist.com;
      1 d=newsletters.sourceforge.net;
      1 d=members.ebay.com;
      1 d=info.citibank.com;
      1 d=ebay.com;
      1 d=commail1.co.za;
#endif

  list<string> badnews;
  int whitelisted(0);

  // Both words must appear in the subject to bypass the checks below.
  if (subject.find("sesame") != string::npos
       && subject.find("swordfish") != string::npos) {
    whitelisted++;
  }

  if (delivered_to.length()){
    cerr << progid << " Delivered-to: <<<" << delivered_to << ">>>" << endl;
  }
  if (toLower(trim(delivered_to)) == "jean@av8n.com") {
    badnews.push_back("Looping Delivered-to: " + delivered_to);
  }
  if (subject.find("-please-bounce-this-") != string::npos) {
    badnews.push_back("by request");
  }
  if (!date.length()) {
    badnews.push_back("no date");
  }
  if (mid_required && !message_id.length()) {
    badnews.push_back("no message-id");
  }

  if (badnews.size() && !whitelisted){
    cerr << progid << " " << join(", ", badnews) << endl;
    if (error_exit){
      cerr << progid << " '" << from
        << "' to '" << to << "'"
        << endl;
      exeunt(ex_spam);
    }
  }
  return 0;
}

// Copy the message body from xin to xout, line by line, while counting
// the lines that belong to a text part.
//
// Each line is also appended to bigbuf and counted in msgsize; going
// over maxsize, or finishing with no text lines at all, is reported and
// handed to maybe_exeunt(ex_spam, error_exit).
// For a multipart message, a line equal to "--" + boundary starts a
// block of sub-headers; a Content-type among them decides whether the
// lines after that block count as text.
//
// Returns 1 if the stream goes bad, ex_good otherwise.
int skrewt::body(std::istream& xin, std::ostream& xout){
  string main_contype;
  if (content_type.length())
    parse_content(content_type, main_contype, boundary);

// some slightly-useful booleans:
  int currently_text = main_contype == "text";
  int main_multipart = main_contype == "multipart";

  // The part delimiter does not change while the body is being read.
  const string delimiter = "--" + boundary;

// early-stage thinking has been done.
// Now spew the rest of the message
// Note that multipart messages can have sub-headers.
//xxxx  cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl;
  int in_subheads(0);
  int textlines(0);
  for (;;){             // outer loop over all lines in the body
    if (xin.eof()) break;
    if (xin.bad()) return 1;
    string line;
// on fail, go back to top of outer loop and check for eof versus bad
    if (getline(xin, line).fail()) continue;
    msgsize += line.length()+1;
    if (msgsize > maxsize) {
      cerr << progid << " rejection: bigger than " << maxsize << endl;
      maybe_exeunt(ex_spam, error_exit);
    }
    bigbuf.push_back(line);
    xout << line << endl;

    // A blank line (with or without a DOS CR) ends the sub-headers.
    if (in_subheads){
      if (line == "" || line == "\r") in_subheads = 0;
    }

    if (in_subheads){
      string sub_contype;
      string junk;              // boundary of a nested part; not used
// in principle could worry about folded headers,
// but in this application it doesn't actually matter
      string headword;
      string rest;
      size_t where = line.find(":");
      if (where != string::npos) {
        headword = line.substr(0, where);
        rest = ltrim(line.substr(1+where));
      }
      headword = toLower(headword);
      if (headword == "content-type") {
        parse_content(rest, sub_contype, junk);
        currently_text = sub_contype == "text";
//xxxx  cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl;
      }
    } else {
      // Compare the CR-stripped line, so that the delimiter is also
      // recognized in messages that use CRLF line endings.
      if (main_multipart && noCR(line) == delimiter) {
//xxxx  cerr << "found subhead boundary" << endl;
        in_subheads = 1;
        continue;
      }
      if (currently_text) textlines++;
    }
  }
  if (0) cerr << "textlines: " << textlines << endl;
  if (!textlines) {
    cerr << progid << " rejection: no text: " << error_exit << endl;
    maybe_exeunt(ex_spam, error_exit);
  }
  cerr << progid << " normal completion" << endl;
  return(ex_good);
}

// Exit with status sts -- but only if `really` is nonzero; otherwise
// return to the caller.
//
// For a status other than ex_good, and when the environment variable
// HI_Q_GROUP is set, SIGUSR1 is first sent to the process group it
// names (this process ignores the signal itself).
//
// The value is validated before use: kill() treats a pid argument of 0
// as "my own process group" and -1 as "every process I may signal", so
// an unparsable, non-positive, 1 or out-of-range group id is reported
// and no signal is sent.
void maybe_exeunt(const int sts, const int really){
  if (!really) return;
  if (sts == ex_good) exit(sts);

  const char* foo = getenv("HI_Q_GROUP");
  if (!foo) exit(sts);

  const long wanted = strtol(foo, 0, 10);
  const pid_t group = static_cast<pid_t>(wanted);
  if (wanted <= 1 || static_cast<long>(group) != wanted) {
    cerr << "ignoring invalid HI_Q_GROUP '" << foo << "'" << endl;
    exit(sts);
  }

// No point in signalling ourself:
  sighandler_t rslt = signal(SIGUSR1, SIG_IGN);
  if (rslt == SIG_ERR) {
    cerr << "error setting signal" << endl;
  }
  int k = kill(-group, SIGUSR1);
  if (k) {
    cerr << "kill failed on group " << group << " ... ";
    perror(0);
  }
  exit(sts);
}

// Unconditional form of maybe_exeunt(): never returns.
void exeunt(const int sts){
  maybe_exeunt(sts, 1);
}