diff options
author | John Denker <jsd@av8n.com> | 2021-10-17 10:10:18 -0700 |
---|---|---|
committer | John Denker <jsd@av8n.com> | 2021-10-17 11:09:24 -0700 |
commit | 74ddd0381aa1b1a90eb0d5300fa576cb2348eeac (patch) | |
tree | 72a9dded6f800467d52e479eb37574e6de5f2e6c | |
parent | 634d365a03cb0581a062cd3cf4db9ae69f1cde26 (diff) |
basically functional, but still a work in progress
-rw-r--r-- | .gitignore | 17 | ||||
-rw-r--r-- | arg_parser.c | 288 | ||||
-rw-r--r-- | arg_parser.h | 95 | ||||
-rw-r--r-- | binomial-coefficient-choose.h | 6 | ||||
-rw-r--r-- | makefile | 32 | ||||
-rw-r--r-- | parse_csv.c | 184 | ||||
-rw-r--r-- | parse_csv.h | 34 | ||||
-rw-r--r-- | poiss.c | 658 | ||||
-rw-r--r-- | poissonier.h | 108 |
9 files changed, 1422 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..60f56d2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +RCS +CVS +*.o +*.d +*~ +*#[0-9]* +[#]*# +.#* +*.aux +*.aux1 +*.haux +*.htoc +*.log + +############### + +poiss diff --git a/arg_parser.c b/arg_parser.c new file mode 100644 index 0000000..d4ff196 --- /dev/null +++ b/arg_parser.c @@ -0,0 +1,288 @@ +using namespace std; +#include "arg_parser.h" +#include <complex> +#include <stdexcept> + +using namespace std; + +// Convert a string to this-or-that representation. +// Specialization is not the same as instantiation! +// Atox<int> and Atox<double> need to come early, +// because they get used by later things. + +template<typename T> struct cvt2 { + static double doit(string const sss){ + stringstream parse; + parse.str(sss); + T rslt; + parse >> rslt; + if (parse.fail()) throw invalid_argument("'" + sss + "' isn't a proper " + myTraits<T>::name); + if (parse.eof()) return rslt; + do { + char ch; + parse >> ch; + if (parse.fail()) break; + if (ch == 'k') rslt *= 1024; + else if (ch == 'M') rslt *= 1048576; + else break; + parse >> ws; // without this, we wouldn't know whether we were at EoF + if (!parse.eof()) break; + return rslt; // whew! good. + } while (0); + throw invalid_argument("'" + sss + "' has extraneous verbiage"); + return 0; /* should never happen */ + } +}; + + +template<typename T> struct helper{ + static T convert(string const &arg); +}; + +template<> struct helper<string> { + static string convert(string const &arg){ + return arg; + } +}; + +template<> struct helper<int> { + static int convert(string const &arg){ + return cvt2<int>::doit(arg.c_str()); + } +}; + +template<> struct helper<double> { + static double convert(string const &arg){ + if (tolower(arg) == "inf") return Inf; + if (tolower(arg) == "-inf") return -Inf; + return cvt2<double>::doit(arg.c_str()); + } +}; + +// convert to double, then downgrade to float: +template<> struct helper<float> { + static float convert(string const &arg){ + return helper<double>::convert(arg); + } +}; + +template<typename B> struct helper<vector<B> > { + static vector<B> convert(string const &_arg){ + string arg(_arg); + vector<B> rslt; + while (arg.length()) { + string::size_type where = arg.find(','); + string word; + if (where == arg.npos) { + word = arg; + arg = ""; + } else { + word = arg.substr(0, where); + arg = arg.substr(1+where); + } + rslt.push_back(helper<B>::convert(word)); + } + return rslt; + } +}; + +template<typename B> +string num_from_list(const string arg, B& rslt){ + if (arg.length()) { + string::size_type where = arg.find_first_of(", \t"); + string word; + if (where == arg.npos) { + rslt = helper<B>::convert(arg); + return ""; + } else { + rslt = helper<B>::convert(arg.substr(0, where)); + return arg.substr(1+where); + } + } +// should never get here + return arg; +} + +template<typename B> struct helper<complex<B> > { + static complex<B> convert(string const &_arg){ + string arg(_arg); + B rp(0), ip(0); + arg = num_from_list(arg, rp); + arg = num_from_list(arg, ip); + return complex<B>(rp, ip); + } +}; + +// This is the interface that ordinary users use. +// The point of all this is that you can perform +// "partial specification" (what I might call re-specialization) +// on classes, but not on functions. +template <typename T> +T Atox(std::string const &str) { + // Just delegate. + return helper<T>::convert(str); +} + +////////////////////////////////// +// Various getters. + +//+++ fileGetter .... +//ordinary constructor: + fileGetter:: fileGetter(const std::string fname){ + file.open(fname.c_str()); + } + + std::string fileGetter::get(){ + std::string str; + for (;;) { + file >> str; // try to read one word + if (file.fail()) return ""; // failed + if (str[0] != '#') return str; // got a comment; + string junk; + getline(file, junk); //throw it away + // go around the loop, read next line + } + } + +// beware that neither bad() nor fail() is the opposite of good() + int fileGetter::fail() { + return file.fail(); + } + +//+++ cmdGetter .... +//ordinary constructor: + cmdGetter::cmdGetter(int const _argc, char const * const * _argv) + : argc(_argc), argv(_argv), wasgood(argc) {} + + std::string cmdGetter::get(){ + if (argc <= 0) { + wasgood = 0; + return ""; + } + wasgood = argc--; + return *argv++; + } + + int cmdGetter::fail() { + return !wasgood; + } + +// That's all the getters for now. +////////////////////// + +string tolower(const string& arg){ + string dupe(arg); + for (string::iterator foo = dupe.begin(); + foo != dupe.end(); foo++){ + *foo = tolower(*foo); + } + return dupe; +} + +string arg_parser::xcase(const string& arg){ + if (!fold_case) return arg; + return tolower(arg); +} + +// Constructor, for reading words from command line: + arg_parser::arg_parser(int const _argc, char const * const * _argv) + : fold_case(0) + { + std::shared_ptr<baseGetter> tmp (new cmdGetter(_argc, _argv)); + getsome.push_front(tmp); + } + +// Constructor, for reading words from file: + arg_parser::arg_parser(const std::string fname) + : fold_case(0), keyword("(?" "?)"), narg(0) + { + push(fname); + } + + void arg_parser::push(const std::string fname){ + std::shared_ptr<baseGetter> tmp (new fileGetter(fname)); + getsome.push_front(tmp); + } + + int arg_parser::fail(){ + if (!getsome.size()) return 1; // is this really correct? + return getsome.front()->fail(); + } + +// see if cur arg is a prefix of patt */ +int arg_parser::prefix(const string& kwpatt){ + if ((cur.length()==0 && kwpatt.length()!=0)) return 0; // disallow "", unless exact match + if (xcase(cur) != xcase(kwpatt).substr(0, cur.length())) return 0; + keyword = kwpatt; + narg = 0; + return 1; +} + + template<class T=std::string> T arg_parser::nextRaw(){ + cur = ""; + for (;;) { + if (!getsome.size()) { + break; + } + cur += getsome.front()->get(); + if (fail()) { + // here if read failed; pop the context + getsome.pop_front(); + // if read failed late; return what we have + if (cur.length()) return Atox<T>(cur); + // if read failed early, try again in parent context + continue; + } + size_t len = cur.length(); + if (len == 0) break; // zero length, can't have a comma + if (cur[len-1] != ',') break; + // else has a comma, keep reading + } + return Atox<T>(cur); + } + +// return next item, assuming it is an argument: +template<class T=std::string> T arg_parser::nextArg(){ + if (0) cerr << "next called with narg: " << narg + <<" kw: " << keyword + << endl; + T rslt = nextRaw<T>(); + narg++; + if (fail()) { + ostringstream buf; + buf << "Keyword " << keyword + << " requires argument #" << narg + << " of type " << myTraits<T>::name; + throw invalid_argument(buf.str()); + } + return rslt; +} + +template <typename T> +arg_parser& arg_parser::operator>>(T& val){ + val = nextArg<T>(); + return *this; +} + +// from .h file +// template<typename T> T Atox(std::string const &foo); + +// specializations and explicit instantiations: +// The first two lines of this macro help us figure +// out the /name/ of a type. +#define setup(T) \ + template <> const char* myTraits<T >::name = #T; /* specialization */ \ + template struct myTraits<T >; /* instantiation */ \ + template arg_parser& arg_parser::operator>>(T& val); \ + template T Atox<T>(std::string const &arg); \ + template T arg_parser::nextRaw<T>(); + +// Last but not least, instantiate the things we are going to need. +// I don't know why the compiler can't do this automatically. +setup(int); +setup(double); +setup(float); +setup(string); +setup(vector<int>); +setup(vector<double>); +setup(complex<double>); diff --git a/arg_parser.h b/arg_parser.h new file mode 100644 index 0000000..2ee69c6 --- /dev/null +++ b/arg_parser.h @@ -0,0 +1,95 @@ +#ifndef ARG_PARSER_H +#define ARG_PARSER_H + +#include <iostream> +#include <memory> +#include <string> +#include <list> +#include <vector> +#include <fstream> +#include <sstream> // for basic parsing + // and for formatting error msgs +#include <limits> // for ::infinity() + +// TODO: use "some_sstream >> some_int" instead of Atoi +// since that allows more error checking + +#ifndef HAVE_INF +double const Inf = std::numeric_limits<double>::infinity(); +#define HAVE_INF +#endif + +std::string tolower(const std::string& arg); + +// This is the interfact that ordinary users use; +// implementation in arg_parser.c is slightly tricky, +// but users never see that. +template<typename T> T Atox(std::string const &foo); + +// The base class: +class baseGetter{ +public: + virtual std::string get()=0; + virtual int fail()=0; + virtual ~baseGetter(){ +//---- cerr << "dtor: baseGetter" << std::endl; + }; + baseGetter(){ +//---- cerr << "basic ctor: baseGetter" << std::endl; + } +}; + +// A Getter that reads words from the command line: +class cmdGetter : public baseGetter{ +public: + int argc; + char const * const * argv; + int wasgood; + +//ordinary constructor: + cmdGetter(int const _argc, char const * const * _argv); + virtual std::string get(); + virtual int fail(); +}; + +// A Getter that reads words from a file: +class fileGetter : public baseGetter{ +public: + std::fstream file; + +//ordinary constructor: + fileGetter(const std::string fname); + virtual std::string get(); + virtual int fail(); + void dummy() { + if (Inf); + } +}; + +class arg_parser{ +public: + std::list <std::shared_ptr<baseGetter> > getsome; + std::string cur; + int fold_case; + std::string keyword; + int narg; // number of args picked up using >> after this keyword + +// Constructor, for reading words from command line: + arg_parser(int const _argc, char const * const * _argv); +// Constructor, for reading words from file: + arg_parser(const std::string fname); + void push(const std::string fname); + int fail(); + template<class T=std::string> T nextArg(); + template<class T=std::string> T nextRaw(); + +// see if cur arg is a prefix of patt */ + int prefix(const std::string& patt); + template <typename T> arg_parser& operator>>(T& val); + std::string xcase(const std::string& arg); +}; + +template<typename T> struct myTraits {static const char* name;}; + +#define arg_parser_h +#endif diff --git a/binomial-coefficient-choose.h b/binomial-coefficient-choose.h new file mode 100644 index 0000000..cd0d91f --- /dev/null +++ b/binomial-coefficient-choose.h @@ -0,0 +1,6 @@ +#ifndef BINOMIAL_COEFFICIENT_CHOOSE_H +#define BINOMIAL_COEFFICIENT_CHOOSE_H + +int binomialCoeff(int const n, int const k); + +#endif diff --git a/makefile b/makefile new file mode 100644 index 0000000..45009b0 --- /dev/null +++ b/makefile @@ -0,0 +1,32 @@ +LDFLAGS := -lnlopt + +c_sources := poiss.c arg_parser.c parse_csv.c + +% : %.c # cancel built-in one-step rule + +%.o : %.c + g++ -g -O2 -Wall $< -c + +% : %.o + g++ $^ $(LDFLAGS) -o $@ + +.PHONY : all + +## dependency-finding scheme (with local mods) based on: +## http://www.gnu.org/manual/make-3.77/html_mono/make.html#SEC42 +## (requires an include statement at end of the makefile) +%.d: %.c + @$(SHELL) -ec '$(CXX) -MM $(CXXFLAGS) $< \ + | sed '\''s/\($*\)\.o[ :]*/\1.o $@ : /g'\'' > $@; \ + [ -s $@ ] || rm -f $@' + +################ end of pattern rules, start of real rules + +all : poiss + +poiss : poiss.o arg_parser.o parse_csv.o + g++ $^ $(LDFLAGS) -o $@ + +ifndef NO_DOT_D + include $(c_sources:.c=.d) +endif diff --git a/parse_csv.c b/parse_csv.c new file mode 100644 index 0000000..12e98f3 --- /dev/null +++ b/parse_csv.c @@ -0,0 +1,184 @@ +#include <istream> +#include <string> +#include <vector> +#include <stdexcept> +#include "parse_csv.h" +using namespace std; + +/************* + +Format defined: + https://tools.ietf.org/html/rfc4180 +See also: + https://en.wikipedia.org/wiki/Comma-separated_values + +We depart from the RFC by ignoring \r, so that lines can +end with either \r\n (dos style) or simply \n (unix style). + +We check for malformed quoted fields and throw exceptions. + +The last line of the input .csv file may or may not +contain a newline: +:; echo -en "xxx\n" | ./parse_csv_demo - | od -b -Anone + 170 170 170 012 +:; echo -en "xxx" | ./parse_csv_demo - | od -b -Anone + 170 170 170 012 + +A line containing nothing but the newline has one field +with a zero-length datum: +:; echo -en "\n" | ./parse_csv_demo - | od -b -Anone + 012 + +A line containing no data /and/ no newline isn't a line +at all. No line, no fields, no data: +:; echo -en "" | ./parse_csv_demo - | wc + 0 0 0 + +************/ + +string const Q("\""); + +enum class CSVState { + Start, + UnquotedField, + QuotedField, + QuoteInQuote +}; + +// Read one row of .csv table. +// Return a zero-length vector if we encounter EoF +// before seeing any data. +// Note that a table row can extend across multiple .csv file lines, +// if there are embedded newlines. +// +// Within quoted fields, we accept commas, quotes, and newlines, +// e.g "Hughie, ""Louie"", Dewey". +template <class datum = qstring> +vector<datum> readCSVRow(istream& input) { + using namespace std; + if (input.eof()) return vector<datum>(0); + CSVState state = CSVState::Start; + vector<datum> fields(0); + char c; + for (;;) { + input.read(&c, 1); + if (input.eof()) { + if (state == CSVState::QuotedField) + throw runtime_error("Unterminated quote in .csv file in field# " + + to_string(fields.size()) + + "\n... namely <" + + Q + fields.back() + ">"); + break; + } + if (!input.good()) throw runtime_error("input error in .csv file"); + +// if we made it this far, the line contains at least one datum: + if (fields.size() == 0) fields.push_back(datum()); + switch (state) { + case CSVState::Start: + switch (c) { + case ',': // zero-length field + fields.push_back(datum()); + break; + case '"': + state = CSVState::QuotedField; +// This is no-op if the datum is not of type qstring: + set_q(fields.back(), 1); + break; + case '\r': + break; + case '\n': + goto EndRecord; + default: + state = CSVState::UnquotedField; + fields.back().push_back(c); + break; + } + break; + case CSVState::UnquotedField: + switch (c) { + case ',': // start a new field: + state = CSVState::Start; + fields.push_back(datum()); + break; + case '"': throw runtime_error + ("Stray quote in .csv file in field# " + + to_string(fields.size()) + + "\n... namely <" + + fields.back() + Q + ">" + ); + break; + case '\r': break; + case '\n': goto EndRecord; + default: fields.back().push_back(c); + break; + } + break; + case CSVState::QuotedField: + switch (c) { + case '"': state = CSVState::QuoteInQuote; + break; +// Accept anything except quote here; +// that includes comma, \r, and \n. + default: fields.back().push_back(c); + break; + } + break; +// Previous state saw a quote; we consider what follows: + case CSVState::QuoteInQuote: + switch (c) { + case ',': // comma after closing quote + state = CSVState::Start; + fields.push_back(datum()); + break; + case '"': // double ", map to plain " + fields.back().push_back('"'); + state = CSVState::QuotedField; + break; + case '\r': break; + case '\n': goto EndRecord; + default: throw runtime_error + ("Extraneous verbiage in .csv file after quoted field# " + + to_string(fields.size()) + + "\n... namely <" + + Q + fields.back() + Q + + string(1,c) +">"); + break; + } + break; + } + } + EndRecord:;;;; + return fields; +} + +// Read entire CSV file. +// Within quoted fields, we accept commas, quotes, and newlines, +// e.g "Hughie, ""Louie"", Dewey". +template <class datum = qstring> +vector<vector<datum> > readCSV(istream &in) { + using namespace std; + vector<vector<datum> > table; + datum row; + while (!in.eof()) { + auto fields = readCSVRow<datum>(in); + if (fields.size() == 0) break; + table.push_back(fields); + } + return table; +} + +// Explicit instantiation for the <qstring> version. +// If you want some other version, you'll have to instantiate it. +template vector<qstring> readCSVRow<qstring>(istream& input); +template vector<vector<qstring> >readCSV<qstring>(istream &in); +// Apparently it's not necessary to mention get_q() here, +// but probably more portable to do it anyway: +////template int get_q<qstring>(qstring const&); + +#if 1 +// Explicit instantiation for the <string> version. +template vector<string> readCSVRow<string>(istream& input); +template vector<vector<string> >readCSV<string>(istream &in); +/////template int get_q<string>(string const&); +#endif diff --git a/parse_csv.h b/parse_csv.h new file mode 100644 index 0000000..362c478 --- /dev/null +++ b/parse_csv.h @@ -0,0 +1,34 @@ +#ifndef PARSE_CSV__H +#define PARSE_CSV__H +#include <vector> + +struct qstring : public std::string { + int q; // was explicitly quoted +// Constructors: + qstring(): q(0) {} + qstring(int const _q): q(_q) {} + qstring(std::string const &s): std::string(s), q(0) {} + qstring(std::string const &s, int const _q): std::string(s), q(_q) {} +}; + +// Helper functions, not class member functions: +// Simple case, do nothing: +template<class T> inline void set_q(T& target, int const val) {} +template<class T> inline int get_q(T const& target) {return 0;} + +// Specialization: Fancy case, actually act on q: +template<> inline void set_q(qstring& target, int const val) { + target.q = val; +} + +template<> inline int get_q(qstring const& target){ + return target.q; +} + +template <class datum = qstring> +std::vector<datum> readCSVRow(std::istream& input); + +template <class datum = qstring> +std::vector<std::vector<datum> > readCSV(std::istream &in); + +#endif @@ -0,0 +1,658 @@ +///////////////////////// + +#include <iostream> +using namespace std; + +void usage() { + cout << R"EoF(Program to fit to Poisson distributed data. +Uses actual Poisson probability (i.e. not Gaussians, and +therefore not least squares). + +Options include: + -h # print this message (and immediately exit) + -ifile $fn # input file, date for -fit + -gfile $fn # log information about initial guesses for -fit + -fit $fn # do the fit; send results to $fn + +# the rest are for testing and background investigations: + -dkcurve $fn # smooth curves and scattered data at fake abscissas + -entropy $fn # entropy of the Poisson distro + # as a function of the intensity (lambda) + -seed $str # seed the RNG with $str + -seed -- # seed the RNG from /dev/random + +To write to stdout, use "-" for the $fn. + +See ./doit for a working example, including the SS --meld stanzas. +)EoF"; +} + +#include <iomanip> /* for setw */ +#include <vector> +#include <nlopt.hpp> /* the minimum-finder */ +#include "arg_parser.h" +#include <string> +#include <sstream> +#include <map> +#include "parse_csv.h" +#include "poissonier.h" + +// Used for equal weighting: +vector<double> const u5(5, 1.0); + +// Class to describe a sitch, i.e. a situation, i.e. +// everything the objective function needs to know: +struct sitcher { + vector<double> norm; // keep the adjustable parameters close to unity + vector<double> t1; + vector<double> t2; + vector<int> obs; // actual observed counts in the interval [t1, t2] +}; + +// Instantaneous rate at time ttt. +// Takes parameters two at a time: (amplitude, decay rate). +// If there are an odd number of parameters, +// the last one is the baseline, i.e. independent of time, +// equivalent to a decay with the rate locked at zero. +// Decay rates are in nats per second. +double inst_multi_exp(vector<double> const& prm, + double const ttt, vector<double> const norm) { + double rslt(0); + int nn = prm.size(); + for (int ii = 0; ii < nn; ii+=2){ + double amp(prm[ii] * norm[ii]); + double rate(1+ii < nn ? prm[1+ii] * norm[1+ii] : 0.); + if (rate) { + double rate(prm[1+ii]); +// use the negative rate: + rslt += amp * exp(- rate * ttt); + } else { + rslt += amp; + } + } + return rslt; +} + +// Format all the elements of a C++ vector, +// creating a string suitable for printing. +template<class T> +string dump(vector<T> const arg) { + stringstream rslt; + string sep = ""; + for (T val : arg) { + rslt << sep << val; + sep = ", "; + } + return rslt.str(); +} + +// Format all the elements of a counted array, +// creating a string suitable for printing. +template<class T> +string dump(int const nn, T const arg[]) { + stringstream rslt; + string sep = ""; + for (int ii = 0; ii < nn; ii++) { + rslt << sep << arg[ii]; + sep = ","; + } + return rslt.str(); +} + +// numerically accurate sinh(arg)/arg +template<class T> +inline T sinhc(T arg) { + if (abs(arg) < 1e-4) return 1 + arg*arg/6; + return sinh(arg)/arg; +} + +struct evendor { + double t; // time + double n; // number of events +}; + +// Intensity, usually denoted lambda, +// i.e. integral of rate(t) dt over the given interval. +double inty_multi_exp(vector<double> const& prm, + double const t1, double const t2, vector<double> const norm) { + double rslt(0); + int nn = prm.size(); + for (int ii = 0; ii < nn; ii+=2){ + double amp(prm[ii] * norm[ii]); + double rate(1+ii < nn ? prm[1+ii] * norm[1+ii] : 0.); + double mid((t2 + t1) / 2.); + double tau(t2 - t1); + rslt += amp * exp(-rate*mid) * tau * sinhc(rate*tau/2.); + } + return rslt; +} + +// the actual objective function +// returns total log probability, summed over all data points +// +// _sitch is unchanged, +// but cannot be declared const, because bobyqa wouldn't like that. +double limp(unsigned int const nprm, double const* _prm, + double * _grad, void * _sitch) { + sitcher& sitch(*(sitcher*)(_sitch)); + if (_grad != 0) throw invalid_argument("grad"); + if (sitch.norm.size() != nprm) throw invalid_argument("norm"); + vector<double> prm(_prm, _prm+nprm); + if (prm.size() != nprm) throw logic_error("vector ctor"); + double rslt(0); + int npts(sitch.t1.size()); + for (int ii = 0; ii < npts; ii++){ + double inty = inty_multi_exp(prm, sitch.t1[ii], sitch.t2[ii], sitch.norm); + rslt -= poissonier::lpmd(inty, sitch.obs[ii]); + } + return rslt; +} + +// Scan the data array, +// looking for records that look like headers +// (as opposed to numerical data). +int count_header(vector<vector<string> > const aoa){ + int NR = aoa.size(); + int hh = 0; + for (; hh < NR; hh++) { + if (aoa[hh].size() == 0) continue; + string word = aoa[hh][0]; + size_t where; + where = word.find_first_not_of(" "); + if (where == string::npos) continue; + word = word.substr(where); + try { + stod(word, &where); // don't care about return value + } + catch (exception& ee) { + continue; + } + word = word.substr(where); + where = word.find_first_not_of(" "); + if (where == string::npos) break; + } + return hh; +} + +// Some tricky heuristics. +// Guess a starting point for the fitting process +// (i.e. minimization process). +// A good guess speeds things up and greatly increases +// the chance of finding the global minimum (as opposed +// to getting stuck in a worthless local minimum, or +// diverging to arrant nonsense). +struct guesser { + int hh; + int npts; + vector<evendor> evt; + vector<double> model; + sitcher sitch; + + guesser() : hh(0), npts(0) {} + + void setup(vector<vector<string> > const aoa) { + int NR = aoa.size(); // includes header row + if (NR == 0) throw invalid_argument("Zero-length input file"); + hh = count_header(aoa); + npts = NR - hh; + if (0) cout << "NR: " << NR + << " npts: " << npts + << endl; + evt = vector<evendor>(npts); + double minute(60); + for (int ii = 0; ii < npts; ii++) { + // file times are in minutes; convert to SI units here: + evt[ii].t = stod(aoa[hh+ii][0]) * minute; + evt[ii].n = stod(aoa[hh+ii][1]); + } + + double max_t = evt[npts-1].t; + double max_n = evt[npts-1].n; + + // starting point of the baseline estimate, + // as a fraction of the total span of the data, + // assuming data starts at zero: + double tail_frac = 0.9; + int tail_i = -1; + + // half life (in seconds) of the fast component: + double fast_h = 307.2; + double fast_a = M_LN2 / fast_h; + int fast_i = -1; + + // half life (in seconds) of the slow component: + double slow_h = 45720; + double slow_a = M_LN2 / slow_h; + + double dead(5); // 2**-5 = 3% + + for (int ii = 0; ii < npts; ii++) { + double time = evt[ii].t; + if (time < tail_frac * max_t) tail_i = ii; + if (time < fast_h * dead) fast_i = ii; + } + + if (0) cout << "npts: " << npts + << " : " << max_t + << "," << max_n + << endl; + + double tail_t = evt[tail_i].t; + double tail_n = evt[tail_i].n; + double tail_r = (max_n - tail_n) / (max_t - tail_t); + + if (0) cout << "tail_i: " << tail_i + << " : " << tail_t + << "," << tail_n + << " " << tail_r + << endl; + + double fast_t = evt[fast_i].t; + double fast_n = evt[fast_i].n; + if (0) cout << "fast_i: " << fast_i + << " : " << fast_t + << "," << fast_n + // << " " << fast_r + << endl; + + if (tail_i < 0) throw invalid_argument("wtf?"); + if (fast_i < 0) throw invalid_argument("wtf?"); + + // beginning times: + double slow_tb = evt[fast_i].t; + double slow_nb = evt[fast_i].n; + + double slow_dt = max_t - slow_tb; + double slow_dn = max_n - slow_nb; + double slow_mag = slow_dn - slow_dt * tail_r; + slow_mag /= (1 - exp(-slow_a * slow_dt) * (1 + slow_dt * slow_a)); + // valid at time slow_tb: + double slow_rx = slow_mag * slow_a; + // valid at time zero: + double slow_r = slow_rx * exp(slow_a * slow_tb); + + double slow_rem = slow_r * exp(-slow_a * max_t); + double bl_r = tail_r - slow_rem; + + if (0) cout << "slow_r: " << slow_r + << " slow_rx: " << slow_rx + << " slow_mag: " << slow_mag + << " slow_rem: " << slow_rem + << " tail_r: " << tail_r + << " bl_r: " << bl_r + << endl; + + vector<double> slow_model{slow_r, slow_a, bl_r}; + + double fast_t0 = evt[0].t; + double fast_n0 = evt[0].n; + double fast_t1 = evt[fast_i].t; + double fast_n1 = evt[fast_i].n; + double model_inty = inty_multi_exp(slow_model, fast_t0, fast_t1, u5); + double fast_tot = fast_n1 - fast_n0; + double fast_dn = fast_tot - model_inty; + double fast_r = fast_dn * fast_a; + if (0) cout << "fast_r: " << fast_r + << " fast_dn: " << fast_dn + << " fast_tot: " << fast_tot + << " model_inty: " << model_inty + << endl; + + model = vector<double>{fast_r, fast_a}; + model.insert(model.end(), slow_model.begin(), slow_model.end()); + } + +// Reformat the data, to make it more useful to +// the objective function: + void more() { + for (int ii = 1; ii < npts; ii++) { + sitch.t1.push_back(evt[ii-1].t); + sitch.t2.push_back(evt[ii ].t); + sitch.obs.push_back(evt[ii].n - evt[ii-1].n); + } + } + +// Show the guessed parameters. +// Also do a sweep of one variable, as a qualitative sanity check. + void show(string const ofile) { + if (ofile == "") return; + ofstream xxout; + if (ofile != "-") { + xxout.open(ofile); + } + ostream& xout(ofile != "-" ? xxout : cout); + + size_t ss = sitch.t1.size(); + xout << ss << endl; + xout << sitch.t1[ss-1] + << " " << sitch.t2[ss-1] + << " " << sitch.obs[ss-1] + << endl; + + vector<double> prm(u5); + + xout << dump(model) << endl; + + xout << "bl/norm,bl,limp" << endl; + double bl0 = prm[4]; + void* context = (void*)(&sitch); + int nprm(prm.size()); + + for (double bl = bl0*.9; bl <= bl0*1.1; bl += bl/100.) { + prm[4] = bl; + double limpy = limp(nprm, prm.data(), 0, context); + xout << bl + << "," << bl*sitch.norm[4] + << "," << limpy << endl; + } + } +}; + +// Decode result codes returned by nlopt functions. +// This is documented to be part of the nlopt library +// but is absent from the version I have. +const char *nlopt_result_to_string(nlopt_result result) +{ + switch(result) + { + case NLOPT_FAILURE: return "FAILURE"; + case NLOPT_INVALID_ARGS: return "INVALID_ARGS"; + case NLOPT_OUT_OF_MEMORY: return "OUT_OF_MEMORY"; + case NLOPT_ROUNDOFF_LIMITED: return "ROUNDOFF_LIMITED"; + case NLOPT_FORCED_STOP: return "FORCED_STOP"; + case NLOPT_SUCCESS: return "SUCCESS"; + case NLOPT_STOPVAL_REACHED: return "STOPVAL_REACHED"; + case NLOPT_FTOL_REACHED: return "FTOL_REACHED"; + case NLOPT_XTOL_REACHED: return "XTOL_REACHED"; + case NLOPT_MAXEVAL_REACHED: return "MAXEVAL_REACHED"; + case NLOPT_MAXTIME_REACHED: return "MAXTIME_REACHED"; +///??????? case NLOPT_NUM_RESULTS: return NULL; + } + return NULL; +} + +// Add something to a particular component of a vector. +template<class T> +vector<T> goose(vector<T> const& vvv, int const ii, T const delta) { + vector<T> rslt(vvv); + rslt[ii] += delta; + return rslt; +} + +void do_fit(poissonier& poi, string const ifile, + string const ofile, string const gfile) { + if (ifile == "") { + if (ofile != "") throw invalid_argument("do_fit needs an input file"); + return; + } + + ifstream inx; + if (ifile != "-") { + inx.open(ifile); + if (! inx.good()) { + cerr << "Cannot open input '" << ifile << "'" << endl; + exit(1); + } + } + + istream& in(ifile == "-" ? cin : inx); + vector<vector<string> > aoa; + aoa = readCSV<string>(in); + guesser ggg; + ggg.setup(aoa); + ggg.more(); + ggg.sitch.norm = ggg.model; + ggg.show(gfile); + + vector<double> prm = u5; + int nprm(prm.size()); + vector<double> const lower(nprm, 0.5); + vector<double> const upper(nprm, 1.5); + + nlopt_result rslt = NLOPT_FORCED_STOP; // avoid "unused" warning + double limp_end(-9e99); + vector<double> found(prm); // will get modified in place + int OK(0); + try { + void* context = (void*)(&ggg.sitch); + nlopt_opt oppy = nlopt_create(NLOPT_LN_BOBYQA, nprm); + rslt = nlopt_set_lower_bounds1(oppy, 0.5); + rslt = nlopt_set_upper_bounds1(oppy, 1.5); + rslt = nlopt_set_min_objective(oppy, limp, context); + rslt = nlopt_set_xtol_rel(oppy, 1e-7); + rslt = nlopt_optimize(oppy, found.data(), &limp_end); + OK = 1; + } + catch (exception& eee) { + cout << "Fitting bombed out: " << eee.what() << endl; + } + + if (OK && ofile != "") { + ofstream xxout; + if (ofile != "-") { + xxout.open(ofile); + } + ostream& xout(ofile != "-" ? xxout : cout); + + double DoF(ggg.npts - nprm); + xout << ifile << endl; + cout << "Fit returns:, " << rslt + << ", i.e., " << nlopt_result_to_string(rslt) + << ", limp:, " << limp_end + << ", perdof:, " << limp_end / (ggg.npts - nprm) + << endl; + xout << "Fit returns:, " << rslt + << ", i.e., " << nlopt_result_to_string(rslt) + << ", limp:, " << limp_end + << ", perdof:, " << limp_end / DoF + << endl; + xout << endl; + xout << ",fast.amp, fast.dk, slow.amp, slow.dk, bl" << endl; + xout << "Normed:, " << dump(found) << endl; + vector<double> combi(nprm); + for (int ii = 0; ii < nprm; ii++) { + combi[ii] = found[ii] * ggg.sitch.norm[ii]; + } + xout << "SI:, " << dump(combi) << endl; + vector<double> flip(combi); + flip[1] = M_LN2/combi[1]; + flip[3] = M_LN2/combi[3]; + xout << "½life:, " << dump(flip) << endl; + +// Calculate the uncertainties. +// In particular, calculate the Mahalanobis metric +// i.e. the second derivative (Hessian) of the log improbability. + vector<vector<double> > covar(nprm, vector<double>(nprm)); + vector<double> probe(u5); + sitcher sitch(ggg.sitch); + sitch.norm = combi; + void* context = (void*)(&sitch); + double delta(0.01); + xout << endl; + for (int ii = 0; ii < nprm; ii++) { + string sep = ""; + for (int jj = 0; jj < nprm; jj++) { + double maha; + if (ii != jj) { + maha = limp(nprm, goose(goose( + probe, ii, delta), jj, delta).data(), 0, context) + + limp(nprm, goose(goose( + probe, ii, -delta), jj, -delta).data(), 0, context) + - limp(nprm, goose(goose( + probe, ii, delta), jj, -delta).data(), 0, context) + - limp(nprm, goose(goose( + probe, ii, -delta), jj, delta).data(), 0, context); + maha = maha/4./delta/delta; + } else { + maha = limp(nprm, goose(probe, ii, delta).data(), 0, context) + + limp(nprm, goose(probe, ii, -delta).data(), 0, context) + -2.0*limp(nprm, probe.data(), 0, context); + maha = maha/delta/delta; + } + xout << sep << setprecision(3) << setw(11) << fixed << maha; + sep = ", "; + } + xout << endl; + } + } +} + +// Crude reconnaissance. +// Calculate the decay curves using made-up parameters. +// Not in the critical path. +void do_dk(poissonier& poi, string const ofile) { + if (ofile == "") return; + ofstream xxout; + if (ofile != "-") { + xxout.open(ofile); + } + ostream& xout(ofile != "-" ? xxout : cout); + sitcher sitch; // selected abscissas + sitcher smooth; // all abscissas + + double step(1); + vector<double> prm( {30, .1, 10, .01, 1} ); + + int dt(5); + for (int ii = 0; ii < 500; ii+= dt) { + double tt1(step*ii); + double tt2(tt1 + dt); + double intensity(inty_multi_exp(prm, tt1, tt2, u5)); + int obs = poi.sample(intensity); + sitch.t1.push_back(tt1); + sitch.t2.push_back(tt2); + sitch.obs.push_back(obs); + } + + for (int ii = 0; ii < 500; ii++) { + smooth.t1.push_back(step*ii); + } + +// here with both sitchers fully constructed. + + xout << "Seed:," << poi.graine << endl; +// output both the smooth curves +// and the observations (to the extent they exist) + for (int ii = 0; ii < 500; ii++) { + double tt1(smooth.t1[ii]); + double sminty(inst_multi_exp(prm, tt1, u5)); + xout << tt1 << "," << sminty; + if (ii < (int) sitch.t1.size()) { + double fake(max(0.+sitch.obs[ii], 0.2)); // to facilitate log axes + double tt1(sitch.t1[ii]); + double tt2(sitch.t2[ii]); + double minty(inty_multi_exp(prm, tt1, tt2, u5)); + xout << "," << tt1 + << "," << tt2 + << ",x" + << "," << minty + << "," << minty + << ",x" + << "," << fake + << "," << fake + << ",x"; + } + xout << endl; + } +} + +// Class used by do_entropy. +// Use a class rather than a plain function, +// because it returns multiple results. +struct entroper { + double ptot, stot, ii; + void go(double const rw_obs, double const rw_mod) { + ptot = 0; + stot = 0; + int ii; + int lim = int(ceil(rw_obs)); + lim *= 10; + for (ii = 0; ii < lim; ii++) { + if (ptot > .999999) break; + double pobs = poissonier::pmd(rw_obs, ii); + if (pobs != 0) { + double lpmod = poissonier::lpmd(rw_mod, ii); + ptot += pobs; + stot -= pobs * lpmod; + } else { + // don't do a calculation that could + // possibly multiply zero by -infinity. + } + } + } +}; + +// Out of curiosity, calculate the entropy of the Poisson distribution +// as a function of intensity (lambda). +// Not in the critical path. +void do_entropy(string const ofile) { + if (ofile == "") return; + ofstream xxout; + if (ofile != "-") { + xxout.open(ofile); + } + ostream& xout(ofile != "-" ? xxout : cout); + + double Delta(1.01); + xout << "Delta," << Delta << endl; + xout << "rw, ii, ptot, Scat, Sdog, Semu" << endl; + double ratio(pow(10., .005)); + entroper cat, dog, emu; + for (double rw = 0.01; rw < 1500; rw *= ratio) { + cat.go(rw, rw); + dog.go(rw, rw/Delta); + emu.go(rw, rw*Delta); + xout << rw + << "," << cat.ptot + << "," << cat.ii + << "," << cat.stot + << "," << dog.stot + << "," << emu.stot + << endl; + } +} + +int main(int argc, char * const * argv) { + poissonier poi; + string seed; + string ifile; + arg_parser arg(argc, argv); + arg.fold_case = 1; + string progname = arg.nextRaw(); + string dkfile, entfile, fitfile, gfile; + + for (;;){ + string raw_arg = arg.nextRaw(); // get next keyword + if (arg.fail()) break; + int where = arg.cur.length(); + if (where){ + if (arg.cur[where-1] == ':') arg.cur = arg.cur.substr(0,where-1); + } + if (0) {} + else if (arg.prefix("help") || arg.prefix("-h")) { + usage(); + exit(0); + } + else if (arg.prefix("-ifile")) arg >> ifile; + else if (arg.prefix("-seed")) arg >> seed; + else if (arg.prefix("-entropy")) arg >> entfile; + else if (arg.prefix("-dkcurve")) arg >> dkfile; + else if (arg.prefix("-gfile")) arg >> gfile; + else if (arg.prefix("-fit")) arg >> fitfile; + else { + cerr << "Unrecognized command '" << raw_arg << "'" << endl; + exit(1); + } + } + if (seed == "--") seed = poi.randomize(); + else if (seed != "") poi.seed(seed); + + if (!( fitfile.length() + || dkfile.length() + || entfile.length() + )) + dkfile = "-"; + do_dk(poi, dkfile); + do_fit(poi, ifile, fitfile, gfile); + do_entropy(entfile); +} diff --git a/poissonier.h b/poissonier.h new file mode 100644 index 0000000..95da15e --- /dev/null +++ b/poissonier.h @@ -0,0 +1,108 @@ +#ifndef POISSONIER_H +#define POISSONIER_H + +#include <random> /* for seed_seq */ +#include <iostream> +#include <time.h> /* for clock_gettime */ +#include <vector> +#ifdef BOOST_CPD +# include <boost/math/distributions/poisson.hpp> +#endif + +static char const printable[64+1] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789._"; + +std::string base64(unsigned long int arg) { + std::string str; + for (;;) { + str = printable[arg & 63] + str; + arg >>= 6; + if (! arg) break; + } + return str; +} + +std::string randomstr(unsigned int const nn=15) { + using namespace std; +// FIXME: ??? Could maybe use std::bitset here: + random_device rd; + string str; + while (str.length() < nn) { + unsigned int foo(rd()); + int nch = (sizeof(foo) * 8) / 6; + for (int ii = 0; ii < nch; ii++) { + str += printable[foo & 63]; + foo >>= 6; + } + } + return str; +} + +class poissonier { +public: + std::mt19937 basegen; + std::string graine; // last seed fed to basegen + +// constructor + poissonier () { + timespec time; + clock_gettime(CLOCK_REALTIME, &time); + graine = base64(time.tv_sec); + graine += base64(time.tv_nsec); + seed(graine); + } + double sample(double const lambda) { +// There is no way to change the intensity after the +// distro is constructed, so construct a new one each time: + return std::poisson_distribution<>(lambda)(basegen); + } + void seed(std::string const seed) { +// Need a temporary variable here, because the seed() function +// modifies it: + graine = seed; + std::seed_seq seed1(seed.begin(), seed.end()); + basegen.seed(seed1); + } + std::string randomize() { + graine = randomstr(15); + seed(graine); + return graine; + } + void check(double const rate) { + using namespace std; + cout << "Checking: " << rate + << " gives: " << sample(rate) + << endl; + } + +// Note this is stateless. Static member function. +// +// Log of the probability mass distribution: + static double lpmd(double const lambda, int const k){ + // https://en.wikipedia.org/wiki/Poisson_distribution#Definition + if (lambda < 0) throw std::invalid_argument("negative Poisson intensity"); + if (lambda == 0) { + if (k == 0) return 0; // 100% probability + return -INFINITY; // 0% probability + } + return k * log(lambda) - lambda - lgamma(k + 1.0); + } + +// pmd = probability mass distribution + inline static double pmd(double const lambda, int const k){ + return exp(lpmd(lambda, k)); + } + +#ifdef BOOST_CPD +// not implemented +// not needed, and would require boost +// cpd = cumulative probability distribution + static double cpd(double const lambda, int const k){ + boost::math::poisson_distribution<> pd(lambda); + return boost::math::cpd(pd, k); + } +#endif +}; + +#endif /* POISSONIER_H */ |