basically functional, but still a work in progress

author: John Denker <jsd@av8n.com> 2021-10-17 10:10:18 -0700
committer: John Denker <jsd@av8n.com> 2021-10-17 11:09:24 -0700
commit: 74ddd0381aa1b1a90eb0d5300fa576cb2348eeac (patch)
tree: 72a9dded6f800467d52e479eb37574e6de5f2e6c
parent: 634d365a03cb0581a062cd3cf4db9ae69f1cde26 (diff)
9 files changed, 1422 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..60f56d2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+RCS
+CVS
+*.o
+*.d
+*~
+*#[0-9]*
+[#]*#
+.#*
+*.aux
+*.aux1
+*.haux
+*.htoc
+*.log
+
+###############
+
+poiss
diff --git a/arg_parser.c b/arg_parser.c
new file mode 100644
index 0000000..d4ff196
--- /dev/null
+++ b/arg_parser.c
@@ -0,0 +1,288 @@
+using namespace std;
+#include "arg_parser.h"
+#include <complex>
+#include <stdexcept>
+
+using namespace std;
+
+// Convert a string to this-or-that representation.
+// Specialization is not the same as instantiation!
+// Atox<int> and Atox<double> need to come early,
+//  because they get used by later things.
+
+template<typename T> struct cvt2 {
+  static double doit(string const sss){
+    stringstream parse;
+    parse.str(sss);
+    T rslt;
+    parse >> rslt;
+    if (parse.fail()) throw invalid_argument("'" + sss + "' isn't a proper " + myTraits<T>::name);
+    if (parse.eof()) return rslt;
+    do {
+      char ch;
+      parse >> ch;
+      if (parse.fail()) break;
+      if (ch == 'k') rslt *= 1024;
+      else if (ch == 'M') rslt *= 1048576;
+      else break;
+      parse >> ws;        // without this, we wouldn't know whether we were at EoF
+      if (!parse.eof()) break;
+      return rslt;        // whew! good.
+    } while (0);
+    throw invalid_argument("'" + sss + "' has extraneous verbiage");
+    return 0;     /* should never happen */
+  }
+};
+
+
+template<typename T> struct helper{
+  static T convert(string const &arg);
+};
+
+template<> struct helper<string> {
+  static string convert(string const &arg){
+    return arg;
+  }
+};
+
+template<> struct helper<int> {
+  static int convert(string const &arg){
+    return cvt2<int>::doit(arg.c_str());
+  }
+};
+
+template<> struct helper<double> {
+  static double convert(string const &arg){
+    if (tolower(arg) == "inf") return Inf;
+    if (tolower(arg) == "-inf") return -Inf;
+    return cvt2<double>::doit(arg.c_str());
+  }
+};
+
+// convert to double, then downgrade to float:
+template<> struct helper<float> {
+  static float convert(string const &arg){
+    return helper<double>::convert(arg);
+  }
+};
+
+template<typename B> struct helper<vector<B> > {
+  static vector<B> convert(string const &_arg){
+    string arg(_arg);
+    vector<B> rslt;
+    while (arg.length()) {
+      string::size_type where = arg.find(',');
+      string word;
+      if (where == arg.npos) {
+        word = arg;
+        arg = "";
+      } else {
+        word = arg.substr(0, where);
+        arg = arg.substr(1+where);
+      }
+      rslt.push_back(helper<B>::convert(word));
+    }
+    return rslt;
+  }
+};
+
+template<typename B>
+string num_from_list(const string arg, B& rslt){
+    if (arg.length()) {
+      string::size_type where = arg.find_first_of(", \t");
+      string word;
+      if (where == arg.npos) {
+        rslt = helper<B>::convert(arg);
+        return "";
+      } else {
+        rslt = helper<B>::convert(arg.substr(0, where));
+        return arg.substr(1+where);
+      }
+    }
+// should never get here
+    return arg;
+}
+
+template<typename B> struct helper<complex<B> > {
+  static complex<B> convert(string const &_arg){
+    string arg(_arg);
+    B rp(0), ip(0);
+    arg = num_from_list(arg, rp);
+    arg = num_from_list(arg, ip);
+    return complex<B>(rp, ip);
+  }
+};
+
+// This is the interface that ordinary users use.
+// The point of all this is that you can perform
+// "partial specification" (what I might call re-specialization)
+// on classes, but not on functions.
+template <typename T>
+T Atox(std::string const &str) {
+  // Just delegate.
+  return helper<T>::convert(str);
+}
+
+//////////////////////////////////
+// Various getters.
+
+//+++ fileGetter ....
+//ordinary constructor:
+  fileGetter::  fileGetter(const std::string fname){
+    file.open(fname.c_str());
+  }
+
+  std::string fileGetter::get(){
+    std::string str;
+    for (;;) {
+      file >> str;              // try to read one word
+      if (file.fail()) return "";      // failed
+      if (str[0] != '#') return str;    // got a comment;
+      string junk;
+      getline(file, junk);              //throw it away
+      // go around the loop, read next line
+    }
+  }
+
+// beware that neither bad() nor fail() is the opposite of good()
+  int fileGetter::fail() {
+    return file.fail();
+  }
+
+//+++ cmdGetter ....
+//ordinary constructor:
+  cmdGetter::cmdGetter(int const _argc, char const * const * _argv)
+  : argc(_argc), argv(_argv), wasgood(argc) {}
+
+  std::string cmdGetter::get(){
+    if (argc <= 0) {
+      wasgood = 0;
+      return "";
+    }
+    wasgood = argc--;
+    return *argv++;
+  }
+
+  int cmdGetter::fail() {
+    return !wasgood;
+  }
+
+// That's all the getters for now.
+//////////////////////
+
+string tolower(const string& arg){
+  string dupe(arg);
+  for (string::iterator foo = dupe.begin();
+            foo != dupe.end(); foo++){
+    *foo = tolower(*foo);
+  }
+  return dupe;
+}
+
+string arg_parser::xcase(const string& arg){
+    if (!fold_case) return arg;
+    return tolower(arg);
+}
+
+// Constructor, for reading words from command line:
+  arg_parser::arg_parser(int const _argc, char const * const * _argv)
+  : fold_case(0)
+  {
+    std::shared_ptr<baseGetter> tmp (new cmdGetter(_argc, _argv));
+    getsome.push_front(tmp);
+  }
+
+// Constructor, for reading words from file:
+  arg_parser::arg_parser(const std::string fname)
+  : fold_case(0), keyword("(?" "?)"), narg(0)
+  {
+    push(fname);
+  }
+
+  void arg_parser::push(const std::string fname){
+    std::shared_ptr<baseGetter> tmp (new fileGetter(fname));
+    getsome.push_front(tmp);
+  }
+
+  int arg_parser::fail(){
+    if (!getsome.size()) return 1;      // is this really correct?
+    return getsome.front()->fail();
+  }
+
+// see if cur arg is a prefix of patt */
+int arg_parser::prefix(const string& kwpatt){
+  if ((cur.length()==0 && kwpatt.length()!=0)) return 0;     // disallow "", unless exact match
+  if (xcase(cur) != xcase(kwpatt).substr(0, cur.length())) return 0;
+  keyword = kwpatt;
+  narg = 0;
+  return 1;
+}
+
+  template<class T=std::string> T arg_parser::nextRaw(){
+    cur = "";
+    for (;;) {
+      if (!getsome.size()) {
+        break;
+      }
+      cur += getsome.front()->get();
+      if (fail()) {
+        // here if read failed;  pop the context
+        getsome.pop_front();
+        // if read failed late;  return what we have
+        if (cur.length()) return Atox<T>(cur);
+        // if read failed early, try again in parent context
+        continue;
+      }
+      size_t len = cur.length();
+      if (len == 0) break;         // zero length, can't have a comma
+      if (cur[len-1] != ',') break;
+      // else has a comma, keep reading
+    }
+    return Atox<T>(cur);
+  }
+
+// return next item, assuming it is an argument:
+template<class T=std::string> T arg_parser::nextArg(){
+  if (0) cerr << "next called with narg: " << narg
+        <<"  kw: " << keyword
+        << endl;
+  T rslt = nextRaw<T>();
+  narg++;
+  if (fail()) {
+    ostringstream buf;
+    buf << "Keyword " << keyword
+        << " requires argument #" << narg
+        << " of type " << myTraits<T>::name;
+    throw invalid_argument(buf.str());
+  }
+  return rslt;
+}
+
+template <typename T>
+arg_parser& arg_parser::operator>>(T& val){
+  val = nextArg<T>();
+  return *this;
+}
+
+// from .h file
+//  template<typename T> T Atox(std::string const &foo);
+
+// specializations and explicit instantiations:
+// The first two lines of this macro help us figure
+// out the /name/ of a type.
+#define setup(T)  \
+  template <> const char* myTraits<T >::name = #T;  /* specialization */   \
+  template struct myTraits<T >;                     /* instantiation  */   \
+  template arg_parser& arg_parser::operator>>(T& val);  \
+  template T Atox<T>(std::string const &arg);           \
+  template T arg_parser::nextRaw<T>();
+
+// Last but not least, instantiate the things we are going to need.
+// I don't know why the compiler can't do this automatically.
+setup(int);
+setup(double);
+setup(float);
+setup(string);
+setup(vector<int>);
+setup(vector<double>);
+setup(complex<double>);
diff --git a/arg_parser.h b/arg_parser.h
new file mode 100644
index 0000000..2ee69c6
--- /dev/null
+++ b/arg_parser.h
@@ -0,0 +1,95 @@
+#ifndef ARG_PARSER_H
+#define ARG_PARSER_H
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <list>
+#include <vector>
+#include <fstream>
+#include <sstream>      // for basic parsing
+                        // and for formatting error msgs
+#include <limits>       // for ::infinity()
+
+// TODO:  use "some_sstream >> some_int" instead of Atoi
+// since that allows more error checking
+
+#ifndef HAVE_INF
+double const Inf = std::numeric_limits<double>::infinity();
+#define HAVE_INF
+#endif
+
+std::string tolower(const std::string& arg);
+
+// This is the interfact that ordinary users use;
+// implementation in arg_parser.c is slightly tricky,
+// but users never see that.
+template<typename T> T Atox(std::string const &foo);
+
+// The base class:
+class baseGetter{
+public:
+  virtual std::string get()=0;
+  virtual int fail()=0;
+  virtual ~baseGetter(){
+//----    cerr << "dtor: baseGetter" << std::endl;
+  };
+  baseGetter(){
+//----    cerr << "basic ctor: baseGetter" << std::endl;
+  }
+};
+
+// A Getter that reads words from the command line:
+class cmdGetter : public baseGetter{
+public:
+  int argc;
+  char const * const * argv;
+  int wasgood;
+
+//ordinary constructor:
+  cmdGetter(int const _argc, char const * const * _argv);
+  virtual std::string get();
+  virtual int fail();
+};
+
+// A Getter that reads words from a file:
+class fileGetter : public baseGetter{
+public:
+  std::fstream file;
+
+//ordinary constructor:
+  fileGetter(const std::string fname);
+  virtual std::string get();
+  virtual int fail();
+  void dummy() {
+    if (Inf);
+  }
+};
+
+class arg_parser{
+public:
+  std::list <std::shared_ptr<baseGetter> > getsome;
+  std::string cur;
+  int fold_case;
+  std::string keyword;
+  int narg;             // number of args picked up using >> after this keyword
+
+// Constructor, for reading words from command line:
+  arg_parser(int const _argc, char const * const * _argv);
+// Constructor, for reading words from file:
+  arg_parser(const std::string fname);
+  void push(const std::string fname);
+  int fail();
+  template<class T=std::string> T nextArg();
+  template<class T=std::string> T nextRaw();
+
+// see if cur arg is a prefix of patt */
+  int prefix(const std::string& patt);
+  template <typename T> arg_parser& operator>>(T& val);
+  std::string xcase(const std::string& arg);
+};
+
+template<typename T> struct myTraits {static const char* name;};
+
+#define arg_parser_h
+#endif
diff --git a/binomial-coefficient-choose.h b/binomial-coefficient-choose.h
new file mode 100644
index 0000000..cd0d91f
--- /dev/null
+++ b/binomial-coefficient-choose.h
@@ -0,0 +1,6 @@
+#ifndef BINOMIAL_COEFFICIENT_CHOOSE_H
+#define BINOMIAL_COEFFICIENT_CHOOSE_H
+
+int binomialCoeff(int const n, int const k);
+
+#endif
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..45009b0
--- /dev/null
+++ b/makefile
@@ -0,0 +1,32 @@
+LDFLAGS := -lnlopt
+
+c_sources := poiss.c arg_parser.c parse_csv.c
+
+% : %.c       # cancel built-in one-step rule
+
+%.o : %.c
+	g++ -g -O2 -Wall $< -c
+
+% : %.o
+	g++ $^ $(LDFLAGS) -o $@
+
+.PHONY : all
+
+## dependency-finding scheme (with local mods) based on:
+## http://www.gnu.org/manual/make-3.77/html_mono/make.html#SEC42
+## (requires an include statement at end of the makefile)
+%.d: %.c
+	@$(SHELL) -ec '$(CXX) -MM $(CXXFLAGS) $< \
+		| sed '\''s/\($*\)\.o[ :]*/\1.o $@ : /g'\'' > $@; \
+		[ -s $@ ] || rm -f $@'
+
+################ end of pattern rules, start of real rules
+
+all : poiss
+
+poiss : poiss.o arg_parser.o parse_csv.o
+	g++ $^ $(LDFLAGS) -o $@
+
+ifndef NO_DOT_D
+  include $(c_sources:.c=.d)
+endif
diff --git a/parse_csv.c b/parse_csv.c
new file mode 100644
index 0000000..12e98f3
--- /dev/null
+++ b/parse_csv.c
@@ -0,0 +1,184 @@
+#include <istream>
+#include <string>
+#include <vector>
+#include <stdexcept>
+#include "parse_csv.h"
+using namespace std;
+
+/*************
+
+Format defined:
+  https://tools.ietf.org/html/rfc4180
+See also:
+  https://en.wikipedia.org/wiki/Comma-separated_values
+
+We depart from the RFC by ignoring \r, so that lines can
+end with either \r\n (dos style) or simply \n (unix style).
+
+We check for malformed quoted fields and throw exceptions.
+
+The last line of the input .csv file may or may not
+contain a newline:
+:; echo -en "xxx\n" | ./parse_csv_demo - | od -b -Anone
+ 170 170 170 012
+:; echo -en "xxx" | ./parse_csv_demo - | od -b -Anone
+ 170 170 170 012
+
+A line containing nothing but the newline has one field
+with a zero-length datum:
+:; echo -en "\n" | ./parse_csv_demo - | od -b -Anone
+ 012
+
+A line containing no data /and/ no newline isn't a line
+at all.  No line, no fields, no data:
+:; echo -en "" | ./parse_csv_demo - | wc
+      0       0       0
+
+************/
+
+string const Q("\"");
+
+enum class CSVState {
+    Start,
+    UnquotedField,
+    QuotedField,
+    QuoteInQuote
+};
+
+// Read one row of .csv table.
+// Return a zero-length vector if we encounter EoF
+//   before seeing any data.
+// Note that a table row can extend across multiple .csv file lines,
+//   if there are embedded newlines.
+//
+// Within quoted fields, we accept commas, quotes, and newlines,
+//   e.g "Hughie, ""Louie"", Dewey".
+template <class datum = qstring>
+vector<datum> readCSVRow(istream& input) {
+  using namespace std;
+  if (input.eof()) return vector<datum>(0);
+  CSVState state = CSVState::Start;
+  vector<datum> fields(0);
+  char c;
+  for (;;) {
+    input.read(&c, 1);
+    if (input.eof()) {
+      if (state == CSVState::QuotedField)
+        throw runtime_error("Unterminated quote in .csv file in field# "
+                               + to_string(fields.size())
+                    + "\n... namely <"
+                    + Q + fields.back() + ">");
+      break;
+    }
+    if (!input.good()) throw runtime_error("input error in .csv file");
+
+// if we made it this far, the line contains at least one datum:
+    if (fields.size() == 0) fields.push_back(datum());
+    switch (state) {
+      case CSVState::Start:
+        switch (c) {
+          case ',':     // zero-length field
+            fields.push_back(datum());
+            break;
+          case '"':
+            state = CSVState::QuotedField;
+// This is no-op if the datum is not of type qstring:
+            set_q(fields.back(), 1);
+            break;
+          case '\r':
+            break;
+          case '\n':
+            goto EndRecord;
+          default:
+            state = CSVState::UnquotedField;
+            fields.back().push_back(c);
+            break;
+        }
+        break;
+      case CSVState::UnquotedField:
+        switch (c) {
+          case ',': // start a new field:
+            state = CSVState::Start;
+            fields.push_back(datum());
+            break;
+          case '"': throw runtime_error
+            ("Stray quote in .csv file in field# "
+               + to_string(fields.size())
+               + "\n... namely <"
+               + fields.back() + Q + ">"
+            );
+            break;
+          case '\r': break;
+          case '\n': goto EndRecord;
+          default:  fields.back().push_back(c);
+            break;
+        }
+        break;
+      case CSVState::QuotedField:
+        switch (c) {
+          case '"': state = CSVState::QuoteInQuote;
+            break;
+// Accept anything except quote here;
+// that includes comma, \r, and \n.
+          default:  fields.back().push_back(c);
+            break;
+        }
+        break;
+// Previous state saw a quote; we consider what follows:
+      case CSVState::QuoteInQuote:
+        switch (c) {
+          case ',':     // comma after closing quote
+            state = CSVState::Start;
+            fields.push_back(datum());
+            break;
+          case '"':     // double ", map to plain "
+            fields.back().push_back('"');
+            state = CSVState::QuotedField;
+            break;
+          case '\r': break;
+          case '\n': goto EndRecord;
+          default:  throw runtime_error
+              ("Extraneous verbiage in .csv file after quoted field# "
+                    + to_string(fields.size())
+                    + "\n... namely <"
+                    + Q + fields.back() + Q
+                    + string(1,c) +">");
+            break;
+        }
+        break;
+    }
+  }
+  EndRecord:;;;;
+  return fields;
+}
+
+// Read entire CSV file.
+// Within quoted fields, we accept commas, quotes, and newlines,
+// e.g "Hughie, ""Louie"", Dewey".
+template <class datum = qstring>
+vector<vector<datum> > readCSV(istream &in) {
+  using namespace std;
+  vector<vector<datum> > table;
+  datum row;
+  while (!in.eof()) {
+    auto fields = readCSVRow<datum>(in);
+    if (fields.size() == 0) break;
+    table.push_back(fields);
+  }
+  return table;
+}
+
+// Explicit instantiation for the <qstring> version.
+// If you want some other version, you'll have to instantiate it.
+template vector<qstring> readCSVRow<qstring>(istream& input);
+template vector<vector<qstring> >readCSV<qstring>(istream &in);
+// Apparently it's not necessary to mention get_q() here,
+// but probably more portable to do it anyway:
+////template int get_q<qstring>(qstring const&);
+
+#if 1
+// Explicit instantiation for the <string> version.
+template vector<string> readCSVRow<string>(istream& input);
+template vector<vector<string> >readCSV<string>(istream &in);
+/////template int get_q<string>(string const&);
+#endif
diff --git a/parse_csv.h b/parse_csv.h
new file mode 100644
index 0000000..362c478
--- /dev/null
+++ b/parse_csv.h
@@ -0,0 +1,34 @@
+#ifndef PARSE_CSV__H
+#define  PARSE_CSV__H
+#include <vector>
+
+struct qstring : public std::string {
+  int q;                // was explicitly quoted
+// Constructors:
+  qstring(): q(0) {}
+  qstring(int const _q): q(_q) {}
+  qstring(std::string const &s): std::string(s), q(0) {}
+  qstring(std::string const &s, int const _q): std::string(s), q(_q) {}
+};
+
+// Helper functions, not class member functions:
+// Simple case, do nothing:
+template<class T> inline void set_q(T& target, int const val) {}
+template<class T> inline int  get_q(T const& target) {return 0;}
+
+// Specialization: Fancy case, actually act on q:
+template<> inline void set_q(qstring& target, int const val)  {
+  target.q = val;
+}
+
+template<> inline int  get_q(qstring const& target){
+  return target.q;
+}
+
+template <class datum = qstring>
+std::vector<datum> readCSVRow(std::istream& input);
+
+template <class datum = qstring>
+std::vector<std::vector<datum> > readCSV(std::istream &in);
+
+#endif
diff --git a/poiss.c b/poiss.c
new file mode 100644
index 0000000..70a20b9
--- /dev/null
+++ b/poiss.c
@@ -0,0 +1,658 @@
+/////////////////////////
+
+#include <iostream>
+using namespace std;
+
+void usage() {
+  cout << R"EoF(Program to fit to Poisson distributed data.
+Uses actual Poisson probability (i.e. not Gaussians, and
+therefore not least squares).
+
+Options include:
+  -h                    # print this message (and immediately exit)
+  -ifile   $fn          # input file, date for -fit
+  -gfile   $fn          # log information about initial guesses for -fit
+  -fit     $fn          # do the fit; send results to $fn
+
+# the rest are for testing and background investigations:
+  -dkcurve $fn          # smooth curves and scattered data at fake abscissas
+  -entropy $fn          # entropy of the Poisson distro
+                        #  as a function of the intensity (lambda)
+  -seed    $str         # seed the RNG with $str
+  -seed    --           # seed the RNG from /dev/random
+
+To write to stdout, use "-" for the $fn.
+
+See ./doit for a working example, including the SS --meld stanzas.
+)EoF";
+}
+
+#include <iomanip>      /* for setw */
+#include <vector>
+#include <nlopt.hpp>    /* the minimum-finder */
+#include "arg_parser.h"
+#include <string>
+#include <sstream>
+#include <map>
+#include "parse_csv.h"
+#include "poissonier.h"
+
+// Used for equal weighting:
+vector<double> const u5(5, 1.0);
+
+// Class to describe a sitch, i.e. a situation, i.e.
+// everything the objective function needs to know:
+struct sitcher {
+  vector<double> norm;    // keep the adjustable parameters close to unity
+  vector<double> t1;
+  vector<double> t2;
+  vector<int> obs;      // actual observed counts in the interval [t1, t2]
+};
+
+// Instantaneous rate at time ttt.
+// Takes parameters two at a time: (amplitude, decay rate).
+// If there are an odd number of parameters,
+// the last one is the baseline, i.e. independent of time,
+// equivalent to a decay with the rate locked at zero.
+// Decay rates are in nats per second.
+double inst_multi_exp(vector<double> const& prm,
+         double const ttt, vector<double> const norm) {
+  double rslt(0);
+  int nn = prm.size();
+  for (int ii = 0; ii < nn; ii+=2){
+    double amp(prm[ii] * norm[ii]);
+    double rate(1+ii < nn ? prm[1+ii] * norm[1+ii] : 0.);
+    if (rate) {
+      double rate(prm[1+ii]);
+// use the negative rate:
+      rslt += amp * exp(- rate * ttt);
+    } else {
+      rslt += amp;
+    }
+  }
+  return rslt;
+}
+
+// Format all the elements of a C++ vector,
+// creating a string suitable for printing.
+template<class T>
+string dump(vector<T> const arg) {
+  stringstream rslt;
+  string sep = "";
+  for (T val : arg) {
+    rslt << sep << val;
+    sep = ", ";
+  }
+  return rslt.str();
+}
+
+// Format all the elements of a counted array,
+// creating a string suitable for printing.
+template<class T>
+string dump(int const nn, T const arg[]) {
+  stringstream rslt;
+  string sep = "";
+  for (int ii = 0; ii < nn; ii++) {
+    rslt << sep << arg[ii];
+    sep = ",";
+  }
+  return rslt.str();
+}
+
+// numerically accurate sinh(arg)/arg
+template<class T>
+inline T sinhc(T arg) {
+  if (abs(arg) < 1e-4) return 1 + arg*arg/6;
+  return sinh(arg)/arg;
+}
+
+struct evendor {
+  double t;               // time
+  double n;               // number of events
+};
+
+// Intensity, usually denoted lambda,
+// i.e. integral of rate(t) dt over the given interval.
+double inty_multi_exp(vector<double> const& prm,
+        double const t1, double const t2, vector<double> const norm) {
+  double rslt(0);
+  int nn = prm.size();
+  for (int ii = 0; ii < nn; ii+=2){
+    double amp(prm[ii] * norm[ii]);
+    double rate(1+ii < nn ? prm[1+ii] * norm[1+ii] : 0.);
+    double mid((t2 + t1) / 2.);
+    double tau(t2 - t1);
+    rslt += amp * exp(-rate*mid) * tau * sinhc(rate*tau/2.);
+  }
+  return rslt;
+}
+
+// the actual objective function
+// returns total log probability, summed over all data points
+//
+// _sitch is unchanged,
+// but cannot be declared const, because bobyqa wouldn't like that.
+double limp(unsigned int const nprm, double const* _prm,
+        double * _grad, void * _sitch) {
+  sitcher& sitch(*(sitcher*)(_sitch));
+  if (_grad != 0) throw invalid_argument("grad");
+  if (sitch.norm.size() != nprm) throw invalid_argument("norm");
+  vector<double> prm(_prm, _prm+nprm);
+  if (prm.size() != nprm) throw logic_error("vector ctor");
+  double rslt(0);
+  int npts(sitch.t1.size());
+  for (int ii = 0; ii < npts; ii++){
+    double inty = inty_multi_exp(prm, sitch.t1[ii], sitch.t2[ii], sitch.norm);
+    rslt -= poissonier::lpmd(inty, sitch.obs[ii]);
+  }
+  return rslt;
+}
+
+// Scan the data array,
+// looking for records that look like headers
+// (as opposed to numerical data).
+int count_header(vector<vector<string> > const aoa){
+  int NR = aoa.size();
+  int hh = 0;
+  for (; hh < NR; hh++) {
+    if (aoa[hh].size() == 0) continue;
+    string word = aoa[hh][0];
+    size_t where;
+    where = word.find_first_not_of(" ");
+    if (where == string::npos) continue;
+    word = word.substr(where);
+    try {
+      stod(word, &where);       // don't care about return value
+    }
+    catch (exception& ee) {
+      continue;
+    }
+    word = word.substr(where);
+    where = word.find_first_not_of(" ");
+    if (where == string::npos) break;
+  }
+  return hh;
+}
+
+// Some tricky heuristics.
+// Guess a starting point for the fitting process
+// (i.e. minimization process).
+// A good guess speeds things up and greatly increases
+// the chance of finding the global minimum (as opposed
+// to getting stuck in a worthless local minimum, or
+// diverging to arrant nonsense).
+struct guesser {
+  int hh;
+  int npts;
+  vector<evendor> evt;
+  vector<double> model;
+  sitcher sitch;
+
+  guesser() : hh(0), npts(0) {}
+
+  void setup(vector<vector<string> > const aoa) {
+    int NR = aoa.size();         // includes header row
+    if (NR == 0) throw invalid_argument("Zero-length input file");
+    hh = count_header(aoa);
+    npts = NR - hh;
+    if (0) cout << "NR: " << NR
+         << "  npts: " << npts
+         << endl;
+    evt = vector<evendor>(npts);
+    double minute(60);
+    for (int ii = 0; ii < npts; ii++) {
+  // file times are in minutes; convert to SI units here:
+      evt[ii].t = stod(aoa[hh+ii][0]) * minute;
+      evt[ii].n = stod(aoa[hh+ii][1]);
+    }
+
+    double max_t = evt[npts-1].t;
+    double max_n = evt[npts-1].n;
+
+  // starting point of the baseline estimate,
+  // as a fraction of the total span of the data,
+  // assuming data starts at zero:
+    double tail_frac = 0.9;
+    int tail_i = -1;
+
+  // half life (in seconds) of the fast component:
+    double fast_h = 307.2;
+    double fast_a = M_LN2 / fast_h;
+    int fast_i = -1;
+
+  // half life (in seconds) of the slow component:
+    double slow_h = 45720;
+    double slow_a = M_LN2 / slow_h;
+
+    double dead(5);       // 2**-5 = 3%
+
+    for (int ii = 0; ii < npts; ii++) {
+      double time = evt[ii].t;
+      if (time < tail_frac * max_t) tail_i = ii;
+      if (time < fast_h * dead) fast_i = ii;
+    }
+
+    if (0) cout << "npts: " << npts
+          << " : " << max_t
+          << "," << max_n
+          << endl;
+
+    double tail_t = evt[tail_i].t;
+    double tail_n = evt[tail_i].n;
+    double tail_r = (max_n - tail_n) / (max_t - tail_t);
+
+    if (0) cout << "tail_i:   " << tail_i
+          << " : " << tail_t
+          << ","   << tail_n
+          << "    " << tail_r
+          << endl;
+
+    double fast_t = evt[fast_i].t;
+    double fast_n = evt[fast_i].n;
+    if (0) cout << "fast_i:   " << fast_i
+          << " : " << fast_t
+          << ","   << fast_n
+  //        << "    " << fast_r
+          << endl;
+
+    if (tail_i < 0) throw invalid_argument("wtf?");
+    if (fast_i < 0) throw invalid_argument("wtf?");
+
+  // beginning times:
+    double slow_tb = evt[fast_i].t;
+    double slow_nb = evt[fast_i].n;
+
+    double slow_dt = max_t - slow_tb;
+    double slow_dn = max_n - slow_nb;
+    double slow_mag = slow_dn - slow_dt * tail_r;
+    slow_mag /= (1 - exp(-slow_a * slow_dt) * (1 + slow_dt * slow_a));
+  // valid at time slow_tb:
+    double slow_rx = slow_mag * slow_a;
+  // valid at time zero:
+    double slow_r = slow_rx * exp(slow_a * slow_tb);
+
+    double slow_rem = slow_r * exp(-slow_a * max_t);
+    double bl_r = tail_r - slow_rem;
+
+    if (0) cout << "slow_r: " << slow_r
+          << "  slow_rx: " << slow_rx
+          << "  slow_mag: " << slow_mag
+          << "  slow_rem: " << slow_rem
+          << "  tail_r: " << tail_r
+          << "  bl_r: " << bl_r
+          << endl;
+
+    vector<double> slow_model{slow_r, slow_a, bl_r};
+
+    double fast_t0 = evt[0].t;
+    double fast_n0 = evt[0].n;
+    double fast_t1 = evt[fast_i].t;
+    double fast_n1 = evt[fast_i].n;
+    double model_inty  = inty_multi_exp(slow_model, fast_t0, fast_t1, u5);
+    double fast_tot = fast_n1 - fast_n0;
+    double fast_dn = fast_tot - model_inty;
+    double fast_r = fast_dn * fast_a;
+    if (0) cout << "fast_r: " << fast_r
+           << " fast_dn: " << fast_dn
+          << "  fast_tot: " << fast_tot
+          << "  model_inty: " << model_inty
+          << endl;
+
+    model = vector<double>{fast_r, fast_a};
+    model.insert(model.end(), slow_model.begin(), slow_model.end());
+  }
+
+// Reformat the data, to make it more useful to
+// the objective function:
+  void more() {
+    for (int ii = 1; ii < npts; ii++) {
+      sitch.t1.push_back(evt[ii-1].t);
+      sitch.t2.push_back(evt[ii  ].t);
+      sitch.obs.push_back(evt[ii].n - evt[ii-1].n);
+    }
+  }
+
+// Show the guessed parameters.
+// Also do a sweep of one variable, as a qualitative sanity check.
+  void show(string const ofile) {
+    if (ofile == "") return;
+    ofstream xxout;
+    if (ofile != "-") {
+      xxout.open(ofile);
+    }
+    ostream& xout(ofile != "-" ? xxout : cout);
+
+    size_t ss = sitch.t1.size();
+    xout << ss << endl;
+    xout << sitch.t1[ss-1]
+          << "  " << sitch.t2[ss-1]
+          << "  " << sitch.obs[ss-1]
+          << endl;
+
+    vector<double> prm(u5);
+
+    xout << dump(model) << endl;
+
+    xout << "bl/norm,bl,limp" << endl;
+    double bl0 = prm[4];
+    void* context = (void*)(&sitch);
+    int nprm(prm.size());
+
+    for (double bl = bl0*.9; bl <= bl0*1.1; bl += bl/100.) {
+      prm[4] = bl;
+      double limpy = limp(nprm, prm.data(), 0, context);
+      xout << bl
+          << "," << bl*sitch.norm[4]
+          << "," << limpy << endl;
+    }
+  }
+};
+
+// Decode result codes returned by nlopt functions.
+// This is documented to be part of the nlopt library
+// but is absent from the version I have.
+const char *nlopt_result_to_string(nlopt_result result)
+{
+  switch(result)
+  {
+    case NLOPT_FAILURE: return "FAILURE";
+    case NLOPT_INVALID_ARGS: return "INVALID_ARGS";
+    case NLOPT_OUT_OF_MEMORY: return "OUT_OF_MEMORY";
+    case NLOPT_ROUNDOFF_LIMITED: return "ROUNDOFF_LIMITED";
+    case NLOPT_FORCED_STOP: return "FORCED_STOP";
+    case NLOPT_SUCCESS: return "SUCCESS";
+    case NLOPT_STOPVAL_REACHED: return "STOPVAL_REACHED";
+    case NLOPT_FTOL_REACHED: return "FTOL_REACHED";
+    case NLOPT_XTOL_REACHED: return "XTOL_REACHED";
+    case NLOPT_MAXEVAL_REACHED: return "MAXEVAL_REACHED";
+    case NLOPT_MAXTIME_REACHED: return "MAXTIME_REACHED";
+///???????    case NLOPT_NUM_RESULTS: return NULL;
+  }
+  return NULL;
+}
+
+// Add something to a particular component of a vector.
+template<class T>
+vector<T> goose(vector<T> const& vvv, int const ii, T const delta) {
+  vector<T> rslt(vvv);
+  rslt[ii] += delta;
+  return rslt;
+}
+
+void do_fit(poissonier& poi, string const ifile,
+        string const ofile, string const gfile) {
+  if (ifile == "") {
+    if (ofile != "") throw invalid_argument("do_fit needs an input file");
+    return;
+  }
+
+  ifstream inx;
+  if (ifile != "-") {
+    inx.open(ifile);
+    if (! inx.good()) {
+      cerr << "Cannot open input '" << ifile << "'" << endl;
+      exit(1);
+    }
+  }
+
+  istream& in(ifile == "-" ? cin : inx);
+  vector<vector<string> > aoa;
+  aoa = readCSV<string>(in);
+  guesser ggg;
+  ggg.setup(aoa);
+  ggg.more();
+  ggg.sitch.norm = ggg.model;
+  ggg.show(gfile);
+
+  vector<double> prm = u5;
+  int nprm(prm.size());
+  vector<double> const lower(nprm, 0.5);
+  vector<double> const upper(nprm, 1.5);
+
+  nlopt_result rslt = NLOPT_FORCED_STOP; // avoid "unused" warning
+  double limp_end(-9e99);
+  vector<double> found(prm);       // will get modified in place
+  int OK(0);
+  try {
+    void* context = (void*)(&ggg.sitch);
+    nlopt_opt oppy = nlopt_create(NLOPT_LN_BOBYQA, nprm);
+    rslt = nlopt_set_lower_bounds1(oppy, 0.5);
+    rslt = nlopt_set_upper_bounds1(oppy, 1.5);
+    rslt = nlopt_set_min_objective(oppy, limp, context);
+    rslt = nlopt_set_xtol_rel(oppy, 1e-7);
+    rslt = nlopt_optimize(oppy, found.data(), &limp_end);
+    OK = 1;
+  }
+  catch (exception& eee) {
+    cout << "Fitting bombed out: " << eee.what() << endl;
+  }
+
+  if (OK && ofile != "") {
+    ofstream xxout;
+    if (ofile != "-") {
+      xxout.open(ofile);
+    }
+    ostream& xout(ofile != "-" ? xxout : cout);
+
+    double DoF(ggg.npts - nprm);
+    xout << ifile << endl;
+    cout << "Fit returns:, " << rslt
+          << ", i.e., " << nlopt_result_to_string(rslt)
+          << ",   limp:, " << limp_end
+          << ",   perdof:, " << limp_end / (ggg.npts - nprm)
+          << endl;
+    xout << "Fit returns:, " << rslt
+          << ", i.e., " << nlopt_result_to_string(rslt)
+          << ",   limp:, " << limp_end
+          << ",   perdof:, " << limp_end / DoF
+          << endl;
+    xout << endl;
+    xout << ",fast.amp, fast.dk, slow.amp, slow.dk, bl" << endl;
+    xout << "Normed:, " << dump(found) << endl;
+    vector<double> combi(nprm);
+    for (int ii = 0; ii < nprm; ii++) {
+      combi[ii] = found[ii] * ggg.sitch.norm[ii];
+    }
+    xout << "SI:,     " << dump(combi) << endl;
+    vector<double> flip(combi);
+    flip[1] = M_LN2/combi[1];
+    flip[3] = M_LN2/combi[3];
+    xout << "½life:,  " << dump(flip) << endl;
+
+// Calculate the uncertainties.
+// In particular, calculate the Mahalanobis metric
+// i.e. the second derivative (Hessian) of the log improbability.
+    vector<vector<double> > covar(nprm, vector<double>(nprm));
+    vector<double> probe(u5);
+    sitcher sitch(ggg.sitch);
+    sitch.norm = combi;
+    void* context = (void*)(&sitch);
+    double delta(0.01);
+    xout << endl;
+    for (int ii = 0; ii < nprm; ii++) {
+      string sep = "";
+      for (int jj = 0; jj < nprm; jj++) {
+        double maha;
+        if (ii != jj) {
+          maha = limp(nprm, goose(goose(
+            probe, ii,  delta), jj,  delta).data(), 0, context)
+              + limp(nprm, goose(goose(
+            probe, ii, -delta), jj, -delta).data(), 0, context)
+              - limp(nprm, goose(goose(
+            probe, ii,  delta), jj, -delta).data(), 0, context)
+              - limp(nprm, goose(goose(
+            probe, ii, -delta), jj,  delta).data(), 0, context);
+            maha = maha/4./delta/delta;
+        } else {
+          maha = limp(nprm, goose(probe, ii,  delta).data(), 0, context)
+              + limp(nprm, goose(probe, ii, -delta).data(), 0, context)
+              -2.0*limp(nprm, probe.data(), 0, context);
+          maha = maha/delta/delta;
+        }
+        xout << sep << setprecision(3) << setw(11) << fixed << maha;
+        sep = ", ";
+      }
+      xout << endl;
+    }
+  }
+}
+
+// Crude reconnaissance.
+// Calculate the decay curves using made-up parameters.
+// Not in the critical path.
+void do_dk(poissonier& poi, string const ofile) {
+  if (ofile == "") return;
+  ofstream xxout;
+  if (ofile != "-") {
+    xxout.open(ofile);
+  }
+  ostream& xout(ofile != "-" ? xxout : cout);
+  sitcher sitch;        // selected abscissas
+  sitcher smooth;       // all abscissas
+
+  double step(1);
+  vector<double> prm( {30, .1, 10, .01, 1} );
+
+  int dt(5);
+  for (int ii = 0; ii < 500; ii+= dt) {
+    double tt1(step*ii);
+    double tt2(tt1 + dt);
+    double intensity(inty_multi_exp(prm, tt1, tt2, u5));
+    int obs = poi.sample(intensity);
+    sitch.t1.push_back(tt1);
+    sitch.t2.push_back(tt2);
+    sitch.obs.push_back(obs);
+  }
+
+  for (int ii = 0; ii < 500; ii++) {
+    smooth.t1.push_back(step*ii);
+  }
+
+// here with both sitchers fully constructed.
+
+  xout << "Seed:," << poi.graine << endl;
+// output both the smooth curves
+// and the observations (to the extent they exist)
+  for (int ii = 0; ii < 500; ii++) {
+    double tt1(smooth.t1[ii]);
+    double sminty(inst_multi_exp(prm, tt1, u5));
+    xout << tt1 << "," << sminty;
+    if (ii < (int) sitch.t1.size()) {
+      double fake(max(0.+sitch.obs[ii], 0.2));   // to facilitate log axes
+      double tt1(sitch.t1[ii]);
+      double tt2(sitch.t2[ii]);
+      double minty(inty_multi_exp(prm, tt1, tt2, u5));
+      xout << "," << tt1
+           << "," << tt2
+           << ",x"
+           << "," << minty
+           << "," << minty
+           << ",x"
+           << "," << fake
+           << "," << fake
+           << ",x";
+    }
+    xout << endl;
+  }
+}
+
+// Class used by do_entropy.
+// Use a class rather than a plain function,
+// because it returns multiple results.
+struct entroper {
+ double ptot, stot, ii;
+ void go(double const rw_obs, double const rw_mod) {
+    ptot = 0;
+    stot = 0;
+    int ii;
+    int lim = int(ceil(rw_obs));
+    lim *= 10;
+    for (ii = 0; ii < lim; ii++) {
+      if (ptot > .999999) break;
+      double pobs = poissonier::pmd(rw_obs, ii);
+      if (pobs != 0) {
+        double lpmod = poissonier::lpmd(rw_mod, ii);
+        ptot += pobs;
+        stot -= pobs * lpmod;
+      } else {
+        // don't do a calculation that could
+        // possibly multiply zero by -infinity.
+      }
+    }
+  }
+};
+
+// Out of curiosity, calculate the entropy of the Poisson distribution
+// as a function of intensity (lambda).
+// Not in the critical path.
+void do_entropy(string const ofile) {
+  if (ofile == "") return;
+  ofstream xxout;
+  if (ofile != "-") {
+    xxout.open(ofile);
+  }
+  ostream& xout(ofile != "-" ? xxout : cout);
+
+  double Delta(1.01);
+  xout << "Delta," << Delta << endl;
+  xout << "rw, ii, ptot, Scat, Sdog, Semu" << endl;
+  double ratio(pow(10., .005));
+  entroper cat, dog, emu;
+  for (double rw = 0.01; rw < 1500; rw *= ratio) {
+    cat.go(rw, rw);
+    dog.go(rw, rw/Delta);
+    emu.go(rw, rw*Delta);
+    xout << rw
+        << "," << cat.ptot
+        << "," << cat.ii
+        << "," << cat.stot
+        << "," << dog.stot
+        << "," << emu.stot
+        << endl;
+  }
+}
+
+int main(int argc, char * const * argv) {
+  poissonier poi;
+  string seed;
+  string ifile;
+  arg_parser arg(argc, argv);
+  arg.fold_case = 1;
+  string progname = arg.nextRaw();
+  string dkfile, entfile, fitfile, gfile;
+
+  for (;;){
+    string raw_arg = arg.nextRaw();         // get next keyword
+    if (arg.fail()) break;
+    int where = arg.cur.length();
+    if (where){
+      if (arg.cur[where-1] == ':') arg.cur = arg.cur.substr(0,where-1);
+    }
+    if (0) {}
+    else if (arg.prefix("help") || arg.prefix("-h")) {
+      usage();
+      exit(0);
+    }
+    else if (arg.prefix("-ifile")) arg >> ifile;
+    else if (arg.prefix("-seed")) arg >> seed;
+    else if (arg.prefix("-entropy")) arg >> entfile;
+    else if (arg.prefix("-dkcurve")) arg >> dkfile;
+    else if (arg.prefix("-gfile")) arg >> gfile;
+    else if (arg.prefix("-fit")) arg >> fitfile;
+    else {
+      cerr << "Unrecognized command '" << raw_arg << "'" << endl;
+      exit(1);
+    }
+  }
+  if (seed == "--") seed = poi.randomize();
+  else if (seed != "") poi.seed(seed);
+
+  if (!(   fitfile.length()
+        || dkfile.length()
+        || entfile.length()
+     ))
+         dkfile = "-";
+  do_dk(poi, dkfile);
+  do_fit(poi, ifile, fitfile, gfile);
+  do_entropy(entfile);
+}
diff --git a/poissonier.h b/poissonier.h
new file mode 100644
index 0000000..95da15e
--- /dev/null
+++ b/poissonier.h
@@ -0,0 +1,108 @@
+#ifndef POISSONIER_H
+#define POISSONIER_H
+
+#include <random>       /* for seed_seq */
+#include <iostream>
+#include <time.h>       /* for clock_gettime */
+#include <vector>
+#ifdef BOOST_CPD
+#  include <boost/math/distributions/poisson.hpp>
+#endif
+
+static char const printable[64+1] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                 "abcdefghijklmnopqrstuvwxyz"
+                 "0123456789._";
+
+std::string base64(unsigned long int arg) {
+  std::string str;
+  for (;;) {
+    str = printable[arg & 63] + str;
+    arg >>= 6;
+    if (! arg) break;
+  }
+  return str;
+}
+
+std::string randomstr(unsigned int const nn=15) {
+  using namespace std;
+// FIXME: ??? Could maybe use std::bitset here:
+  random_device rd;
+  string str;
+  while (str.length() < nn) {
+    unsigned int foo(rd());
+    int nch = (sizeof(foo) * 8) / 6;
+    for (int ii = 0; ii < nch; ii++) {
+      str += printable[foo & 63];
+      foo >>= 6;
+    }
+  }
+  return str;
+}
+
+class poissonier {
+public:
+  std::mt19937 basegen;
+  std::string graine;           // last seed fed to basegen
+
+// constructor
+  poissonier () {
+    timespec time;
+    clock_gettime(CLOCK_REALTIME, &time);
+    graine = base64(time.tv_sec);
+    graine += base64(time.tv_nsec);
+    seed(graine);
+  }
+  double sample(double const lambda) {
+// There is no way to change the intensity after the
+// distro is constructed, so construct a new one each time:
+    return std::poisson_distribution<>(lambda)(basegen);
+  }
+  void seed(std::string const seed) {
+// Need a temporary variable here, because the seed() function
+// modifies it:
+    graine = seed;
+    std::seed_seq seed1(seed.begin(), seed.end());
+    basegen.seed(seed1);
+  }
+  std::string randomize() {
+    graine = randomstr(15);
+    seed(graine);
+    return graine;
+  }
+  void check(double const rate) {
+    using namespace std;
+    cout << "Checking: " << rate
+        << "  gives: " << sample(rate)
+        << endl;
+  }
+
+// Note this is stateless. Static member function.
+//
+// Log of the probability mass distribution:
+  static double lpmd(double const lambda, int const k){
+    // https://en.wikipedia.org/wiki/Poisson_distribution#Definition
+    if (lambda < 0) throw std::invalid_argument("negative Poisson intensity");
+    if (lambda == 0) {
+      if (k == 0) return 0;     // 100% probability
+      return -INFINITY;         //   0% probability
+    }
+    return k * log(lambda) - lambda - lgamma(k + 1.0);
+  }
+
+// pmd = probability mass distribution
+  inline static double pmd(double const lambda, int const k){
+    return exp(lpmd(lambda, k));
+  }
+
+#ifdef BOOST_CPD
+// not implemented
+// not needed, and would require boost
+// cpd = cumulative probability distribution
+  static double cpd(double const lambda, int const k){
+    boost::math::poisson_distribution<> pd(lambda);
+    return boost::math::cpd(pd, k);
+  }
+#endif
+};
+
+#endif /* POISSONIER_H */
author	John Denker <jsd@av8n.com>	2021-10-17 10:10:18 -0700
committer	John Denker <jsd@av8n.com>	2021-10-17 11:09:24 -0700
commit	74ddd0381aa1b1a90eb0d5300fa576cb2348eeac (patch)
tree	72a9dded6f800467d52e479eb37574e6de5f2e6c
parent	634d365a03cb0581a062cd3cf4db9ae69f1cde26 (diff)