From ba5331773afd59530b598fb51b188203bb323bb5 Mon Sep 17 00:00:00 2001 From: John Denker Date: Mon, 18 Oct 2021 14:09:50 -0700 Subject: add program to perform SVD (singular value decomposition) --- README.md | 4 +- makefile | 11 ++- svd.c | 270 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 280 insertions(+), 5 deletions(-) create mode 100644 svd.c diff --git a/README.md b/README.md index c43e61a..e38117e 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,9 @@ therefore not least squares). Download using:
``git clone https://www.av8n.com/cgit/poissfit`` -Requires the `libnlopt-cxx-dev` package, which is easily installable on Ubuntu and similar platforms. +The fitting program requires the `libnlopt-cxx-dev` package. +The SVD program requires `libarmadillo-dev` package. +Both of those are easily installable on Ubuntu and similar platforms. In the poissfit directory, compile using a simple `make` command. That works on my Ubuntu platform. Other platforms are likely diff --git a/makefile b/makefile index 45009b0..13b8e92 100644 --- a/makefile +++ b/makefile @@ -1,6 +1,6 @@ -LDFLAGS := -lnlopt -c_sources := poiss.c arg_parser.c parse_csv.c +c_mains := poiss.c svd.c +c_sources := $(c_mains) arg_parser.c parse_csv.c % : %.c # cancel built-in one-step rule @@ -22,10 +22,13 @@ c_sources := poiss.c arg_parser.c parse_csv.c ################ end of pattern rules, start of real rules -all : poiss +all : $(c_mains:.c=) poiss : poiss.o arg_parser.o parse_csv.o - g++ $^ $(LDFLAGS) -o $@ + g++ $^ -lnlopt -o $@ + +svd : svd.o arg_parser.o parse_csv.o + g++ $^ -larmadillo -o $@ ifndef NO_DOT_D include $(c_sources:.c=.d) diff --git a/svd.c b/svd.c new file mode 100644 index 0000000..703b832 --- /dev/null +++ b/svd.c @@ -0,0 +1,270 @@ +///////////////////////// + +#include +using namespace std; + +void usage() { + cout << R"EoF(Program to fit to perform Singular Value Decomposition (SVD). +Typical usage: + ./svd -m -1 -ifile whatever.csv -svd eigen.csv + +Options include: + -h # print this message (and immediately exit) + -ifile $fn # input data file + -svd $fn # write the results to $fn + -meta NN # use NNth line of input header as metadata + # NN=-1 means use *last* line of input header + +To write to stdout, use "-" for the $fn. +)EoF"; +} + +#include /* for setw */ +#include +#include "arg_parser.h" +#include +#include +#include +#include "parse_csv.h" +#include "poissonier.h" +#include + + +// Format all the elements of a C++ vector, +// creating a string suitable for printing. +template +string dump(vector const arg) { + stringstream rslt; + string sep = ""; + for (T val : arg) { + rslt << sep << val; + sep = ", "; + } + return rslt.str(); +} + +// Format all the elements of a counted array, +// creating a string suitable for printing. +template +string dump(int const nn, T const arg[]) { + stringstream rslt; + string sep = ""; + for (int ii = 0; ii < nn; ii++) { + rslt << sep << arg[ii]; + sep = ","; + } + return rslt.str(); +} + +// numerically accurate sinh(arg)/arg +template +inline T sinhc(T arg) { + if (abs(arg) < 1e-4) return 1 + arg*arg/6; + return sinh(arg)/arg; +} + +struct evendor { + double t; // time + double n; // number of events +}; + +// Intensity, usually denoted lambda, +// i.e. integral of rate(t) dt over the given interval. +double inty_multi_exp(vector const& prm, + double const t1, double const t2, vector const norm) { + double rslt(0); + int nn = prm.size(); + for (int ii = 0; ii < nn; ii+=2){ + double amp(prm[ii] * norm[ii]); + double rate(1+ii < nn ? prm[1+ii] * norm[1+ii] : 0.); + double mid((t2 + t1) / 2.); + double tau(t2 - t1); + rslt += amp * exp(-rate*mid) * tau * sinhc(rate*tau/2.); + } + return rslt; +} + +// Add something to a particular component of a vector. +template +vector goose(vector const& vvv, int const ii, T const delta) { + vector rslt(vvv); + rslt[ii] += delta; + return rslt; +} + +typedef vector > VVS; + +void dump_matrix(ostream& ouch, arma::mat const &mmm, + int const pr = 3, int const texmode=0){ + int wd(pr+6); + int dim(mmm.n_rows); + for (int ii = 0; ii < dim; ii++) { + for (int jj = 0; jj < dim; jj++) { + if (texmode) { + if ( mmm(ii, jj) > 3 ) + ouch << "\\y{"; /* } */ + else ouch << "\\x{"; /* } */ + ouch << setprecision(pr) << setw(wd) << mmm(ii, jj) + /* { */ << "} "; + } else { + ouch << setprecision(pr) << setw(wd) << mmm(ii, jj) << ", "; + } + } + if (texmode) { + ouch << " \\z" << endl; + } else { + ouch << endl; + } + } +} + +string dump(arma::mat const mmm, string const prefix="") { + int NR = mmm.n_rows; + int NC = mmm.n_cols; + stringstream rslt; + for (int ii = 0; ii < NR; ii++) { + string sep = prefix; + for (int jj = 0; jj < NC; jj++) { + rslt << sep << fixed << setw(11) << mmm(ii, jj); + sep = ", "; + } + rslt << endl; + } + return rslt.str(); +} + +string dump(arma::vec vvv, arma::mat const mmm) { + int NR = mmm.n_rows; + int NC = mmm.n_cols; + stringstream rslt; + for (int ii = 0; ii < NR; ii++) { + rslt << fixed << setw(11) << vvv(ii) + << ","; // skip a column + for (int jj = 0; jj < NC; jj++) { + rslt << "," << fixed << setw(11) << mmm(ii, jj); + } + rslt << endl; + } + return rslt.str(); +} + +string dumpx(arma::vec vvv, arma::mat const mmm) { + int NR = mmm.n_rows; + int NC = mmm.n_cols; + stringstream rslt; + for (int ii = 0; ii < NR; ii++) { + double tmp = vvv(ii); + rslt << fixed << setw(11) << tmp; + rslt << ","; // skip a column + for (int jj = 0; jj < NC; jj++) { + rslt << "," << fixed << setw(11) << mmm(ii, jj); + } + rslt << endl; + } + return rslt.str(); +} + +void do_svd(string const ifile, string const ofile, + int const gotmeta, int whichmeta){ + if (ifile == "") { + if (ofile != "") throw invalid_argument("do_fit needs an input file"); + return; + } + + ifstream inx; + if (ifile != "-") { + inx.open(ifile); + if (! inx.good()) { + cerr << "Cannot open input '" << ifile << "'" << endl; + exit(1); + } + } + + istream& in(ifile == "-" ? cin : inx); + VVS alldata; + alldata = readCSV(in); + cout << alldata.size() << endl; + int hh = count_header(alldata); + cout << hh << endl; + VVS aoa(alldata.begin()+hh, alldata.end()); + int NR = aoa.size(); + if (NR == 0) throw invalid_argument("no data???"); + int NC = aoa[0].size(); + cout << "NR: " << NR + << " NC: " << NC << endl; + arma::mat maha(NR, NC, arma::fill::zeros); + for (int ii = 0; ii < NR; ii++) { + for (int jj = 0; jj < NC; jj++) { + maha(ii, jj) = stod(aoa[ii][jj]); + } + } + + if (ofile != "") { + ofstream xxout; + if (ofile != "-") { + xxout.open(ofile); + } + ostream& xout(ofile != "-" ? xxout : cout); + + string meta; + if (gotmeta) { + if (whichmeta < 0) whichmeta += hh; + meta = dump(alldata[whichmeta]); + } + + xout << ",," << meta << endl; + xout << dump(maha, ",,"); // already includes newlines + + arma::mat U, V; + arma::vec eival; + arma::svd(U, eival, V, maha); + + xout << endl; + xout << "cost²,," << meta << endl; + xout << dumpx(eival, V) << endl; + } +} + +int main(int argc, char * const * argv) { + poissonier poi; + string seed; + string ifile; + arg_parser arg(argc, argv); + arg.fold_case = 1; + string progname = arg.nextRaw(); + string svdfile; + int gotmeta(0); + int whichmeta(0); + + for (;;){ + string raw_arg = arg.nextRaw(); // get next keyword + if (arg.fail()) break; + int where = arg.cur.length(); + if (where){ + if (arg.cur[where-1] == ':') arg.cur = arg.cur.substr(0,where-1); + } + if (0) {} + else if (arg.prefix("help") || arg.prefix("-h")) { + usage(); + exit(0); + } + else if (arg.prefix("-meta")) { + gotmeta++; + arg >> whichmeta; + } + else if (arg.prefix("-ifile")) arg >> ifile; + else if (arg.prefix("-seed")) arg >> seed; + else if (arg.prefix("-svd")) arg >> svdfile; + else { + cerr << "Unrecognized command '" << raw_arg << "'" << endl; + exit(1); + } + } + if (seed == "--") seed = poi.randomize(); + else if (seed != "") poi.seed(seed); + + if (!(svdfile.length() + )) + svdfile = "-"; + do_svd(ifile, svdfile, gotmeta, whichmeta); +} -- cgit v1.2.3