// Timing with clock doesn't work.  Apparently clock adds the clock
// ticks on all processors.

#include "libscl.h"
#include <omp.h>
#include <ctime>

using namespace std;
using namespace scl;

int main(int argc, char** argp, char** envp)
{
  clock_t time_start, time_stop;
      
  INT_32BIT seed = 12345;

  const INTEGER arows = 10000;
  const INTEGER acols = 200;
  const INTEGER brows = acols;
  const INTEGER bcols = 1000;
  const INTEGER rrows = arows;
  const INTEGER rcols = bcols;

  const INTEGER asize = arows*acols;
  const INTEGER bsize = brows*bcols;
  const INTEGER rsize = rrows*rcols;

  cout << '\n';
  cout << "arows = " << arows << '\n';
  cout << "acols = " << acols << '\n';
  cout << "bcols = " << bcols << '\n';
  cout << '\n';

  realmat a(arows,acols);
  realmat b(brows,bcols);
  realmat r(rrows,rcols);

  for (INTEGER i=1; i<=asize; ++i) a[i] = unsk(seed);
  for (INTEGER i=1; i<=bsize; ++i) b[i] = unsk(seed);

  for (INTEGER i=1; i<=rsize; ++i) r[i] = 0.0;

  time_start = clock();
  for (INTEGER j=1; j<=bcols; ++j) {
    for (INTEGER k=1; k<=acols; ++k) {
      for (INTEGER i=1; i<=arows; ++i) {
        r(i,j) += a(i,k)*b(k,j);
      }
    }
  }
  time_stop = clock();
  cout<<"mult time = " << time_stop - time_start << '\n';

  REAL rnorm = 0.0;
  for (INTEGER i=1; i<=rsize; ++i) rnorm += pow(r[i],2);
  rnorm = sqrt(rnorm);
  cout<<"rnorm = " << rnorm << '\n';

  INTEGER procs = omp_get_num_procs();
  INTEGER maxt = omp_get_max_threads();
  INTEGER chunk = bcols/maxt > 1 ? bcols/maxt : 1;

  cout << '\n';
  cout << "Number of processors = " << procs << '\n';
  cout << "Max threads = " <<  maxt  << '\n';
  cout << "Value of chunk = " <<  chunk  << '\n';
  cout << "Columns per thread = " <<  bcols/chunk  << '\n';
  cout << '\n';

  for (INTEGER i=1; i<=rsize; ++i) r[i] = 0.0;

  time_start = clock();

  #pragma omp parallel default(shared)
  {
    #pragma omp for schedule(static, chunk)

    for (INTEGER j=1; j<=bcols; ++j) {
      for (INTEGER k=1; k<=acols; ++k) {
        for (INTEGER i=1; i<=arows; ++i) {
          r(i,j) += a(i,k)*b(k,j);
        }
      }
    }

  }

  time_stop = clock();
  cout<<"openmp mult time = " << time_stop - time_start << '\n';

  rnorm = 0.0;
  for (INTEGER i=1; i<=rsize; ++i) rnorm += pow(r[i],2);
  rnorm = sqrt(rnorm);
  cout<<"rnorm = " << rnorm << '\n';
  cout << '\n';

  return 0;
}
