/* PoempelFox MPIIO Benchmark.
 * Based on Thomas Zeiser's SFS-File-Corruption-Reproducer
 */
 
#include <errno.h>
#include <limits.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/* Run-time configuration and shared state. The option-controlled values are
 * set by the command line parser in main(); the read and write routines
 * only read them. */
unsigned long long bufsize = 64 * 1024 * 1024; /* bytes per MPI-IO transfer (--bufsize); default 64 MiB */
unsigned long long numiter = 8; /* number of bufsize-sized transfers each MPI process performs (--numiter) */
char * fname = "MPIIObench-testfile.dat"; /* name of the shared test file (--file) */
int rank = -1; /* rank of this process in MPI_COMM_WORLD; -1 until main() queries it */
int mp_size = -1; /* number of processes in MPI_COMM_WORLD; -1 until main() queries it */
int enableread  = 0; /* set to 1 by --read */
int enablewrite = 0; /* set to 1 by --write */
int verblev = 0; /* verbosity level, incremented once per -v */
unsigned char * buf; /* transfer buffer of bufsize bytes, allocated in main() */

/* Byte pattern used as file content. The write routine fills the file with
 * this table repeated end to end (the byte at file offset o is
 * sortofrandomchars[o % sizeof(sortofrandomchars)]), and the read routine
 * checks the data it reads back against the same rule.
 * The number in the comment at the end of each row is the running count of
 * elements up to and including that row. */
static unsigned char sortofrandomchars[] = {
  /* These are some more or less random values that are written to the file.
   * Note: these are actually from a crc32 tab :) */
  0x00, 0x77, 0x07, 0x30, 0x96, 0xee, 0x0e, 0x61, 0x2c, 0x99,  /*  10 */
  0x09, 0x51, 0xba, 0x07, 0x6d, 0xc4, 0x19, 0x70, 0x6a, 0xf4,  /*  20 */
  0x8f, 0xe9, 0x63, 0xa5, 0x35, 0x9e, 0x64, 0x95, 0xa3, 0x0e,  /*  30 */
  0xdb, 0x88, 0x32, 0x79, 0xdc, 0xb8, 0xa4, 0xe0, 0xd5, 0xe9,  /*  40 */
  0x1e, 0x97, 0xd2, 0xd9, 0x88, 0x09, 0xb6, 0x4c, 0x2b, 0x7e,  /*  50 */
  0xb1, 0x7c, 0xbd, 0xe7, 0xb8, 0x2d, 0x07, 0x90, 0xbf, 0x1d,  /*  60 */
  0x91, 0x1d, 0xb7, 0x10, 0x64, 0x6a, 0xb0, 0x20, 0xf2, 0xf3,  /*  70 */
  0xb9, 0x71, 0x48, 0x84, 0xbe, 0x41, 0xde, 0x1a, 0xda, 0xd4,  /*  80 */
  0x7d, 0x6d, 0xdd, 0xe4, 0xeb, 0xf4, 0xd4, 0xb5, 0x51, 0x83,  /*  90 */
  0xd3, 0x85, 0xc7, 0x13, 0x6c, 0x98, 0x56, 0x64, 0x6b, 0xa8,  /* 100 */
  0xc0, 0xfd, 0x62, 0xf9, 0x7a, 0x8a, 0x65, 0xc9, 0xec, 0x14,  /* 110 */
  0x01, 0x5c, 0x4f, 0x63, 0x06, 0x6c, 0xd9, 0xfa, 0x0f, 0x3d,  /* 120 */
  0x63, 0x8d, 0x08, 0x0d, 0xf5, 0x3b, 0x6e, 0x20, 0xc8, 0x4c,  /* 130 */
  0x69, 0x10, 0x5e, 0xd5, 0x60, 0x41, 0xe4, 0xa2, 0x67, 0x71,  /* 140 */
  0x72, 0x3c, 0x03, 0xe4, 0xd1, 0x4b, 0x04, 0xd4, 0x47, 0xd2,  /* 150 */
  0x0d, 0x85, 0xfd, 0xa5, 0x0a, 0xb5, 0x6b, 0x35, 0xb5, 0xa8,  /* 160 */
  0xfa, 0x42, 0xb2, 0x98, 0x6c, 0xdb, 0xbb, 0xc9, 0xd6, 0xac,  /* 170 */
  0xbc, 0xf9, 0x40, 0x32, 0xd8, 0x6c, 0xe3, 0x45, 0xdf, 0x5c,  /* 180 */
  0x75, 0xdc, 0xd6, 0x0d, 0xcf, 0xab, 0xd1, 0x3d, 0x59, 0x26,  /* 190 */
  0xd9, 0x30, 0xac, 0x51, 0xde, 0x00, 0x3a, 0xc8, 0xd7, 0x51,  /* 200 */
  0x80, 0xbf, 0xd0, 0x61, 0x16, 0x21, 0xb4, 0xf4, 0xb5, 0x56,  /* 210 */
  0xb3, 0xc4, 0x23, 0xcf, 0xba, 0x95, 0x99, 0xb8, 0xbd, 0xa5,  /* 220 */
  0x0f, 0x28, 0x02, 0xb8, 0x9e, 0x5f, 0x05, 0x88, 0x08, 0xc6,  /* 230 */
  0x0c, 0xd9, 0xb2, 0xb1, 0x0b, 0xe9, 0x24, 0x2f, 0x6f, 0x7c,  /* 240 */
  0x87, 0x58, 0x68, 0x4c, 0x11, 0xc1, 0x61, 0x1d, 0xab, 0xb6,  /* 250 */
  0x66, 0x2d, 0x3d, 0x76, 0xdc, 0x41, 0x90, 0x01, 0xdb, 0x71,  /* 260 */
  0x06, 0x98, 0xd2, 0x20, 0xbc, 0xef, 0xd5, 0x10, 0x2a, 0x71,  /* 270 */
  0xb1, 0x85, 0x89, 0x06, 0xb6, 0xb5, 0x1f, 0x9f, 0xbf, 0xe4,  /* 280 */
  0xa5, 0xe8, 0xb8, 0xd4, 0x33, 0x78, 0x07, 0xc9, 0xa2, 0x0f,  /* 290 */
  0x00, 0xf9, 0x34, 0x96, 0x09, 0xa8, 0x8e, 0xe1, 0x0e, 0x98,  /* 300 */
  0x18, 0x7f, 0x6a, 0x0d, 0xbb, 0x08, 0x6d, 0x3d, 0x2d, 0x91,  /* 310 */
  0x64, 0x6c, 0x97, 0xe6, 0x63, 0x5c, 0x01, 0x6b, 0x6b, 0x51,  /* 320 */
  0xf4, 0x1c, 0x6c, 0x61, 0x62, 0x85, 0x65, 0x30, 0xd8, 0xf2,  /* 330 */
  0x62, 0x00, 0x4e, 0x6c, 0x06, 0x95, 0xed, 0x1b, 0x01, 0xa5,  /* 340 */
  0x7b, 0x82, 0x08, 0xf4, 0xc1, 0xf5, 0x0f, 0xc4, 0x57, 0x65,  /* 350 */
  0xb0, 0xd9, 0xc6, 0x12, 0xb7, 0xe9, 0x50, 0x8b, 0xbe, 0xb8,  /* 360 */
  0xea, 0xfc, 0xb9, 0x88, 0x7c, 0x62, 0xdd, 0x1d, 0xdf, 0x15,  /* 370 */
  0xda, 0x2d, 0x49, 0x8c, 0xd3, 0x7c, 0xf3, 0xfb, 0xd4, 0x4c,  /* 380 */
  0x65, 0x4d, 0xb2, 0x61, 0x58, 0x3a, 0xb5, 0x51, 0xce, 0xa3,  /* 390 */
  0xbc, 0x00, 0x74, 0xd4, 0xbb, 0x30, 0xe2, 0x4a, 0xdf, 0xa5,  /* 400 */
  0x41, 0x3d, 0xd8, 0x95, 0xd7, 0xa4, 0xd1, 0xc4, 0x6d, 0xd3,  /* 410 */
  0xd6, 0xf4, 0xfb, 0x43, 0x69, 0xe9, 0x6a, 0x34, 0x6e, 0xd9,  /* 420 */
  0xfc, 0xad, 0x67, 0x88, 0x46, 0xda, 0x60, 0xb8, 0xd0, 0x44,  /* 430 */
  0x04, 0x2d, 0x73, 0x33, 0x03, 0x1d, 0xe5

};
/* printf() wrapper that prefixes every message with the calling process's
 * rank (read from the variable `rank` in scope at the call site) and the
 * source file and line of the call.
 * fmt must be a string literal, because it is pasted onto the prefix by
 * literal concatenation, and at least one argument must follow it. To print
 * a fixed text use MPIPRINTF("%s\n", "text").
 * Written with the standard C99 `...` / __VA_ARGS__ form rather than the
 * GNU-only named variadic parameter, so it compiles on any C99 compiler. */
#define MPIPRINTF(fmt, ...) \
  printf("[%d] @%s:%d " fmt, rank, __FILE__, __LINE__, __VA_ARGS__)

/*  !=======================================================================
    !
    !     Read and verify the shared test file using MPI-IO
    !
    !     Each process reads numiter consecutive chunks of bufsize bytes,
    !     starting at offset (mp_size - rank - 1) * bufsize * numiter, i.e.
    !     the region that rank (mp_size - rank - 1) covers in
    !     mpiio_write_status(). Every byte read is compared against the
    !     sortofrandomchars pattern expected at its file offset.
    !
    !     Uses the globals bufsize, numiter, fname, rank, mp_size, verblev
    !     and buf (which must point to at least bufsize bytes).
    !
    !     Calls MPI_Abort() with code 1 if bufsize does not fit the MPI
    !     element count or the file cannot be opened, with code 2 on the
    !     first byte that differs from the pattern, and with code 3 if a
    !     read call fails.
    !
    !=======================================================================
*/

void mpiio_read_status(void)
{
  MPI_File fh;
  MPI_Status status;
  MPI_Offset my_global_off;
  unsigned long long i, j;
  unsigned long idx;
  int rc;

  /* The element count of MPI_File_read_at() is an int. Refuse anything
   * larger instead of letting the conversion silently truncate it. */
  if (bufsize > (unsigned long long)INT_MAX) {
    MPIPRINTF("bufsize %llu exceeds the largest element count (%d) a single MPI-IO call accepts\n", bufsize, INT_MAX);
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  my_global_off = (unsigned long long)(mp_size - rank - 1) * bufsize * numiter;
  if (verblev >= 1) {
    MPIPRINTF("reading from '%s', starting at offset %llu\n", fname, (unsigned long long)my_global_off);
  }

  if ( MPI_File_open(MPI_COMM_WORLD, fname, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh) != MPI_SUCCESS ) {
    MPIPRINTF("%s\n", "reading restart data: MPI_File_open() failed");
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  for (j = 0; j < numiter; j++) {
    rc = MPI_File_read_at/*_all*/(fh, my_global_off, buf, (int)bufsize, MPI_CHAR, &status);
    if (rc != MPI_SUCCESS) {
      /* Report the return code: it is what carries the error for this call.
       * Stop here, because after a failed read buf does not hold file data
       * and verifying it would only produce a misleading "corrupt" report. */
      MPIPRINTF("reading restart: MPI_File_read_at() failed at offset %llu - error code %d\n", (unsigned long long)my_global_off, rc);
      MPI_Abort(MPI_COMM_WORLD, 3);
    }
    /* idx follows (my_global_off + i) % sizeof(sortofrandomchars) without
     * doing a modulo for every byte. */
    idx = my_global_off % sizeof(sortofrandomchars);
    for (i = 0; i < bufsize; i++) {
      if (buf[i] != sortofrandomchars[idx]) {
        MPIPRINTF("Read corrupt data at offset %llu: %02x != %02x\n", (unsigned long long)my_global_off + i, buf[i], sortofrandomchars[idx]);
        MPI_Abort(MPI_COMM_WORLD, 2);
      }
      idx++;
      if (idx >= sizeof(sortofrandomchars)) {
        idx = 0;
      }
    }
    if (verblev >= 2) {
      MPIPRINTF("read iter %llu\n", j);
    }
    my_global_off += bufsize;
  }

  rc = MPI_File_close(&fh);
  if (rc != MPI_SUCCESS) {
    MPIPRINTF("reading restart: MPI_File_close() failed - error code %d\n", rc);
  }
}

/*  !=======================================================================
    !
    !     Write the shared test file using MPI-IO
    !
    !     Each process writes numiter consecutive chunks of bufsize bytes,
    !     starting at offset rank * bufsize * numiter. The content is the
    !     sortofrandomchars pattern, positioned so that the byte at file
    !     offset o is sortofrandomchars[o % sizeof(sortofrandomchars)].
    !
    !     Uses the globals bufsize, numiter, fname, rank, verblev and buf
    !     (which must point to at least bufsize bytes).
    !
    !     Calls MPI_Abort() with code 1 if bufsize does not fit the MPI
    !     element count or the file cannot be opened. A failing write or
    !     close is reported but does not stop the run.
    !
    !=======================================================================
*/
void mpiio_write_status(void)
{
  MPI_File fh;
  MPI_Status status;
  MPI_Offset my_global_off;
  unsigned long long i, j;
  unsigned long idx;
  int rc;

  /* The element count of MPI_File_write_at() is an int. Refuse anything
   * larger instead of letting the conversion silently truncate it. */
  if (bufsize > (unsigned long long)INT_MAX) {
    MPIPRINTF("bufsize %llu exceeds the largest element count (%d) a single MPI-IO call accepts\n", bufsize, INT_MAX);
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  my_global_off = (unsigned long long)rank * bufsize * numiter;
  if (verblev >= 1) {
    MPIPRINTF("writing to '%s', starting at offset %llu\n", fname, (unsigned long long)my_global_off);
  }

  if ( MPI_File_open(MPI_COMM_WORLD, fname, (MPI_MODE_WRONLY | MPI_MODE_CREATE), MPI_INFO_NULL, &fh) != MPI_SUCCESS ) {
    MPIPRINTF("%s\n", "writing restart data: MPI_File_open() failed");
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  for (j = 0; j < numiter; j++) {
    /* Fill the buffer with the pattern, phase-aligned to the file offset.
     * idx follows (my_global_off + i) % sizeof(sortofrandomchars) without
     * doing a modulo for every byte. */
    idx = my_global_off % sizeof(sortofrandomchars);
    for (i = 0; i < bufsize; i++) {
      buf[i] = sortofrandomchars[idx];
      idx++;
      if (idx >= sizeof(sortofrandomchars)) {
        idx = 0;
      }
    }
    rc = MPI_File_write_at/*_all*/(fh, my_global_off, buf, (int)bufsize, MPI_CHAR, &status);
    if (rc != MPI_SUCCESS) {
      /* Report the return code: it is what carries the error for this call.
       * As before, a failed write is reported and the loop carries on. */
      MPIPRINTF("writing restart: MPI_File_write_at() failed at offset %llu - error code %d\n", (unsigned long long)my_global_off, rc);
    }
    if (verblev >= 2) {
      MPIPRINTF("wrote iter %llu\n", j);
    }
    my_global_off += bufsize;
  }

  rc = MPI_File_close(&fh);
  if (rc != MPI_SUCCESS) {
    MPIPRINTF("writing restart: MPI_File_close() failed - error code %d\n", rc);
  }
}

/* Print the command line synopsis followed by a short description of what
 * the benchmark does to stdout.
 * argv0 is the program name to show in the synopsis line. */
void showhelp(char * argv0)
{
  static const char * const explanation[] = {
    "Each MPI Process will write/read bufsize Bytes numiter times to file and then print\n",
    "the data rate that was achieved. It will also check data read for errors.\n",
    "-v makes output more verbose. Can be repeated multiple times.\n"
  };
  size_t line;

  fprintf(stdout, "Syntax: %s [--bufsize n] [--numiter n] [--file filename] [--read] [--write] [-v]\n\n", argv0);
  for (line = 0; line < sizeof(explanation) / sizeof(explanation[0]); line++) {
    fputs(explanation[line], stdout);
  }
}

int main(int argc, char** argv)
{

  long i;
  int err;

  for (i = 1; i < argc; i++) {
    if (strcmp(argv[i], "--bufsize") == 0) {
      i++;
      if (i < argc) {
        bufsize = strtoul(argv[i], NULL, 10);
      } else {
        printf("Error: bufsize requires a parameter...\n");
        exit(1);
      }
    } else if (strcmp(argv[i], "--numiter") == 0) {
      i++;
      if (i < argc) {
        numiter = strtoul(argv[i], NULL, 10);
      } else {
        printf("Error: numiter requires a parameter...\n");
        exit(1);
      }
    } else if (strcmp(argv[i], "--file") == 0) {
      i++;
      if (i < argc) {
        fname = argv[i];
      } else {
        printf("Error: file requires a parameter...\n");
        exit(1);
      }
    } else if (strcmp(argv[i], "--read") == 0) {
      enableread = 1;
    } else if (strcmp(argv[i], "--write") == 0) {
      enablewrite = 1;
    } else if (strcmp(argv[i], "-v") == 0) {
      verblev++;
    } else {
      printf("Unknown command line argument: %s\n", argv[i]);
      showhelp(argv[0]);
      exit(1);
    }
  }

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&mp_size);

  if (verblev >= 2) {
    MPIPRINTF("Hello, world! I am rank %d of %d.\n", rank+1, mp_size);
  }
  if (rank == 0) {
    printf("Using settings: nprocesses = %d, bufsize = %llu, numiter = %llu, writing = %d, reading = %d\n", mp_size, bufsize, numiter, enablewrite, enableread);
    printf("                filesize: %llu MeBiBytes in file '%s'\n",
           ((unsigned long long)mp_size * bufsize * numiter) / (1024LLU * 1024LLU),
           fname);
  }
  
  buf = malloc( bufsize );
  if (!buf) {
    MPIPRINTF("%s\n", "Out of memory.");
    exit(0);
  }

  if (enablewrite) {
    /* Synchronize */
    if ((err = MPI_Barrier(MPI_COMM_WORLD)) != 0) {
      MPIPRINTF("Barrier failed - returncode %d.\n", err);
    }
    double starttime = MPI_Wtime();
    mpiio_write_status();
    if ((err = MPI_Barrier(MPI_COMM_WORLD)) != 0) {
      MPIPRINTF("Barrier failed - returncode %d.\n", err);
    }
    if (rank == 0) {
      double dur = MPI_Wtime() - starttime;
      unsigned long long btotal = (unsigned long long)mp_size * bufsize * numiter;
      printf("%.2lf seconds for writing %llu Bytes (%llu MeBiBytes).\n",
             dur,
             btotal,
             btotal / (1024LLU * 1024LLU));
      double bps = ((double)btotal) / dur;
      printf("%.2lf MB/sec (%.2lf MeBiBytes/sec)\n", bps / (1000.0 * 1000.0), bps / (1024.0 * 1024.0));
    }
  }
  if (enableread) {
    /* Synchronize */
    if ((err = MPI_Barrier(MPI_COMM_WORLD)) != 0) {
      MPIPRINTF("Barrier failed - returncode %d.\n", err);
    }
    double starttime = MPI_Wtime();
    mpiio_read_status();
    if ((err = MPI_Barrier(MPI_COMM_WORLD)) != 0) {
      MPIPRINTF("Barrier failed - returncode %d.\n", err);
    }
    if (rank == 0) {
      double dur = MPI_Wtime() - starttime;
      unsigned long long btotal = (unsigned long long)mp_size * bufsize * numiter;
      printf("%.2lf seconds for reading %llu Bytes (%llu MeBiBytes).\n",
             dur,
             btotal,
             btotal / (1024LLU * 1024LLU));
      double bps = ((double)btotal) / dur;
      printf("%.2lf MB/sec (%.2lf MeBiBytes/sec)\n", bps / (1000.0 * 1000.0), bps / (1024.0 * 1024.0));
    }
  }

  MPI_Finalize();

  return 0;
}
