//  cssutil.c - utility for munging css files, version X0.1
//  Copyright 2001  William S. Yerazunis, all rights reserved.
//  
//  This software is licensed to the public under the Free Software
//  Foundation's GNU GPL, version 1.0.  You may obtain a copy of the
//  GPL by visiting the Free Software Foundations web site at
//  www.fsf.org .  Other licenses may be negotiated; contact the 
//  author for details.  
//

//  include some standard files

#include "crm114_sysincludes.h"

//  include any local crm114 configuration file
#include "crm114_config.h"

//  include the crm114 data structures file
#include "crm114_structs.h"

//  and include the routine declarations file
#include "crm114.h"

//  Print the command-line usage summary for cssutil on stdout.
void helptext ()
{
  //  One entry per output line; emitted in order, unmodified.
  static const char *const usage_lines[] =
    {
      "Usage: cssutil [-b -r] [-s css-size] cssfile\n",
      "                -h   - print this help\n",
      "                -b   - brief; print only summary\n",
      "                -r   - report then exit (no menu)\n",
      "                -s css-size  - if no cssfile found, create new\n",
      "                               cssfile with this many buckets.\n",
      "                -S css-size  - same as -s, but round up to next\n",
      "                               2^n + 1 boundary.\n"
    };
  size_t line_count = sizeof (usage_lines) / sizeof (usage_lines[0]);
  size_t idx;

  for (idx = 0; idx < line_count; idx++)
    fputs (usage_lines[idx], stdout);
}

#include <math.h>
#include <unistd.h>

//  Round a requested bucket count up to the smallest value of the form
//  2^n + 1 that is not below it (1000 -> 1025, and 1025 stays 1025).
//  Done in integer arithmetic so that exact 2^n + 1 requests are not
//  bumped to the next boundary by floating-point rounding, and so that
//  requests <= 1 are well defined (they yield 2).
static long cssutil_round_up_length (long requested)
{
  long pow2 = 1;
  int shifts;

  //  Stop doubling two bits short of the width of a long so neither the
  //  shift nor the final "+ 1" can overflow.
  for (shifts = 0;
       shifts < (int) (sizeof (long) * 8) - 2 && pow2 + 1 < requested;
       shifts++)
    pow2 <<= 1;

  return pow2 + 1;
}

//  cssutil entry point.
//
//  Parses the option flags, creates the named .css file (zero filled) if
//  it does not exist, maps it read/write, prints usage statistics about
//  its buckets and then, unless -r was given, runs an interactive menu
//  that can zero, subtract from, divide, rescan or pack the bucket values.
//
//  Returns 0 on normal completion; fatal errors exit with EXIT_FAILURE.
int main (int argc, char **argv)
{
  long i;           //  general loop counter
  long k;           //  stat() return code
  long v;
  long sparse_spectrum_file_length = DEFAULT_SPARSE_SPECTRUM_FILE_LENGTH;
  long user_set_css_length = 0;
  long hfsize;      //  file size: bytes until mapped, buckets afterwards
  long long sum;    //  sum of the hits... can be _big_.
  FILE *f;
  int hfd;
  int brief = 0;
  int n;
  int report_only = 0;
  int nparsed;      //  number of fields sscanf pulled out of a command

  long *bcounts;    //  histogram: how many buckets hold each value
  long maxchain;
  long curchain;
  long totchain;
  long fbuckets;
  long avg_divisor;
  long zvbins;
  long ofbins;

  char cmdstr[255];
  char cmdchr[sizeof cmdstr];   //  command word; can never outgrow cmdstr
  float cmdval;

  struct stat statbuf;          //  filestat buffer
  FEATUREBUCKET_TYPE *hashes;   //  the mapped contents of the hash file
  size_t map_bytes;             //  length of the mapping, for munmap
  size_t fill_bytes;
  size_t nwritten;

  //    the following for crm114.h's happiness
  //    NOTE(review): not referenced anywhere in this function; kept
  //    because the reason it was added is not visible here.
  char *newinputbuf;
  newinputbuf = (char *) &hfd;
  (void) newinputbuf;

  //    parse cmdline params; the first non-flag argument is the filename
  for (n = 1; n < argc; n++)
    {
      if (strncmp (argv[n], "-b", 2) == 0)
        {
          brief = 1;            //  brief, no 'bin value ...' lines
        }
      else if (strncmp (argv[n], "-r", 2) == 0)
        {
          report_only = 1;      //  print stats only, no cmd menu.
        }
      else if (strncmp (argv[n], "-s", 2) == 0)   //  new CSS of this size
        {
          n++;                  //  move to the size argument
          if (n < argc
              && sscanf (argv[n], "%ld", &sparse_spectrum_file_length) == 1
              && sparse_spectrum_file_length > 0)
            {
              fprintf (stderr, "\nOverride css creation length to %ld\n",
                       sparse_spectrum_file_length);
              user_set_css_length = 1;
            }
          else
            {
              fprintf (stderr, "On -s flag: Missing or incomprehensible .CSS file length.\n");
              exit (EXIT_FAILURE);
            }
        }
      else if (strncmp (argv[n], "-S", 2) == 0)   //  same, rounded up
        {
          n++;                  //  move to the size argument
          if (n < argc
              && sscanf (argv[n], "%ld", &sparse_spectrum_file_length) == 1
              && sparse_spectrum_file_length > 0)
            {
              sparse_spectrum_file_length =
                cssutil_round_up_length (sparse_spectrum_file_length);
              user_set_css_length = 1;
            }
          else
            {
              fprintf (stderr, "On -S flag: Missing or incomprehensible .CSS file length.\n");
              exit (EXIT_FAILURE);
            }
        }
      else if (strncmp (argv[n], "-?", 2) == 0
               || strncmp (argv[n], "-h", 2) == 0)
        {
          helptext ();
          exit (EXIT_SUCCESS);
        }
      else
        {
          break;
        }
    }

  //    no filename left on the command line
  if (n >= argc)
    {
      helptext ();
      exit (EXIT_SUCCESS);
    }

  bcounts = malloc (sizeof *bcounts * FEATUREBUCKET_VALUE_MAX);
  if (bcounts == NULL)
    {
      fprintf (stderr, "\n Couldn't allocate the bin-count table.\n");
      exit (EXIT_FAILURE);
    }

  //   filename is argv[n]
  //             and stat it to get its length
  k = stat (argv[n], &statbuf);
  //             quick check- does the file even exist?
  if (k == 0)
    {
      hfsize = statbuf.st_size;
      //    compare bucket counts with bucket counts, not with bytes
      if (user_set_css_length
          && (long) (hfsize / sizeof (FEATUREBUCKET_STRUCT))
             != sparse_spectrum_file_length)
        fprintf (stderr, "\n.CSS file %s exists; -s option ignored.\n",
                 argv[n]);
    }
  else
    {
      //      file didn't exist... create it
      fprintf (stdout, "\nHad to create .CSS file %s\n", argv[n]);
      f = fopen (argv[n], "w");
      if (!f)
        {
          fprintf (stderr,
                   "\n Couldn't open file %s for writing; errno=%d .\n",
                   argv[n], errno);
          exit (EXIT_FAILURE);
        }
      //       put in some bytes of NULL
      fill_bytes = (size_t) sparse_spectrum_file_length
                   * sizeof (FEATUREBUCKET_STRUCT);
      for (nwritten = 0; nwritten < fill_bytes; nwritten++)
        {
          if (fputc ('\000', f) == EOF)
            {
              fprintf (stderr, "\n Couldn't write file %s; errno=%d .\n",
                       argv[n], errno);
              exit (EXIT_FAILURE);
            }
        }
      //    fclose flushes, so a full disk may only show up here
      if (fclose (f) != 0)
        {
          fprintf (stderr, "\n Couldn't write file %s; errno=%d .\n",
                   argv[n], errno);
          exit (EXIT_FAILURE);
        }
      //    and reset the statbuf to be correct
      k = stat (argv[n], &statbuf);
      if (k != 0)
        {
          fprintf (stderr, "\n Couldn't stat file %s; errno=%d .\n",
                   argv[n], errno);
          exit (EXIT_FAILURE);
        }
      hfsize = statbuf.st_size;
    }

  //
  //         mmap the hash file into memory so we can bitwhack it
  hfd = open (argv[n], O_RDWR);
  if (hfd < 0)                  //  0 is a valid descriptor; only <0 fails
    {
      fprintf (stderr, "\n Couldn't open RW file %s; errno=%d .\n",
               argv[n], errno);
      exit (EXIT_FAILURE);
    }
  map_bytes = (size_t) hfsize;
  hashes = (FEATUREBUCKET_TYPE *) mmap (NULL, map_bytes,
                                        PROT_READ | PROT_WRITE,
                                        MAP_SHARED, hfd, 0);
  if (hashes == MAP_FAILED)
    {
      fprintf (stderr,
               "\n Couldn't mmap file %s into memory; errno=%d .\n",
               argv[n], errno);
      exit (EXIT_FAILURE);
    }

  //   from now on, hfsize is buckets, not bytes.
  hfsize = statbuf.st_size / sizeof (FEATUREBUCKET_STRUCT);
  fprintf (stdout, "\n Sparse spectra file %s statistics: \n", argv[n]);

 zloop:
  //   (re)scan the file; every scan starts at bucket 1, bucket 0 is
  //   never examined.
  sum = 0;
  for (i = 1; i < hfsize; i++)
    sum = sum + hashes[i].value;

  //   walk runs of consecutive in-use buckets to get chain statistics
  maxchain = 0;
  curchain = 0;
  totchain = 0;
  fbuckets = 0;
  zvbins = 0;
  ofbins = 0;
  for (i = 1; i < hfsize; i++)
    {
      if (hashes[i].key != 0)
        {
          fbuckets++;
          totchain += curchain;
          curchain++;
          if (hashes[i].value == 0) zvbins++;
          if (hashes[i].value >= FEATUREBUCKET_VALUE_MAX) ofbins++;
        }
      else
        {
          curchain = 0;         //  an unused bucket ends the current run
        }
      if (curchain > maxchain) maxchain = curchain;
    }

  //   keep the averages finite for an empty file without misreporting
  //   the in-use count itself
  avg_divisor = (fbuckets == 0) ? 1 : fbuckets;

  fprintf (stdout, "\n Total available buckets          : %12ld ",
           hfsize);
  fprintf (stdout, "\n Total buckets in use             : %12ld  ",
           fbuckets);
  fprintf (stdout, "\n Total in-use zero-count buckets  : %12ld  ",
           zvbins);
  fprintf (stdout, "\n Total buckets with value >= max  : %12ld  ",
           ofbins);
  fprintf (stdout, "\n Total hashed datums in file      : %12lld",
           sum);
  fprintf (stdout, "\n Average datums per bucket        : %12.2f",
           (sum * 1.0) / (avg_divisor * 1.0));
  fprintf (stdout, "\n Maximum length of overflow chain : %12ld  ",
           maxchain);
  fprintf (stdout, "\n Average length of overflow chain : %12.2f ",
           (totchain * 1.0) / (avg_divisor * 1.0));
  fprintf (stdout, "\n Average packing density          : %12.2f\n",
           (fbuckets * 1.0) / (hfsize * 1.0));

  //   histogram of bucket values
  for (i = 0; i < FEATUREBUCKET_VALUE_MAX; i++)
    bcounts[i] = 0;

  for (v = 1; v < hfsize; v++)
    {
      if (hashes[v].value < FEATUREBUCKET_VALUE_MAX)
        bcounts[hashes[v].value]++;
      else
        fprintf (stderr, "You seem to have a featurebucket that is larger than the maximum permitted.\n");
    }

  if (!brief)
    {
      for (i = 0; i < FEATUREBUCKET_VALUE_MAX; i++)
        {
          if (bcounts[i] > 0)
            fprintf (stdout, "\n bin value %8ld found %9ld times",
                     i, bcounts[i]);
        }
    }

  fprintf (stdout, "\n");

  if (report_only)
    goto done;

  fprintf (stdout, "\n");

 cmdloop:
  fprintf (stdout, "Options:\n");
  fprintf (stdout, "   Z n - zero bins at or below a value\n");
  fprintf (stdout, "   S n - subtract a constant from all bins\n");
  fprintf (stdout, "   D n - divide all bins by a constant\n");
  fprintf (stdout, "   R - rescan\n");
  fprintf (stdout, "   P - pack\n");
  fprintf (stdout, "   Q - quit\n");
  fprintf (stdout, ">>> ");
  fflush (stdout);

  //   bounded read of one command line; end of input means quit
  if (fgets (cmdstr, sizeof cmdstr, stdin) == NULL)
    goto done;
  if (strchr (cmdstr, '\n') == NULL)
    {
      //   over-long line: discard the rest so it is not taken as the
      //   next command
      int c;
      while ((c = getchar ()) != EOF && c != '\n')
        ;
    }

  cmdchr[0] = '\0';
  cmdval = 0;
  //   %254s cannot overflow cmdchr, which is as large as cmdstr
  nparsed = sscanf (cmdstr, "%254s %f", cmdchr, &cmdval);
  if (nparsed < 1)
    goto cmdloop;               //  blank line: just ask again

  //   Z, S and D are meaningless without their numeric argument
  if (nparsed < 2
      && (strcasecmp (cmdchr, "z") == 0
          || strcasecmp (cmdchr, "s") == 0
          || strcasecmp (cmdchr, "d") == 0))
    {
      fprintf (stdout, "That command needs a numeric argument.\n");
      goto cmdloop;
    }

  if (strcasecmp (cmdchr, "z") == 0)
    {
      fprintf (stdout, "Working...");
      for (i = 1; i < hfsize; i++)
        if (hashes[i].value <= cmdval) hashes[i].value = 0;
      fprintf (stdout, "done.\n");
      goto cmdloop;
    }
  if (strcasecmp (cmdchr, "s") == 0)
    {
      fprintf (stdout, "Working...");
      for (i = 1; i < hfsize; i++)
        {
          //   clamp at zero rather than letting the value go negative
          if (hashes[i].value > (int) cmdval)
            hashes[i].value = hashes[i].value - cmdval;
          else
            hashes[i].value = 0;
        }
      fprintf (stdout, "done.\n");
      goto cmdloop;
    }
  if (strcasecmp (cmdchr, "d") == 0)
    {
      if (cmdval == 0)
        {
          fprintf (stdout, "You can't divide by zero, nimrod!\n");
          goto cmdloop;
        }
      fprintf (stdout, "Working...");
      for (i = 1; i < hfsize; i++)
        hashes[i].value = hashes[i].value / cmdval;
      fprintf (stdout, "done.\n");
      goto cmdloop;
    }
  if (strcasecmp (cmdchr, "r") == 0)
    {
      goto zloop;
    }
  if (strcasecmp (cmdchr, "p") == 0)
    {
      fprintf (stdout, "Working...");
      crm_packcss (hashes, hfsize, 1, hfsize - 1);
      goto zloop;
    }
  if (strcasecmp (cmdchr, "q") == 0)
    {
      fprintf (stdout, "Bye! \n");
      goto done;
    }

  //   anything else: say so and ask again instead of silently exiting
  fprintf (stdout, "Unrecognized command.\n");
  goto cmdloop;

 done:
  //   release the mapping, the descriptor and the histogram
  if (munmap (hashes, map_bytes) != 0)
    fprintf (stderr, "\n Couldn't unmap file %s; errno=%d .\n",
             argv[n], errno);
  close (hfd);
  free (bcounts);
  return 0;
}

//  Pack a region of the bucket array, handling wraparound.
//
//    h         - the bucket array
//    hs        - total number of buckets in h (indices 0 .. hs-1)
//    packstart - index of the first bucket to pack
//    packlen   - how many buckets to pack
//
//  The actual work is done by crm_packseg; this routine only splits a
//  region that runs off the end of the array into two segments.
void crm_packcss (FEATUREBUCKET_TYPE *h, long hs, long packstart, long packlen)
{
  //    How we pack...
  //
  //    We look at each bucket, and attempt to reinsert it at the "best"
  //    place.  We know at worst it will end up where it already is, and
  //    at best it will end up lower (at a lower index) in the file, except
  //    if it's in wraparound mode, in which case we know it will not get
  //    back up past us (since the file must contain at least one empty)
  //    and so it's still below us in the file.

  if (packstart + packlen <= hs)
    {
      //  The region ends at or before the last bucket (hs-1): no
      //  wraparound.  "<=" matters: a region ending exactly on the last
      //  bucket must still be packed as one piece, last bucket included.
      crm_packseg (h, hs, packstart, packlen);
    }
  else
    {
      //  Wraparound: first packstart .. hs-1, then whatever is left
      //  over, restarting at bucket 1.
      long tail_len = hs - packstart;

      crm_packseg (h, hs, packstart, tail_len);
      crm_packseg (h, hs, 1, packlen - tail_len);
    }
}
      
//  Pack one contiguous run of buckets, packstart .. packstart+packlen-1.
//
//    h         - the bucket array
//    hs        - total number of buckets in h; slot 0 is never a target
//    packstart - index of the first bucket in the run
//    packlen   - number of buckets in the run
//
//  Pass 1 turns every zero-valued bucket in the run into a true empty
//  (key and hash cleared).  Pass 2 re-inserts each remaining bucket at
//  the first free slot found by probing upward from (hash % hs),
//  wrapping from the end of the array back to slot 1.
void crm_packseg (FEATUREBUCKET_TYPE *h, long hs, long packstart, long packlen)
{
  long ifrom, ito;
  long thash, tkey, tvalue;

  //  With fewer than two buckets there is no usable slot (slot 0 is
  //  skipped), and hs == 0 would make the modulo below divide by zero.
  if (hs < 2)
    return;

  for (ifrom = packstart; ifrom < packstart + packlen; ifrom++)
    {
      //  Is it an empty bucket?  (remember, we're compressing out
      //  all placeholder buckets, so any bucket that's zero-valued
      //  is a valid target.)
      if (h[ifrom].value == 0)
        {
          //    Empty bucket - turn it from marker to empty
          h[ifrom].key = 0;
          h[ifrom].hash = 0;
        }
    }

  //  Our slot values are now somewhat in disorder; we need to re-insert
  //  slot data in a bucket where it will be found.
  //
  for (ifrom = packstart; ifrom < packstart + packlen; ifrom++)
    {
      //    Nothing to relocate for an empty bucket.  Skipping it avoids
      //    a probe from slot 1 for every empty bucket, and avoids
      //    copying zeros over some other zero-valued bucket.
      if (h[ifrom].value == 0)
        continue;

      //    Now place each bucket where it can best go.
      //
      ito = h[ifrom].hash % hs;
      if (ito == 0) ito = 1;
      //    Probe upward for a free slot; reaching ifrom itself means
      //    the bucket is already as low as it can go.
      while (h[ito].value != 0 && ito != ifrom)
        {
          ito++;
          if (ito >= hs) ito = 1;
        }
      //
      //    found an empty slot, put this value there, and zero the
      //    original one.  But only do this if it's not a NOOP.
      //
      if (ito != ifrom)
        {
          thash  = h[ifrom].hash;
          tkey   = h[ifrom].key;
          tvalue = h[ifrom].value;

          h[ifrom].hash  = 0;
          h[ifrom].key   = 0;
          h[ifrom].value = 0;

          h[ito].hash  = thash;
          h[ito].key   = tkey;
          h[ito].value = tvalue;
        }
    }
}
