/* GNU polyxmass - the massist's program.
   -------------------------------------- 
   Copyright (C) 2000,2001,2002,2003,2004 Filippo Rusconi

   http://www.polyxmass.org

   This file is part of the "GNU polyxmass" project.
   
   The "GNU polyxmass" project is an official GNU project package (see
   www.gnu.org) released ---in its entirety--- under the GNU General
   Public License and was started at the Centre National de la
   Recherche Scientifique (FRANCE), that granted me the formal
   authorization to publish it under this Free Software License.

   This software is free software; you can redistribute it and/or
   modify it under the terms of the GNU  General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.
   
   This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.
   
   You should have received a copy of the GNU  General Public
   License along with this software; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.
*/

#include "pxmchem-atom.h"



/* NEW'ING FUNCTIONS, DUPLICATING FUNCTIONS ...
 */
PxmAtomcount *
pxmchem_atomcount_new (void)
{
  PxmAtomcount *atomcount = NULL;

  atomcount = g_malloc0 (sizeof (PxmAtomcount));
  
  return atomcount;
}


/* Return a newly allocated copy of 'atomcount' (which cannot be NULL):
 * the 'atom' string is copied with strcpy() and the 'count' member is
 * copied by value.
 *
 * NOTE(review): strcpy() into the freshly zero-allocated instance
 * presumes that the 'atom' member is an in-struct character buffer
 * large enough for the source string -- confirm against the
 * PxmAtomcount declaration.
 */
PxmAtomcount *
pxmchem_atomcount_dup (const PxmAtomcount *atomcount)
{
  PxmAtomcount *copy = NULL;

  g_assert (atomcount != NULL);

  copy = pxmchem_atomcount_new ();

  strcpy (copy->atom, atomcount->atom);
  copy->count = atomcount->count;

  return copy;
}


/* Allocate a zero-filled PxmAtom and give it two empty pointer arrays:
 * one for the isotopic masses (massGPA) and one for the isotopic
 * abundances (abundGPA). All the other members are left zeroed.
 */
PxmAtom *
pxmchem_atom_new (void)
{
  PxmAtom *fresh = g_malloc0 (sizeof (PxmAtom));

  fresh->massGPA = g_ptr_array_new ();
  fresh->abundGPA = g_ptr_array_new ();

  return fresh;
}


/* Look in 'atom_refGPA' for the first atom whose symbol is identical
 * to 'symbol' and return a duplicate of it (made with
 * pxmchem_atom_dup ()). If no such atom is found, a warning is logged
 * and NULL is returned. Neither parameter can be NULL.
 */
PxmAtom *
pxmchem_atom_new_by_symbol (gchar *symbol, GPtrArray *atom_refGPA)
{
  PxmAtom *candidate = NULL;
  gint pos = 0;

  g_assert (symbol != NULL);
  g_assert (atom_refGPA != NULL);

  while (pos < atom_refGPA->len)
    {
      candidate = g_ptr_array_index (atom_refGPA, pos);

      if (strcmp (candidate->symbol, symbol) == 0)
        return pxmchem_atom_dup (candidate);

      pos++;
    }

  /* The whole reference array was searched without finding any atom
   * by that symbol: tell the user and fail.
   */
  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_WARNING,
         _("%s@%d: failed to find an atom by the same symbol: '%s'\n"),
         __FILE__, __LINE__, symbol);

  return NULL;
}


/* Return a newly allocated deep copy of 'atom' (which cannot be NULL).
 *
 * The name and symbol strings are duplicated, the avg and mono masses
 * are copied by value, and each mass/abundance pair is copied into a
 * newly allocated gdouble that is appended to the corresponding array
 * of the new atom.
 */
PxmAtom *
pxmchem_atom_dup (const PxmAtom *atom)
{
  gint iter = 0;

  gdouble *mass = NULL;
  gdouble *abund = NULL;
  gdouble *new_mass = NULL;
  gdouble *new_abund = NULL;

  PxmAtom *new_atom = NULL;

  g_assert (atom != NULL);

  /* The new atom must be allocated (along with its two pointer
   * arrays) before any of its members can be set.
   */
  new_atom = pxmchem_atom_new ();

  new_atom->name = g_strdup (atom->name);
  new_atom->symbol = g_strdup (atom->symbol);

  new_atom->avg = atom->avg;

  new_atom->mono = atom->mono;

  /* Sanity check: there must be the same number of items in the
   * two arrays below. If OK duplicate the two arrays in parallel.
   */
  g_assert (atom->massGPA->len == atom->abundGPA->len);

  for (iter = 0; iter < atom->massGPA->len; iter++)
    {
      mass = g_ptr_array_index (atom->massGPA, iter);
      new_mass = g_malloc0 (sizeof (gdouble));
      *new_mass = *mass;
      /* The new array takes the copy: without this the value would
       * be both lost for the new atom and leaked.
       */
      g_ptr_array_add (new_atom->massGPA, new_mass);

      abund = g_ptr_array_index (atom->abundGPA, iter);
      new_abund = g_malloc0 (sizeof (gdouble));
      *new_abund = *abund;
      g_ptr_array_add (new_atom->abundGPA, new_abund);
    }

  return new_atom;
}



/* DATA MODIFYING FUNCTIONS
 */
/* Replace the name of 'atom' with a copy of 'name'. The previous name,
 * if any, is freed. Neither parameter can be NULL. Always returns TRUE.
 */
gboolean
pxmchem_atom_set_name (PxmAtom *atom, gchar *name)
{
  g_assert (atom != NULL && name != NULL);

  /* g_free () silently accepts NULL, so no test is required here. */
  g_free (atom->name);
  atom->name = g_strdup (name);

  return TRUE;
}

/* Replace the symbol of 'atom' with a copy of 'symbol'. The previous
 * symbol, if any, is freed. Neither parameter can be NULL. Always
 * returns TRUE.
 */
gboolean
pxmchem_atom_set_symbol (PxmAtom *atom, gchar *symbol)
{
  g_assert (atom != NULL && symbol != NULL);

  /* g_free () silently accepts NULL, so no test is required here. */
  g_free (atom->symbol);
  atom->symbol = g_strdup (symbol);

  return TRUE;
}

/* Find in 'atom_refGPA' the first atom having symbol 'symbol' and set
 * both masses of 'masspair' from that atom's mono and avg members.
 * Returns TRUE if the atom was found; otherwise a critical message is
 * logged and FALSE is returned. None of the parameters can be NULL.
 */
gboolean
pxmchem_atom_fill_masspair (gchar *symbol,
                            PxmMasspair *masspair,
                            GPtrArray *atom_refGPA)
{
  PxmAtom *candidate = NULL;
  gint pos = 0;

  g_assert (symbol != NULL);
  g_assert (masspair != NULL);
  g_assert (atom_refGPA != NULL);

  while (pos < atom_refGPA->len)
    {
      candidate = g_ptr_array_index (atom_refGPA, pos);
      g_assert (candidate != NULL);

      if (strcmp (candidate->symbol, symbol) != 0)
        {
          pos++;
          continue;
        }

      libpolyxmass_masspair_set_both (masspair,
                                      candidate->mono, candidate->avg);

      return TRUE;
    }

  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
         _("%s@%d: failed to find an atom with symbol: '%s'\n"),
         __FILE__, __LINE__, symbol);

  return FALSE;
}





/* LOCATING FUNCTIONS
 */
/* Return the index at which the 'atom' pointer itself is stored in
 * 'GPA' (pointer comparison, first occurrence), or -1 if it is not
 * there. Neither parameter can be NULL.
 */
gint
pxmchem_atom_get_index_by_ptr (GPtrArray *GPA,
                               PxmAtom *atom)
{
  gint pos = 0;

  g_assert (GPA != NULL && atom != NULL);

  while (pos < GPA->len)
    {
      if (atom == (PxmAtom *) g_ptr_array_index (GPA, pos))
        return pos;

      pos++;
    }

  return -1;
}

/* Convenience wrapper: hands 'name' and 'GPA' over to
 * pxmchem_atom_get_index_top_by_name () and returns its result
 * unchanged.
 */
gint
pxmchem_atom_get_index_by_name (gchar *name, GPtrArray *GPA)
{
  gint idx = pxmchem_atom_get_index_top_by_name (name, GPA);

  return idx;
}

/* Search 'GPA' from its first item onwards and return the index of
 * the first atom whose name is identical to 'name', or -1 if there is
 * none. Neither parameter can be NULL.
 */
gint
pxmchem_atom_get_index_top_by_name (gchar *name, GPtrArray *GPA)
{
  PxmAtom *candidate = NULL;
  gint pos = 0;

  g_assert (GPA != NULL);
  g_assert (name != NULL);

  while (pos < GPA->len)
    {
      candidate = g_ptr_array_index (GPA, pos);

      if (strcmp (candidate->name, name) == 0)
        return pos;

      pos++;
    }

  return -1;
}

/* Search 'GPA' from its last item backwards and return the index of
 * the first atom encountered whose name is identical to 'name', or -1
 * if there is none. Neither parameter can be NULL.
 */
gint
pxmchem_atom_get_index_bottom_by_name (gchar *name, GPtrArray *GPA)
{
  PxmAtom *candidate = NULL;
  gint pos = 0;

  g_assert (GPA != NULL);
  g_assert (name != NULL);

  /* An empty array is handled first, so that 'len - 1' is never
   * computed on a zero length.
   */
  if (GPA->len == 0)
    return -1;

  pos = GPA->len - 1;

  while (pos >= 0)
    {
      candidate = g_ptr_array_index (GPA, pos);

      if (strcmp (candidate->name, name) == 0)
        return pos;

      pos--;
    }

  return -1;
}


/* Convenience wrapper: hands 'symbol' and 'GPA' over to
 * pxmchem_atom_get_index_top_by_symbol () and returns its result
 * unchanged.
 */
gint
pxmchem_atom_get_index_by_symbol (gchar *symbol, GPtrArray *GPA)
{
  gint idx = pxmchem_atom_get_index_top_by_symbol (symbol, GPA);

  return idx;
}

/* Search 'GPA' from its first item onwards and return the index of
 * the first atom whose symbol is identical to 'symbol', or -1 if
 * there is none. Neither parameter can be NULL.
 */
gint
pxmchem_atom_get_index_top_by_symbol (gchar *symbol, GPtrArray *GPA)
{
  PxmAtom *candidate = NULL;
  gint pos = 0;

  g_assert (GPA != NULL);
  g_assert (symbol != NULL);

  while (pos < GPA->len)
    {
      candidate = g_ptr_array_index (GPA, pos);

      if (strcmp (candidate->symbol, symbol) == 0)
        return pos;

      pos++;
    }

  return -1;
}

/* Search 'GPA' from its last item backwards and return the index of
 * the first atom encountered whose symbol is identical to 'symbol',
 * or -1 if there is none. Neither parameter can be NULL.
 */
gint
pxmchem_atom_get_index_bottom_by_symbol (gchar *symbol, GPtrArray *GPA)
{
  PxmAtom *candidate = NULL;
  gint pos = 0;

  g_assert (GPA != NULL);
  g_assert (symbol != NULL);

  /* An empty array is handled first, so that 'len - 1' is never
   * computed on a zero length.
   */
  if (GPA->len == 0)
    return -1;

  pos = GPA->len - 1;

  while (pos >= 0)
    {
      candidate = g_ptr_array_index (GPA, pos);

      if (strcmp (candidate->symbol, symbol) == 0)
        return pos;

      pos--;
    }

  return -1;
}


/* Convenience wrapper: hands 'symbol' and 'GPA' over to
 * pxmchem_atom_get_ptr_top_by_symbol () and returns its result
 * unchanged.
 */
PxmAtom *
pxmchem_atom_get_ptr_by_symbol (gchar *symbol, GPtrArray *GPA)
{
  PxmAtom *found = pxmchem_atom_get_ptr_top_by_symbol (symbol, GPA);

  return found;
}

/* Search 'GPA' from its first item onwards and return the pointer
 * stored in the array (not a copy) for the first atom whose symbol is
 * identical to 'symbol', or NULL if there is none. Neither parameter
 * can be NULL.
 */
PxmAtom *
pxmchem_atom_get_ptr_top_by_symbol (gchar *symbol, GPtrArray *GPA)
{
  PxmAtom *candidate = NULL;
  gint pos = 0;

  g_assert (GPA != NULL);
  g_assert (symbol != NULL);

  while (pos < GPA->len)
    {
      candidate = g_ptr_array_index (GPA, pos);

      if (strcmp (candidate->symbol, symbol) == 0)
        return candidate;

      pos++;
    }

  return NULL;
}


/* Search 'GPA' from its last item backwards and return the pointer
 * stored in the array (not a copy) for the first atom encountered
 * whose symbol is identical to 'symbol', or NULL if there is none.
 * Neither parameter can be NULL.
 */
PxmAtom *
pxmchem_atom_get_ptr_bottom_by_symbol (gchar *symbol, GPtrArray *GPA)
{
  PxmAtom *candidate = NULL;
  gint pos = 0;

  g_assert (GPA != NULL);
  g_assert (symbol != NULL);

  /* An empty array is handled first, so that 'len - 1' is never
   * computed on a zero length.
   */
  if (GPA->len == 0)
    return NULL;

  pos = GPA->len - 1;

  while (pos >= 0)
    {
      candidate = g_ptr_array_index (GPA, pos);

      if (strcmp (candidate->symbol, symbol) == 0)
        return candidate;

      pos--;
    }

  return NULL;
}


/* Return the index, in atom->abundGPA, of the isotope having the
 * highest abundance. Only abundances strictly greater than 0 can be
 * selected; on ties the first one wins. If no isotope qualifies (for
 * example because the array is empty) a critical message is logged
 * and -1 is returned. 'atom' cannot be NULL.
 */
gint
pxmchem_atom_get_highest_abund_isotope_idx (PxmAtom *atom)
{
  gdouble highest = 0;
  gdouble *current = NULL;

  gint best = -1;
  gint pos = 0;

  g_assert (atom != NULL);

  while (pos < atom->abundGPA->len)
    {
      current = g_ptr_array_index (atom->abundGPA, pos);
      g_assert (current != NULL);

      if (*current > highest)
        {
          highest = *current;
          best = pos;
        }

      pos++;
    }

  if (best != -1)
    return best;

  /* No isotope was selected: report the failure.
   */
  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
         _("%s@%d: failed to search monoisotopic mass for atom: '%s'\n"),
         __FILE__, __LINE__, atom->symbol);

  return -1;
}


/* Return the index, in atom->massGPA, of the isotope having the lowest
 * mass (first one on ties). If the array is empty a critical message
 * is logged and -1 is returned. 'atom' cannot be NULL.
 */
gint
pxmchem_atom_get_lowest_mass_isotope_idx (PxmAtom *atom)
{
  gdouble *mass = NULL;
  gdouble lowest = 0;

  gint iter = 0;
  gint idx = -1;

  g_assert (atom != NULL);

  /* We just have to find which isotope has the lowest mass.
   */
  for (iter = 0; iter < atom->massGPA->len; iter++)
    {
      mass = g_ptr_array_index (atom->massGPA, iter);
      g_assert (mass != NULL);

      /* 'idx' still being -1 is what tells that no mass was seen
       * yet. Testing the running minimum against 0 instead would
       * mistake a stored 0 mass for "nothing seen" and let a later,
       * greater mass replace it.
       */
      if (idx == -1 || *mass < lowest)
        {
          lowest = *mass;
          idx = iter;
        }
    }

  /* At this point, we should have a idx variable greater than or
   * equal to 0.
   */
  if (idx == -1)
    g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
           _("%s@%d: failed to search monoisotopic mass for atom: '%s'\n"),
           __FILE__, __LINE__, atom->symbol);

  return idx;
}








/* INTEGRITY CHECKING FUNCTIONS
 */
/* Tell if an atom having symbol 'symbol' (non-NULL, non-empty) exists
 * in 'atom_refGPA' (non-NULL). Returns TRUE if so, FALSE otherwise.
 * If 'idx' is non-NULL it receives the index of the first matching
 * atom, or -1 when there is no match.
 */
gboolean
pxmchem_atom_known_by_symbol (gchar *symbol,
                              gint *idx,
                              GPtrArray *atom_refGPA)
{
  PxmAtom *candidate = NULL;

  gint found = -1;
  gint pos = 0;

  g_assert (symbol != NULL && strlen (symbol) > 0);
  g_assert (atom_refGPA != NULL);

  /* Stop at the first match: 'found' leaves its -1 value only once.
   */
  for (pos = 0; found == -1 && pos < atom_refGPA->len; pos++)
    {
      candidate = g_ptr_array_index (atom_refGPA, pos);

      if (strcmp (candidate->symbol, symbol) == 0)
        found = pos;
    }

  if (idx != NULL)
    *idx = found;

  return (found == -1) ? FALSE : TRUE;
}


/* Check that 'symbol' (non-NULL) is syntactically valid: between 1 and
 * MAX_ATOM_SYMBOL_LEN characters long, with an uppercase first
 * character and only lowercase characters after it (like "H" or
 * "Ca"). Returns TRUE if the symbol is valid, FALSE otherwise.
 */
gboolean
pxmchem_atom_check_symbol_syntax (gchar *symbol)
{
  gint len = 0;
  gint iter = 0;

  gsize size = 0;

  g_assert (symbol != NULL);

  size = strlen (symbol);
  g_assert (size < G_MAXINT);
  len = (gint) size;

  if (len <= 0)
    return FALSE;

  if (len > MAX_ATOM_SYMBOL_LEN)
    return FALSE;

  /* The <ctype.h> functions are only defined for values that fit in
   * an unsigned char (or EOF): a plain char holding a non-ASCII byte
   * may be negative, hence the casts below.
   */
  if (!isupper ((unsigned char) symbol [0]))
    return FALSE;

  for (iter = 1; iter < len; iter++)
    {
      if (!islower ((unsigned char) symbol [iter]))
        return FALSE;
    }

  return TRUE;
}


/* Check that 'atom' (non-NULL) holds consistent data:
 *
 * - a non-empty name;
 * - a non-empty symbol that passes pxmchem_atom_check_symbol_syntax ();
 * - non-NULL, non-empty mass and abundance arrays of identical length;
 * - only non-NULL, strictly positive values in both arrays.
 *
 * Returns TRUE if all the checks pass, FALSE at the first failure.
 */
gboolean
pxmchem_atom_check_data_integrity (PxmAtom *atom)
{
  gint iter = 0;

  gdouble *mass = NULL;
  gdouble *abund = NULL;

  g_assert (atom != NULL);

  if (atom->name == NULL
      ||
      strlen (atom->name) <= 0)
    return FALSE;

  if (atom->symbol == NULL
      ||
      strlen (atom->symbol) <= 0)
    return FALSE;

  if (FALSE == pxmchem_atom_check_symbol_syntax (atom->symbol))
    return FALSE;

  /* The masses array cannot be NULL nor empty. Its members cannot
   * be NULL nor <= 0 either. The length is tested explicitly, because
   * an array that was emptied still has a non-NULL pdata member.
   */
  if (atom->massGPA == NULL
      ||
      atom->massGPA->pdata == NULL
      ||
      atom->massGPA->len == 0)
    return FALSE;

  /* The abundances array cannot be NULL nor empty. Its members cannot
   * be NULL nor <= 0 either.
   */
  if (atom->abundGPA == NULL
      ||
      atom->abundGPA->pdata == NULL
      ||
      atom->abundGPA->len == 0)
    return FALSE;

  /* Both the array MUST have the same number of mass and abundance
   * data in them, since these data do in fact go in pairs.
   */
  if (atom->massGPA->len != atom->abundGPA->len)
    return FALSE;

  for (iter = 0; iter < atom->massGPA->len; iter++)
    {
      mass = g_ptr_array_index (atom->massGPA, iter);

      if (mass == NULL || *mass <= 0)
        return FALSE;

      abund = g_ptr_array_index (atom->abundGPA, iter);

      if (abund == NULL || *abund <= 0)
        return FALSE;
    }

  return TRUE;
}



/* Validate every atom stored in 'atomGPA' (non-NULL, holding no NULL
 * item) and collect a message for each problem found: missing name,
 * missing or syntactically invalid symbol, missing or empty
 * mass/abundance arrays, arrays of different sizes, NULL or
 * non-positive mass/abundance values.
 *
 * 'valid' cannot be NULL and '*valid' MUST be NULL on entry.
 *
 * Returns TRUE if no problem was found ('*valid' is left NULL).
 * Otherwise returns FALSE and '*valid' points to a newly allocated
 * string holding all the messages, to be freed by the caller.
 */
gboolean
pxmchem_atom_array_validate_all (GPtrArray *atomGPA, gchar **valid)
{
  gint iter = 0;
  gint jter = 0;
  gint pair_count = 0;

  gdouble *mass = NULL;
  gdouble *abund = NULL;

  PxmAtom *atom = NULL;

  GString *gs = NULL;

  /* Note that for integrity reasons, *valid MUST BE NULL to ensure
   * that it is empty.
   */
  g_assert (valid != NULL);
  g_assert (*valid == NULL);

  g_assert (atomGPA != NULL);

  /* Allocate the GString into which the errors (if any) are to be
   * appended.
   */
  gs = g_string_new ("");

  for (iter = 0; iter < atomGPA->len; iter++)
    {
      atom = g_ptr_array_index (atomGPA, iter);
      g_assert (atom != NULL);

      if (atom->name == NULL
          ||
          strlen (atom->name) <= 0)
        {
          g_string_append_printf (gs,
                                  _("atom at index: '%d' has no name\n"),
                                  iter);
        }

      if (atom->symbol == NULL
          ||
          strlen (atom->symbol) <= 0)
        {
          g_string_append_printf (gs,
                                  _("atom at index: '%d' has no symbol\n"),
                                  iter);
        }
      else
        {
          if (FALSE == pxmchem_atom_check_symbol_syntax (atom->symbol))
            {
              g_string_append_printf (gs,
                                      _("atom at index: '%d' has an "
                                        "invalid symbol\n"),
                                      iter);
            }
        }

      if (atom->massGPA == NULL
          ||
          atom->massGPA->pdata == NULL
          ||
          atom->massGPA->len <= 0)
        {
          g_string_append_printf
            (gs,
             _("atom at index: '%d' has empty monoisotopic mass array\n"),
             iter);
        }

      if (atom->abundGPA == NULL
          ||
          atom->abundGPA->pdata == NULL
          ||
          atom->abundGPA->len <= 0)
        {
          g_string_append_printf
            (gs,
             _("atom at index: '%d' has empty monoisotopic abundance array\n"),
             iter);
        }

      /* A missing array was reported above; nothing more can be
       * checked for this atom without dereferencing a NULL pointer.
       */
      if (atom->massGPA == NULL || atom->abundGPA == NULL)
        continue;

      if (atom->massGPA->len != atom->abundGPA->len)
        {
          g_string_append_printf
            (gs,
             _("atom at index: '%d' has monoisotopic "
               "arrays of different size\n"),
             iter);
        }

      /* Only the mass/abundance pairs that exist in both arrays are
       * inspected, so that the shorter array is never read past its
       * end when the sizes differ.
       */
      pair_count = (atom->massGPA->len < atom->abundGPA->len)
        ? atom->massGPA->len : atom->abundGPA->len;

      for (jter = 0; jter < pair_count; jter++)
        {
          mass = g_ptr_array_index (atom->massGPA, jter);

          if (mass == NULL || *mass <= 0)
            g_string_append_printf
              (gs,
               _("atom at index: '%d' has bad monoisotopic mass "
                 "data at index: '%d'\n"),
               iter, jter);

          abund = g_ptr_array_index (atom->abundGPA, jter);

          if (abund == NULL || *abund <= 0)
            g_string_append_printf
              (gs,
               _("atom at index: '%d' has bad monoisotopic abundance "
                 "data at index: '%d'\n"),
               iter, jter);
        }
    }

  /* Finally, we finished validating all the atoms in array...
   */
  if (gs->len > 0)
    {
      /* At least one error occurred: hand the character data over to
       * the caller and only free the GString wrapper.
       */
      *valid = g_string_free (gs, FALSE);

      return FALSE;
    }

  g_string_free (gs, TRUE);

  return TRUE;
}


/* COMPUTATION FUNCTIONS
 */
/* Set atom->mono to the mass of the isotope that
 * pxmchem_atom_get_highest_abund_isotope_idx () designates. Returns
 * TRUE on success; if no isotope index could be obtained, a critical
 * message is logged, atom->mono is left untouched and FALSE is
 * returned. 'atom' cannot be NULL.
 */
gboolean
pxmchem_atom_set_mono_in_atom_by_highest_abund (PxmAtom *atom)
{
  gdouble *mass = NULL;
  gint best = -1;

  g_assert (atom != NULL);

  best = pxmchem_atom_get_highest_abund_isotope_idx (atom);

  if (best != -1)
    {
      /* The mass lives at the same index as the abundance.
       */
      mass = g_ptr_array_index (atom->massGPA, best);
      atom->mono = *mass;

      return TRUE;
    }

  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
         _("%s@%d: failed to search monoisotopic mass for atom: '%s'\n"),
         __FILE__, __LINE__, atom->symbol);

  return FALSE;
}


/* Set atom->mono to the mass of the isotope that
 * pxmchem_atom_get_lowest_mass_isotope_idx () designates. Returns
 * TRUE on success; if no isotope index could be obtained, a critical
 * message is logged, atom->mono is left untouched and FALSE is
 * returned.
 */
gboolean
pxmchem_atom_set_mono_in_atom_by_lowest_mass (PxmAtom *atom)
{
  gdouble *mass = NULL;
  gint lightest = -1;

  lightest = pxmchem_atom_get_lowest_mass_isotope_idx (atom);

  if (lightest != -1)
    {
      mass = g_ptr_array_index (atom->massGPA, lightest);
      atom->mono = *mass;

      return TRUE;
    }

  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
         _("%s@%d: failed to search monoisotopic mass for atom: '%s'\n"),
         __FILE__, __LINE__, atom->symbol);

  return FALSE;
}


/* Compute the average mass of the atom of 'atom_refGPA' that has
 * symbol 'symbol', and store it in '*avg'. The average is the sum of
 * the isotopic masses, each weighted by the ratio of its abundance
 * over the total abundance.
 *
 * 'symbol' cannot be NULL nor empty; 'avg' and 'atom_refGPA' cannot
 * be NULL. '*avg' is reset to 0 on entry.
 *
 * Returns TRUE on success. Returns FALSE (with '*avg' left at 0) if
 * the symbol is unknown, if the mass and abundance arrays do not have
 * the same length, or if the total abundance is not strictly
 * positive.
 */
gboolean
pxmchem_atom_calc_avg_by_symbol (gchar *symbol, gdouble *avg,
                                 GPtrArray *atom_refGPA)
{
  gint iter = 0;
  gint idx = 0;

  gdouble total_abund = 0;

  gdouble *mass = NULL;
  gdouble *abund = NULL;

  PxmAtom *atom = NULL;

  g_assert (symbol != NULL && strlen (symbol) > 0);
  g_assert (avg != NULL);
  g_assert (atom_refGPA != NULL);

  *avg = 0;

  /* Does the symbol exist ? If so, idx contains the index of the
   * corresponding atom in the reference atom array.
   */
  if (FALSE == pxmchem_atom_known_by_symbol (symbol, &idx, atom_refGPA))
    return FALSE;

  atom = g_ptr_array_index (atom_refGPA, idx);

  /* Masses and abundances go in pairs: with arrays of different
   * lengths one of them would be read past its end below.
   */
  if (atom->massGPA->len != atom->abundGPA->len)
    return FALSE;

  /* Compute the sum of the abundances.
   */
  for (iter = 0; iter < atom->abundGPA->len; iter++)
    {
      abund = g_ptr_array_index (atom->abundGPA, iter);

      total_abund += *abund ;
    }

  /* The total is the divisor below: it must be usable as such.
   */
  if (total_abund <= 0)
    return FALSE;

  /* Compute the average mass: sum of the product of each isotopic mass
   * per the ratio of the related abundance over the total abundance.
   */
  for (iter = 0; iter < atom->abundGPA->len; iter++)
    {
      mass = g_ptr_array_index (atom->massGPA, iter);
      abund = g_ptr_array_index (atom->abundGPA, iter);

      *avg += *mass * (*abund / total_abund);
    }

  return TRUE;
}

/* Compute the average mass of 'atom' (non-NULL) and store it in
 * atom->avg. The average is the sum of the isotopic masses, each
 * weighted by the ratio of its abundance over the total abundance.
 * atom->avg is reset to 0 on entry.
 *
 * Returns TRUE on success. Returns FALSE (with atom->avg left at 0)
 * if the mass and abundance arrays do not have the same length or if
 * the total abundance is not strictly positive.
 */
gboolean
pxmchem_atom_calc_avg_in_atom (PxmAtom *atom)
{
  gint iter = 0;
  gdouble total_abund = 0;

  gdouble *mass = NULL;
  gdouble *abund = NULL;

  g_assert (atom != NULL);

  atom->avg = 0;

  /* Masses and abundances go in pairs: with arrays of different
   * lengths one of them would be read past its end below.
   */
  if (atom->massGPA->len != atom->abundGPA->len)
    return FALSE;

  /* Compute the sum of the abundances.
   */
  for (iter = 0; iter < atom->abundGPA->len; iter++)
    {
      abund = g_ptr_array_index (atom->abundGPA, iter);

      total_abund += *abund ;
    }

  /* The total is the divisor below: it must be usable as such.
   */
  if (total_abund <= 0)
    return FALSE;

  /* Compute the average mass: sum of the product of each isotopic mass
   * per the ratio of the related abundance over the total abundance.
   */
  for (iter = 0; iter < atom->abundGPA->len; iter++)
    {
      mass = g_ptr_array_index (atom->massGPA, iter);
      abund = g_ptr_array_index (atom->abundGPA, iter);

      atom->avg += *mass * (*abund / total_abund);
    }

  return TRUE;
}



/* XML-format TRANSACTIONS
 */
gchar *
pxmchem_atomdef_format_xml_string_DTD (void)
{
  gchar *result = NULL;
  
  gchar *DTD = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
"<!-- DTD for atom definitions, used by the\n"            
    "'GNU polyxmass' suite of mass spectrometry applications.\n"
    "Copyright 2003, 2004 Filippo Rusconi - Licensed under the GNU GPL -->\n"
    "<!DOCTYPE atomdefdata [\n"
    "<!ELEMENT atomdefdata (atom+)>\n"
    "<!ELEMENT atom (name,symbol,isotope+)>\n"
    "<!ELEMENT name (#PCDATA)>\n"
    "<!ELEMENT symbol (#PCDATA)>\n"
			"<!ELEMENT isotope (mass , abund)>\n"
    "<!ELEMENT mass (#PCDATA)>\n"
    "<!ELEMENT abund (#PCDATA)>\n"
    "]>\n";
  
  result = g_strdup (DTD);
  
  return result;
}

/* Format the whole atom definition document for the atoms stored in
 * 'atomGPA': the XML declaration and DTD first (as returned by
 * pxmchem_atomdef_format_xml_string_DTD ()), then an <atomdefdata>
 * element holding one <atom> element per atom (each formatted by
 * pxmchem_atom_format_xml_string_atom () at 'offset' + 1).
 *
 * 'indent' and 'offset' are handed to
 * libpolyxmass_globals_format_string_lead () to get the leading
 * string of the <atomdefdata> tags. 'atomGPA' and 'indent' cannot be
 * NULL, and 'atomGPA' cannot hold NULL items.
 *
 * Returns a newly allocated string, to be freed by the caller. It
 * looks like this (after the DTD):
 *
 *   <atomdefdata>
 *     <atom>
 *       <name>Hydrogen</name>
 *       <symbol>H</symbol>
 *       <isotope>
 *         <mass>1.0078250370</mass>
 *         <abund>99.9885000000</abund>
 *       </isotope>
 *       <isotope>
 *         <mass>2.0141017870</mass>
 *         <abund>0.0115000000</abund>
 *       </isotope>
 *     </atom>
 *     .
 *     .
 *     .
 *   </atomdefdata>
 */
gchar *
pxmchem_atom_format_xml_string_atomdefdata (GPtrArray *atomGPA,
                                            gchar *indent, gint offset)
{
  gint iter = 0;

  gchar *lead = NULL;
  gchar *help = NULL;

  GString *gs = NULL;

  PxmAtom *atom = NULL;

  g_assert (atomGPA != NULL);
  g_assert (indent != NULL);

  gs = g_string_new ("");
  g_assert (gs != NULL);

  /* We first have to put the Document Type definition.
   */
  help = pxmchem_atomdef_format_xml_string_DTD ();
  g_string_append (gs, help);
  g_free (help);

  /* The opening and the closing <atomdefdata> tags share the same
   * lead: it is computed once here and freed once at the end. (No
   * lead is needed for the <atom> elements, which compute their own
   * from the offset they are given.)
   */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);

  /* Open the <atomdefdata> node. Next iterate in the array of
   * atoms...
   */
  g_string_append_printf (gs, "%s<atomdefdata>\n", lead);

  for (iter = 0; iter < atomGPA->len; iter++)
    {
      atom = g_ptr_array_index (atomGPA, iter);
      g_assert (atom != NULL);

      help = pxmchem_atom_format_xml_string_atom (atom, indent, offset + 1);
      g_assert (help != NULL);

      g_string_append (gs, help);
      g_free (help);
    }

  /* Finally we can close the <atomdefdata> node.
   */
  g_string_append_printf (gs, "%s</atomdefdata>\n", lead);
  g_free (lead);

  /* Only the GString wrapper is freed: the character data now belong
   * to the caller.
   */
  return g_string_free (gs, FALSE);
}




/* Format an <atom> XML element for 'atom'. 'indent' and 'offset' are
 * handed to libpolyxmass_globals_format_string_lead () to get the
 * leading string of each line: 'offset' for the <atom> tags,
 * 'offset' + 1 for <name>, <symbol> and <isotope>, 'offset' + 2 for
 * <mass> and <abund>.
 *
 * 'atom' and 'indent' cannot be NULL, and the mass and abundance
 * arrays of the atom must have the same length.
 *
 * Returns a newly allocated string, to be freed by the caller, that
 * looks like this:
 *
 *  <atom>
 *    <name>Hydrogen</name>
 *    <symbol>H</symbol>
 *    <isotope>
 *      <mass>1.0078250370</mass>
 *      <abund>99.9885000000</abund>
 *    </isotope>
 *    <isotope>
 *      <mass>2.0141017870</mass>
 *      <abund>0.0115000000</abund>
 *    </isotope>
 *  </atom>
 */
gchar *
pxmchem_atom_format_xml_string_atom (PxmAtom *atom,
                                     gchar *indent, gint offset)
{
  gint iter = 0;

  gdouble *val = NULL;

  gchar *atom_lead = NULL;   /* lead of <atom> and </atom> */
  gchar *child_lead = NULL;  /* lead of <name>, <symbol>, <isotope> */
  gchar *leaf_lead = NULL;   /* lead of <mass> and <abund> */
  gchar *help = NULL;

  GString *gs = NULL;

  g_assert (atom != NULL && indent != NULL);

  /* The two arrays MUST be of the same size, exactly, since the
   * masses and the abundances go in pairs.
   */
  g_assert (atom->massGPA->len == atom->abundGPA->len);

  gs = g_string_new ("");
  g_assert (gs != NULL);

  /* The three leads do not change while iterating in the isotopes:
   * compute them once instead of once (or more) per isotope.
   */
  atom_lead = libpolyxmass_globals_format_string_lead (indent, offset);
  child_lead = libpolyxmass_globals_format_string_lead (indent, offset + 1);
  leaf_lead = libpolyxmass_globals_format_string_lead (indent, offset + 2);

  /* Open the <atom> element right now and immediately insert the
   * non-iterative data. The name and the symbol are free text: the
   * characters that are special to XML ('&', '<', ...) are escaped so
   * that the output remains well-formed.
   */
  g_string_append_printf (gs, "%s<atom>\n", atom_lead);

  help = g_markup_escape_text (atom->name != NULL ? atom->name : "", -1);
  g_string_append_printf (gs, "%s<name>%s</name>\n", child_lead, help);
  g_free (help);

  help = g_markup_escape_text (atom->symbol != NULL ? atom->symbol : "", -1);
  g_string_append_printf (gs, "%s<symbol>%s</symbol>\n", child_lead, help);
  g_free (help);

  /* Now handle the iterative mass/abundance data.
   */
  for (iter = 0; iter < atom->massGPA->len; iter++)
    {
      g_string_append_printf (gs, "%s<isotope>\n", child_lead);

      /* First the mass of the isotope, converted to a string with
       * 10 decimal digits.
       */
      val = g_ptr_array_index (atom->massGPA, iter);
      help = libpolyxmass_globals_dtoa (*val, 10);
      g_string_append_printf (gs, "%s<mass>", leaf_lead);
      g_string_append (gs, help);
      g_string_append (gs, "</mass>\n");
      g_free (help);

      /* Second the abundance of the isotope, same format.
       */
      val = g_ptr_array_index (atom->abundGPA, iter);
      help = libpolyxmass_globals_dtoa (*val, 10);
      g_string_append_printf (gs, "%s<abund>", leaf_lead);
      g_string_append (gs, help);
      g_string_append (gs, "</abund>\n");
      g_free (help);

      g_string_append_printf (gs, "%s</isotope>\n", child_lead);
    }

  /* Once all the mass/abund pairs in <isotope> elements have been
   * formatted, we have to close the <atom> element.
   */
  g_string_append_printf (gs, "%s</atom>\n", atom_lead);

  g_free (leaf_lead);
  g_free (child_lead);
  g_free (atom_lead);

  /* Only the GString wrapper is freed: the character data now belong
   * to the caller.
   */
  return g_string_free (gs, FALSE);
}


/* Create a new PxmAtom instance out of the <atom> XML element pointed
 * to by 'xml_node' (which belongs to document 'xml_doc'):
 *
 *  <atom>
 *    <name>Hydrogen</name>
 *    <symbol>H</symbol>
 *    <isotope>
 *      <mass>1.0078250370</mass>
 *      <abund>99.9885000000</abund>
 *    </isotope>
 *    <isotope>
 *      <mass>2.0141017870</mass>
 *      <abund>0.0115000000</abund>
 *    </isotope>
 *  </atom>
 *
 * The children must come in that order: <name>, <symbol>, then one or
 * more <isotope> elements (each rendered with
 * pxmchem_atom_render_xml_node_isotope ()); this is asserted. Once
 * the isotopes are rendered, the mono member is set to the lowest
 * isotopic mass and the avg member is computed.
 *
 * 'user_data' is not used.
 *
 * Returns the new atom, or NULL if an isotope could not be rendered.
 */
PxmAtom *
pxmchem_atom_render_xml_node_atom (xmlDocPtr xml_doc,
                                   xmlNodePtr xml_node,
                                   gpointer user_data)
{
  gboolean one_isotope = FALSE;

  xmlChar *text = NULL;

  PxmAtom *atom = NULL;

  /* xml_node points to the <atom> element: we'll have to go one step
   * down to its first child in order to get to the <name> element.
   */
  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "atom"));

  /* Allocate the atom instance that we'll fill with the xml-based
   * data.
   */
  atom = pxmchem_atom_new (); /* auto alloc of the GPtrArrays */

  /* Now go to the first child of current node: <name>.
   */
  xml_node = xml_node->children;

  /* From a rigorous XML parsing point of view, the blanks found in
   * the XML document are considered to be nodes, and we have to detect
   * these and take proper action: go next sibling (next blank) as long
   * as blanks are encountered. The loop also ends when no sibling is
   * left (xmlIsBlankNode () returns 0 for NULL).
   */
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /* Check that we have effectively a <name> element here (and that
   * we have an element at all, before reading its name).
   */
  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "name"));

  /* The string is allocated by libxml2 and must be released with
   * xmlFree (), while the name member of the atom gets released with
   * g_free () (see pxmchem_atom_set_name ()): store a GLib copy.
   */
  text = xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
  atom->name = g_strdup ((gchar *) text);
  if (text != NULL)
    xmlFree (text);

  /* Now go to the next child of <atom> node, which is a sibling
   * of the <name> node, actually <symbol>.
   */
  xml_node = xml_node->next;
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /* Check that we have effectively a <symbol> element here.
   */
  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "symbol"));

  /* Same allocator remark as for the name.
   */
  text = xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
  atom->symbol = g_strdup ((gchar *) text);
  if (text != NULL)
    xmlFree (text);

  /* So, go to the next child(ren) of <atom> node, which is/are
   * siblings of the <symbol> node, actually <isotope>. There
   * can be more than one, but at least one <isotope> node: an atom
   * without even one isotope is possible neither in nature nor in
   * the DTD.
   */
  xml_node = xml_node->next;
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  while (xml_node != NULL)
    {
      /* Check that we have effectively a <isotope> element here.
       */
      g_assert (0 == strcmp ((gchar *) xml_node->name, "isotope"));

      if (FALSE == pxmchem_atom_render_xml_node_isotope (xml_doc,
                                                         xml_node,
                                                         atom,
                                                         NULL))
        {
          pxmchem_atom_free (atom);

          g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
                 _("%s@%d: failed to render an isotope node\n"),
                 __FILE__, __LINE__);

          return NULL;
        }

      /* At least one <isotope> is found, Mother Nature is happy.
       */
      one_isotope = TRUE;

      /* Finally go the next <isotope> element, if any, skipping the
       * blank nodes.
       */
      xml_node = xml_node->next;
      while (TRUE == xmlIsBlankNode (xml_node))
        xml_node = xml_node->next;
    }

  if (one_isotope == FALSE)
    {
      pxmchem_atom_free (atom);

      /* NOTE(review): per the GLib documentation a message logged at
       * G_LOG_LEVEL_ERROR is always fatal, in which case the return
       * statement below is never reached -- confirm that aborting is
       * intended here.
       */
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_ERROR,
             _("%s@%d: an atom can not have no isotope\n"),
             __FILE__, __LINE__);

      return NULL;
    }

  /* We have finished parsing the atom's specifications, which means
   * that we can perfectly ask that the monoisotopic mass be set
   * to the mono member of PxmAtom and that the average mass be
   * calculated and set to the avg member. A failure of either is
   * logged, but the atom is returned nonetheless.
   */
  if (FALSE == pxmchem_atom_set_mono_in_atom_by_lowest_mass (atom))
    g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
           _("%s@%d: failed to set monoisotopic mass to atom: '%s'\n"),
           __FILE__, __LINE__, atom->symbol);

  if (FALSE == pxmchem_atom_calc_avg_in_atom (atom))
    g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
           _("%s@%d: failed to calculate/set average mass to atom: '%s'\n"),
           __FILE__, __LINE__, atom->symbol);

  return atom ;
}


/* Render the <isotope> XML element pointed to by 'xml_node' (which
 * belongs to document 'xml_doc') into a mass/abundance pair that is
 * appended to the massGPA and abundGPA arrays of 'atom':
 *
 *    <isotope>
 *      <mass>1.0078250370</mass>
 *      <abund>99.9885000000</abund>
 *    </isotope>
 *
 * The DTD says <!ELEMENT isotope (mass , abund)>, and that order is
 * asserted. 'atom' and its two arrays cannot be NULL. 'user_data' is
 * not used.
 *
 * Returns TRUE if both values were converted and appended. If one of
 * the conversions fails, a critical message is logged, FALSE is
 * returned and neither array is modified.
 */
gboolean
pxmchem_atom_render_xml_node_isotope (xmlDocPtr xml_doc,
                                      xmlNodePtr xml_node,
                                      PxmAtom *atom,
                                      gpointer user_data)
{
  gdouble *mass = NULL;
  gdouble *abund = NULL;

  gchar *help = NULL;

  g_assert (atom != NULL);
  g_assert (atom->massGPA != NULL);
  g_assert (atom->abundGPA != NULL);

  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "isotope"));

  /* Now go to the first child of current node: <mass>.
   */
  xml_node = xml_node->children;

  /* From a rigorous XML parsing point of view, the blanks found in
   * the XML document are considered to be nodes, and we have to detect
   * these and take proper action: go next sibling (next blank) as long
   * as blanks are encountered.
   */
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /* Check that we have effectively a <mass> element here (and that
   * we have an element at all, before reading its name).
   */
  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "mass"));

  /* Allocate the gdouble so that the mass will fit in.
   */
  mass = g_malloc0 (sizeof (gdouble));

  help =
    (gchar *) xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);

  if (FALSE == libpolyxmass_globals_strtod (help, mass))
    {
      /* The text is logged from our own copy: the element might have
       * no child node at all to read the content from.
       */
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
             _("%s@%d: failed to convert mass: '%s' to gdouble\n"),
             __FILE__, __LINE__,
             help != NULL ? help : "");

      if (help != NULL)
        xmlFree (help);

      g_free (mass);

      return FALSE;
    }

  /* Strings returned by xmlNodeListGetString () are to be released
   * with xmlFree ().
   */
  if (help != NULL)
    xmlFree (help);

  /* Now go to the second child of <isotope>: <abund>, which is
   * a sibling of <mass>.
   */
  xml_node = xml_node->next;
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /* Check that current node is <abund>
   */
  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "abund"));

  /* Allocate the gdouble so that the abundance will fit in.
   */
  abund = g_malloc0 (sizeof (gdouble));

  help =
    (gchar *) xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);

  if (FALSE == libpolyxmass_globals_strtod (help, abund))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
             _("%s@%d: failed to convert abund: '%s' to gdouble\n"),
             __FILE__, __LINE__,
             help != NULL ? help : "");

      if (help != NULL)
        xmlFree (help);

      g_free (abund);
      g_free (mass);

      return FALSE;
    }

  if (help != NULL)
    xmlFree (help);

  /* Both values are good: only now are they given to the arrays, so
   * that a failure above never leaves a mass without its abundance.
   */
  g_ptr_array_add (atom->massGPA, mass);
  g_ptr_array_add (atom->abundGPA, abund);

  return TRUE;
}




/* Parse the atom definition XML file 'file' and, for each <atom>
 * element found directly under the <atomdefdata> root, append to
 * 'fillGPA' the PxmAtom returned by
 * pxmchem_atom_render_xml_node_atom ().
 *
 * Children of <atomdefdata> that are not <atom> elements (blanks,
 * comments, ...) are skipped.
 *
 * Returns the number of atoms successfully added to fillGPA, -1 if
 * an error occurred. On error, atoms that were added before the
 * failure are left in 'fillGPA'.
 */
gint
pxmchem_atom_render_xml_file (gchar *file, GPtrArray *fillGPA)
{
  gint count = 0;

  PxmAtom *atom = NULL;

  xmlDocPtr xml_doc = NULL;
  xmlNodePtr xml_node = NULL;

  g_assert (fillGPA != NULL);

  /* Check that the file is an XML file: <?xml version="1.0"?>
   * should be the first item in the file. By the way, this call will
   * also check that this file exists !
   */
  if (FALSE == libpolyxmass_globals_check_xml_file (file))
    return -1;

  /* Build an XML tree from the file.
   */
  xml_doc = xmlParseFile (file);

  if (xml_doc == NULL)
    return -1;

  /* Check if the document is of the right kind.
   */
  xml_node = xmlDocGetRootElement (xml_doc);

  if (xml_node == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: the document is empty: '%s'\n"),
	     __FILE__, __LINE__, file);

      xmlFreeDoc (xml_doc);

      return -1;
    }

  /* The root of the document should be an element of name
   * "atomdefdata".
   */
  if (0 != strcmp ((gchar *) xml_node->name, "atomdefdata"))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: wrong type document, root node is not \"atomdefdata\": '%s'\n"), 
	     __FILE__, __LINE__, file);

      xmlFreeDoc (xml_doc);

      return -1;
    }

  /* We are at the root of the document, we have to go one step
   * further to get our hands to atom element(s) that we will
   * parse with its own dedicated function. The DTD says that we may
   * have more than one atom, but at least one <atom> child node to
   * <atomdefdata>.
   *
   * The iteration always moves on to the next sibling, whatever the
   * current node is, so that blanks and any other non-<atom> node
   * are stepped over instead of stalling the loop.
   */
  for (xml_node = xml_node->children;
       xml_node != NULL;
       xml_node = xml_node->next)
    {
      if (xml_node->name == NULL
	  || 0 != strcmp ((gchar *) xml_node->name, "atom"))
	continue;

      atom = pxmchem_atom_render_xml_node_atom (xml_doc,
						xml_node,
						NULL);
      if (NULL == atom)
	{
	  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
		_("%s@%d: failed to render an atom from xml node\n"),
		 __FILE__, __LINE__);

	  xmlFreeDoc (xml_doc);

	  return -1;
	}

      g_ptr_array_add (fillGPA, atom);

      count = count + 1;
    }

  xmlFreeDoc (xml_doc);

  return count;
}


/* FREE'ING FUNCTIONS
 */
/* Release the PxmAtomcount pointed to by 'atomcount'.
 *
 * 'atomcount' must not be NULL (asserted). The object is released
 * with a single g_free () call. Always returns TRUE.
 */
gboolean
pxmchem_atomcount_free (PxmAtomcount *atomcount)
{
  g_assert (NULL != atomcount);

  g_free (atomcount);

  return TRUE;
}


/* Free a PxmAtom along with everything it owns: the name and symbol
 * strings, each gdouble stored in the massGPA and abundGPA arrays,
 * and the two arrays themselves.
 *
 * 'atom' must not be NULL (asserted). Always returns TRUE.
 */
gboolean
pxmchem_atom_free (PxmAtom *atom)
{
  gdouble *val = NULL;

  g_assert (atom != NULL);

  /* g_free () accepts NULL, no need to guard. */
  g_free (atom->name);
  g_free (atom->symbol);

  /* Now free all the members in the two arrays. But first make a
   * sanity check, even if we are destroying (helps debugging): each
   * mass goes with one abundance, so the two arrays must have the
   * same length. The lengths can only be compared when both arrays
   * exist.
   */
  if (atom->massGPA != NULL && atom->abundGPA != NULL)
    g_assert (atom->massGPA->len == atom->abundGPA->len);

  if (atom->massGPA != NULL)
    {
      /* Remove from the tail so that no remaining element has to be
       * shifted down on each removal.
       */
      while (atom->massGPA->len > 0)
	{
	  val = g_ptr_array_remove_index (atom->massGPA,
					  atom->massGPA->len - 1);
	  g_assert (val != NULL);
	  g_free (val);
	}

      g_ptr_array_free (atom->massGPA, TRUE);
    }

  if (atom->abundGPA != NULL)
    {
      while (atom->abundGPA->len > 0)
	{
	  val = g_ptr_array_remove_index (atom->abundGPA,
					  atom->abundGPA->len - 1);
	  g_assert (val != NULL);
	  g_free (val);
	}

      g_ptr_array_free (atom->abundGPA, TRUE);
    }

  g_free (atom);

  return TRUE;
}


/* GPtrArray-RELATED FUNCTIONS
 */
/* Remove every PxmAtom from 'GPA' and free each one with
 * pxmchem_atom_free (). The array itself is left allocated (and
 * empty).
 *
 * 'GPA' must not be NULL and must not hold NULL items (both
 * asserted). Returns the number of atoms that were freed.
 */
gint
pxmchem_atom_GPA_empty (GPtrArray *GPA)
{
  gint freed = 0;

  g_assert (NULL != GPA);

  for (freed = 0; GPA->len > 0; freed++)
    {
      PxmAtom *head = g_ptr_array_remove_index (GPA, 0);

      g_assert (NULL != head);
      pxmchem_atom_free (head);
    }

  return freed;
}

/* Free every PxmAtom held in 'GPA' (see pxmchem_atom_GPA_empty ())
 * and then free the array itself.
 *
 * 'GPA' must not be NULL (asserted). Returns the number of atoms
 * that were freed.
 */
gint
pxmchem_atom_GPA_free (GPtrArray *GPA)
{
  gint freed = 0;

  g_assert (NULL != GPA);

  /* First the items, then the container. */
  freed = pxmchem_atom_GPA_empty (GPA);
  g_ptr_array_free (GPA, TRUE);

  return freed;
}

/* Remove every PxmAtomcount from 'GPA' and free each one with
 * pxmchem_atomcount_free (). The array itself is left allocated (and
 * empty).
 *
 * 'GPA' must not be NULL and must not hold NULL items (both
 * asserted). Returns the number of items that were freed.
 */
gint
pxmchem_atomcount_GPA_empty (GPtrArray *GPA)
{
  gint freed = 0;

  g_assert (NULL != GPA);

  for (freed = 0; GPA->len > 0; freed++)
    {
      PxmAtomcount *head = g_ptr_array_remove_index (GPA, 0);

      g_assert (NULL != head);
      pxmchem_atomcount_free (head);
    }

  return freed;
}

/* Free every PxmAtomcount held in 'GPA' (see
 * pxmchem_atomcount_GPA_empty ()) and then free the array itself.
 *
 * 'GPA' must not be NULL (asserted). Returns the number of items
 * that were freed.
 */
gint
pxmchem_atomcount_GPA_free (GPtrArray *GPA)
{
  gint freed = 0;

  g_assert (NULL != GPA);

  /* First the items, then the container. */
  freed = pxmchem_atomcount_GPA_empty (GPA);
  g_ptr_array_free (GPA, TRUE);

  return freed;
}


