/*============================================================================
 * Main structure for an I/O numbering scheme associated with mesh entities
 * (such as cells, faces, and vertices);
 *
 * In parallel mode, such a scheme is important so as to redistribute
 * locally numbered entities on n processes to files written by p
 * processes, with p <= n.
 *
 * Only the case where p = 1 is presently implemented, so the numbering
 * scheme is simply based on entity's global labels.
 *
 * For p > 1, it would probably be necessary to extend the numbering
 * schemes so as to account for the fact that a given entity may have
 * a main index on its main associated domain, but may be present
 * as a ghost entity with another index on neighboring domains.
 *============================================================================*/

/*
  This file is part of the "Finite Volume Mesh" library, intended to provide
  finite volume mesh and associated fields I/O and manipulation services.

  Copyright (C) 2004-2007  EDF

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/

/*----------------------------------------------------------------------------
 * Standard C library headers
 *----------------------------------------------------------------------------*/

#include <assert.h>
#include <stdio.h>
#include <string.h>

/*----------------------------------------------------------------------------
 * BFT library headers
 *----------------------------------------------------------------------------*/

#include <bft_mem.h>
#include <bft_printf.h>

/*----------------------------------------------------------------------------
 *  Local headers
 *----------------------------------------------------------------------------*/

#include "fvm_defs.h"
#include "fvm_config_defs.h"
#include "fvm_order.h"
#include "fvm_parall.h"

/*----------------------------------------------------------------------------
 *  Header for the current file
 *----------------------------------------------------------------------------*/

#include "fvm_io_num.h"

/*----------------------------------------------------------------------------*/

#ifdef __cplusplus
extern "C" {
#if 0
} /* Fake brace to force back Emacs auto-indentation back to column 0 */
#endif
#endif /* __cplusplus */

/*============================================================================
 * Local structure definitions
 *============================================================================*/

/*----------------------------------------------------------------------------
 * Structure defining an I/O numbering scheme
 *----------------------------------------------------------------------------*/

/*
 * Notes:
 *
 * This structure currently only contains a global numbering array containing
 * each entity's global number (a 1 to n index). In the future, it may
 * also contain information relative to ghost zones, for I/O to file formats
 * enabling domain splitting (with multiple groups of processes writing
 * different subsets, using ghost zones for entities on boundaries between
 * mesh parts assigned to different process groups). In such a case, the
 * main numbering would only be global as regards a process group, at least
 * for use with formats such as that of EnSight Gold.
 *
 * In some cases, a global number may appear on multiple processes (for a face
 * or vertex on a processor boundary). This means that multiple processes may
 * update the corresponding value in a gather-type operation preceding or
 * associated with I/O, in an undefined order. If the associated data is up to
 * date on each process, it should be identical, so this should not be a
 * problem. MPI-IO file writes or MPI-2 one-sided communication PUT operations
 * also authorize this, though for the latter, the MPI-2 standard indicates
 * that this is authorized provided that processes with overlapping PUT
 * operations use the same predefined datatype, which seems to exclude similar
 * indexed datatypes with different indexes. To avoid problems if we wish to use
 * MPI-2 one-sided communication, one relatively simple solution would be to
 * consider that for processes other than that of lowest rank in which an
 * entity appears, it appears after all entities not occurring in processes
 * of lower rank. In that case, we would have two array sizes:
 * global_num_size_tot defining the array's full size, and global_num_size
 * defining the size of the portion to use in gather type operations.
 */

/* Private definition of the I/O numbering object.
 *
 * "global_num" is the array handed out by fvm_io_num_get_global_num();
 * it either aliases "_global_num" (array owned and freed by this structure)
 * or references an array provided by the caller, in which case
 * "_global_num" is NULL. */

struct _fvm_io_num_t {

  fvm_gnum_t         global_count;    /* Global number of entities */
  fvm_lnum_t         global_num_size; /* Local size of global numbering array */
  const fvm_gnum_t  *global_num;      /* Global (possibly shared) entity
                                         numbers (1 to n) */
  fvm_gnum_t        *_global_num;     /* Global entity numbers if owner,
                                         NULL otherwise */

};

/*=============================================================================
 * Private function definitions
 *============================================================================*/

#if defined(FVM_HAVE_MPI)

/*----------------------------------------------------------------------------
 * Ensure an I/O numbering structure owns a private copy of its global
 * numbering array.
 *
 * If the structure has no private array (_global_num is NULL), one is
 * allocated, filled from the currently referenced array, and the
 * structure's read pointer is redirected to it. Otherwise, nothing
 * is modified.
 *
 * parameters:
 *   this_io_num <-> pointer to numbering structure
 *----------------------------------------------------------------------------*/

static void
_fvm_io_num_copy_on_write(fvm_io_num_t  *const this_io_num)
{
  if (this_io_num->_global_num == NULL) {

    const fvm_gnum_t  *shared_num = this_io_num->global_num;
    fvm_lnum_t         ent_id = 0;

    BFT_MALLOC(this_io_num->_global_num,
               this_io_num->global_num_size,
               fvm_gnum_t);

    while (ent_id < this_io_num->global_num_size) {
      this_io_num->_global_num[ent_id] = shared_num[ent_id];
      ent_id++;
    }

    /* From now on, readers see the private copy */

    this_io_num->global_num = this_io_num->_global_num;

  }

  /* Post-condition (checked in debug builds only): the read pointer
     and the owned array are one and the same */

  assert(this_io_num->global_num == this_io_num->_global_num);
}

/*----------------------------------------------------------------------------
 * Try to replace the private global numbering of an I/O numbering
 * structure by a reference to a shared parent array.
 *
 * If the structure owns a numbering array whose values all match those
 * found at the same positions in parent_global_number[], the structure
 * is made to reference the parent array and its private copy is freed.
 * If any value differs, the private array remains the one in use.
 * Nothing is done if the structure has no private array or if
 * parent_global_number is NULL.
 *
 * parameters:
 *   this_io_num          <-> pointer to numbering structure
 *   parent_global_number <-- pointer to shared list of global parent
 *                            entity numbers
 *----------------------------------------------------------------------------*/

static void
_fvm_io_num_try_to_set_shared(fvm_io_num_t      *const this_io_num,
                              const fvm_gnum_t         parent_global_number[])
{
  fvm_lnum_t  n_same = 0;

  if (this_io_num->_global_num == NULL || parent_global_number == NULL)
    return;

  /* Count leading entries identical in both arrays */

  while (   n_same < this_io_num->global_num_size
         && this_io_num->_global_num[n_same] == parent_global_number[n_same])
    n_same++;

  if (n_same >= this_io_num->global_num_size) {
    /* Full match: the private copy is redundant */
    this_io_num->global_num = parent_global_number;
    BFT_FREE(this_io_num->_global_num);
  }
  else
    this_io_num->global_num = this_io_num->_global_num;
}

/*----------------------------------------------------------------------------
 * Maximum global number associated with an I/O numbering structure
 *
 * The local candidate is the last value of the local global numbering
 * array (0 if that array is empty), so the array is expected to be sorted
 * by increasing number. The result is the maximum of these candidates
 * over all ranks of the communicator.
 *
 * parameters:
 *   this_io_num <-- pointer to partially initialized I/O numbering structure.
 *   comm        <-- associated MPI communicator
 *
 * returns:
 *   maximum global number associated with the I/O numbering
 *----------------------------------------------------------------------------*/

static fvm_gnum_t
_fvm_io_num_global_max(const fvm_io_num_t  *const this_io_num,
                       const MPI_Comm             comm)
{
  const size_t  n_local = this_io_num->global_num_size;
  fvm_gnum_t    max_loc = 0;
  fvm_gnum_t    max_glob;

  /* Highest local number is the last one of the (sorted) local array */

  if (n_local > 0)
    max_loc = this_io_num->global_num[n_local - 1];

  MPI_Allreduce(&max_loc, &max_glob, 1, FVM_MPI_GNUM, MPI_MAX, comm);

  return max_glob;
}

/*----------------------------------------------------------------------------
 * Global ordering associated with an I/O numbering structure.
 *
 * The structure should contain an initial ordering, which should
 * be sorted, but need not be contiguous. On output, the numbering
 * will be contiguous.
 *
 * As an option, a number of sub-entities per initial entity may be
 * given, in which case sub-entities of a same entity will have contiguous
 * numbers in the final ordering.
 *
 * The structure's private array (_global_num) is used directly as MPI
 * send and receive buffer, so it must be allocated before calling this
 * function (callers in this file invoke _fvm_io_num_copy_on_write() first).
 *
 * This function is collective on comm: every rank must call it.
 *
 * parameters:
 *   this_io_num    <-> pointer to structure that should be ordered
 *   n_sub_entities <-- optional number of sub-entities per initial entity,
 *                      or NULL if unused
 *   comm           <-- associated MPI communicator
 *----------------------------------------------------------------------------*/

static void
_fvm_io_num_global_order(fvm_io_num_t       *this_io_num,
                         const fvm_lnum_t    n_sub_entities[],
                         MPI_Comm            comm)
{

  fvm_gnum_t  n_ent_recv, num_prev, num_cur;
  size_t      i, j, slice_size;
  fvm_lnum_t  k;
  int         rank;

  _Bool       may_be_shared = false;

  fvm_gnum_t  *global_num_shift = NULL, *recv_global_num = NULL;
  fvm_lnum_t  *recv_n_sub = NULL, *recv_order = NULL;
  int         *send_count = NULL, *recv_count = NULL;
  int         *send_shift = NULL, *recv_shift = NULL;
  int         have_sub_loc = 0, have_sub_glob = 0;

  int         local_rank, size;

  fvm_gnum_t  current_global_num = 0;

  /* Initialization */

  MPI_Comm_rank(comm, &local_rank);
  MPI_Comm_size(comm, &size);

  num_prev = 0;    /* true initialization later for slice 0, */

  /* If numbering is shared, we will check later it was changed or if
     it can remain shared (in which case the copy may be discarded) */

  if (this_io_num->global_num != this_io_num->_global_num)
    may_be_shared = true;
  else
    may_be_shared = false;

  /* Get temporary maximum global number value */

  this_io_num->global_count = _fvm_io_num_global_max(this_io_num, comm);

  /* slice_size = ceil(this_io_num->global_count/size);
     rank r handles initial global numbers in
     [r*slice_size + 1, (r+1)*slice_size] */

  slice_size = this_io_num->global_count / size;
  if (this_io_num->global_count % size > 0)
    slice_size += 1;

  assert(sizeof(fvm_gnum_t) >= sizeof(fvm_lnum_t));

  BFT_MALLOC(send_count, size, int);
  BFT_MALLOC(recv_count, size, int);

  BFT_MALLOC(send_shift, size, int);
  BFT_MALLOC(recv_shift, size, int);

  /* Count number of values to send to each process */

  for (rank = 0; rank < size; rank++)
    send_count[rank] = 0;

  for (i = 0; i < (size_t)(this_io_num->global_num_size); i++)
    send_count[(this_io_num->global_num[i] - 1) / slice_size] += 1;

  /* The count arrays are arrays of int, so they must be exchanged as
     MPI_INT (not as global numbers, whose type may be wider than int) */

  MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);

  send_shift[0] = 0;
  recv_shift[0] = 0;

  for (rank = 1; rank < size; rank++) {
    send_shift[rank] = send_shift[rank - 1] + send_count[rank -1];
    recv_shift[rank] = recv_shift[rank - 1] + recv_count[rank -1];
  }

  /* As data is sorted by increasing base global numbering, we do not
     need to build an extra array, but only to send the correct parts
     of the n_sub_entities[] array to the correct processors */

  n_ent_recv = recv_shift[size - 1] + recv_count[size - 1];

  BFT_MALLOC(recv_global_num, n_ent_recv, fvm_gnum_t);
  BFT_MALLOC(recv_order, n_ent_recv, fvm_lnum_t);

  MPI_Alltoallv(this_io_num->_global_num, send_count, send_shift, FVM_MPI_GNUM,
                recv_global_num, recv_count, recv_shift, FVM_MPI_GNUM, comm);

  /* Do we have sub-entities ? (some ranks may pass NULL while others
     do not, so the decision must be taken globally) */

  if (n_sub_entities != NULL)
    have_sub_loc = 1;

  MPI_Allreduce(&have_sub_loc, &have_sub_glob, 1, MPI_INT, MPI_MAX, comm);

  if (have_sub_glob > 0) {

    fvm_lnum_t  *send_n_sub;

    BFT_MALLOC(send_n_sub, this_io_num->global_num_size, fvm_lnum_t);
    BFT_MALLOC(recv_n_sub, n_ent_recv, fvm_lnum_t);

    if (n_sub_entities != NULL) {
      for (i = 0; i < (size_t)(this_io_num->global_num_size); i++)
        send_n_sub[i] = n_sub_entities[i];
    }
    else {
      /* Ranks without sub-entity information count 1 per entity */
      for (i = 0; i < (size_t)(this_io_num->global_num_size); i++)
        send_n_sub[i] = 1;
    }

    MPI_Alltoallv(send_n_sub, send_count, send_shift, FVM_MPI_LNUM,
                  recv_n_sub, recv_count, recv_shift, FVM_MPI_LNUM, comm);

    BFT_FREE(send_n_sub);
  }

  if (n_ent_recv > 0) {

    fvm_order_local_allocated(NULL,
                              recv_global_num,
                              recv_order,
                              n_ent_recv);

    /* Determine global order; requires ordering to loop through buffer by
       increasing number (slice blocks associated with each process are
       already sorted, but the whole "gathered" slice is not).
       We build an initial global order based on the initial global numbering,
       such that for each slice, the global number of an entity is equal to
       the cumulative number of sub-entities */

    if (have_sub_glob > 0) {

      current_global_num = recv_n_sub[recv_order[0]];
      num_prev = recv_global_num[recv_order[0]];
      recv_global_num[recv_order[0]] = current_global_num;

      for (i = 1; i < n_ent_recv; i++) {
        num_cur = recv_global_num[recv_order[i]];
        /* Entities received several times (same initial number) keep
           the same new number */
        if (num_cur > num_prev)
          current_global_num += recv_n_sub[recv_order[i]];
        recv_global_num[recv_order[i]] = current_global_num;
        num_prev = num_cur;
      }

    }
    else { /* if (have_sub_glob == 0) */

      current_global_num = 1;
      num_prev = recv_global_num[recv_order[0]];
      recv_global_num[recv_order[0]] = current_global_num;

      for (i = 1; i < n_ent_recv; i++) {
        num_cur = recv_global_num[recv_order[i]];
        if (num_cur > num_prev)
          current_global_num += 1;
        recv_global_num[recv_order[i]] = current_global_num;
        num_prev = num_cur;
      }

    }

  }

  /* Partial clean-up */

  BFT_FREE(recv_n_sub);
  BFT_FREE(recv_order);

  /* At this stage, recv_global_num[] is valid for this process, and
     current_global_num indicates the total number of entities handled
     by this process; we must now shift global numberings on different
     processes by the cumulative total number of entities handled by
     each process */

  BFT_MALLOC(global_num_shift, size, fvm_gnum_t);

  MPI_Allgather(&current_global_num, 1, FVM_MPI_GNUM,
                global_num_shift, 1, FVM_MPI_GNUM, comm);

  /* In-place inclusive prefix sum of the per-rank totals */

  for (rank = 1; rank < size; rank++)
    global_num_shift[rank] =   global_num_shift[rank-1]
                             + global_num_shift[rank];

  if (local_rank > 0) {
    for (i = 0; i < n_ent_recv; i++)
      recv_global_num[i] += global_num_shift[local_rank - 1];
  }

  BFT_FREE(global_num_shift);

  /* Return global order to all processors (reverse of the first exchange,
     hence the swapped count and shift arrays) */

  MPI_Alltoallv(recv_global_num, recv_count, recv_shift, FVM_MPI_GNUM,
                this_io_num->_global_num, send_count, send_shift, FVM_MPI_GNUM,
                comm);

  /* Free memory */

  BFT_FREE(recv_global_num);

  BFT_FREE(send_count);
  BFT_FREE(recv_count);
  BFT_FREE(send_shift);
  BFT_FREE(recv_shift);

  /* Get final maximum global number value */

  this_io_num->global_count = _fvm_io_num_global_max(this_io_num, comm);

  /* When sub-entities have been added, now switch from a numbering on
     the initial entities (shifted by number of sub-entities) to
     a numbering on the final sub-entities */

  if (n_sub_entities != NULL) {

    fvm_gnum_t *_global_num;

    /* First pass: local number of sub-entities */

    for (i = 0, j = 0; i < (size_t)(this_io_num->global_num_size); i++)
      j += n_sub_entities[i];

    BFT_MALLOC(_global_num, j, fvm_gnum_t);

    /* Second pass: the number assigned to an initial entity is that of its
       last sub-entity, so its sub-entities are numbered backwards from it */

    for (i = 0, j = 0; i < (size_t)(this_io_num->global_num_size); i++) {
      for (k = 0; k < n_sub_entities[i]; j++, k++)
        _global_num[j] = this_io_num->_global_num[i] - n_sub_entities[i] + k + 1;
    }

    BFT_FREE(this_io_num->_global_num);
    this_io_num->_global_num = _global_num;

    if (this_io_num->global_num_size != (fvm_lnum_t)j) {
      this_io_num->global_num_size = j;
      may_be_shared = false;
    }

    if (may_be_shared == false)
      this_io_num->global_num = this_io_num->_global_num;
  }

  /* If numbering was initially shared, check if it was changed or if it
     may remain shared (in which case the copy may be discarded) */

  if (may_be_shared == true) {
    for (i = 0; i < (size_t)(this_io_num->global_num_size); i++)
      if (this_io_num->_global_num[i] != this_io_num->global_num[i])
        break;
    if (i < (size_t)(this_io_num->global_num_size))
      this_io_num->global_num = this_io_num->_global_num;
    else
      BFT_FREE(this_io_num->_global_num);
  }
}

/*----------------------------------------------------------------------------
 * Return the global number of sub-entities associated with an initial
 * entity whose global numbering is known, given the number of
 * sub-entities per initial entity.
 *
 * Entities are redistributed by slices of their global number so that
 * an entity present on several ranks is only counted once; the per-rank
 * totals are then summed over the communicator.
 *
 * This function is collective on comm: every rank must call it. If no
 * rank provides sub-entity counts, the returned value is 0.
 *
 * parameters:
 *   this_io_num    <-- pointer to base io numbering
 *   n_sub_entities <-- number of sub-entities per initial entity
 *                      (may be NULL on some ranks, in which case each local
 *                      entity counts for 1)
 *   comm           <-- associated MPI communicator
 *
 * returns:
 *   global number of sub-entities
 *----------------------------------------------------------------------------*/

static fvm_gnum_t
_fvm_io_num_global_sub_size(const fvm_io_num_t  *this_io_num,
                            const fvm_lnum_t     n_sub_entities[],
                            MPI_Comm             comm)
{

  fvm_gnum_t  global_count, n_ent_recv, num_prev, num_cur;
  size_t      i, slice_size;
  int         rank;

  fvm_gnum_t  *recv_global_num = NULL;
  fvm_gnum_t  *send_global_num = NULL;
  fvm_lnum_t  *recv_n_sub = NULL, *recv_order = NULL;
  int         *send_count = NULL, *recv_count = NULL;
  int         *send_shift = NULL, *recv_shift = NULL;
  int         have_sub_loc = 0, have_sub_glob = 0;

  int         size;

  fvm_gnum_t  current_global_num = 0;
  fvm_gnum_t  retval = 0;

  /* Initialization */

  MPI_Comm_size(comm, &size);

  num_prev = 0;    /* true initialization later for slice 0, */

  /* Get temporary maximum global number value */

  global_count = _fvm_io_num_global_max(this_io_num, comm);

  /* slice_size = ceil(global_count/size) */

  slice_size = global_count / size;
  if (global_count % size > 0)
    slice_size += 1;

  assert(sizeof(fvm_gnum_t) >= sizeof(fvm_lnum_t));

  BFT_MALLOC(send_count, size, int);
  BFT_MALLOC(recv_count, size, int);

  BFT_MALLOC(send_shift, size, int);
  BFT_MALLOC(recv_shift, size, int);

  /* Count number of values to send to each process */

  for (rank = 0; rank < size; rank++)
    send_count[rank] = 0;

  for (i = 0; i < (size_t)(this_io_num->global_num_size); i++)
    send_count[(this_io_num->global_num[i] - 1) / slice_size] += 1;

  /* The count arrays are arrays of int, so they must be exchanged as
     MPI_INT (not as global numbers, whose type may be wider than int) */

  MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);

  send_shift[0] = 0;
  recv_shift[0] = 0;

  for (rank = 1; rank < size; rank++) {
    send_shift[rank] = send_shift[rank - 1] + send_count[rank -1];
    recv_shift[rank] = recv_shift[rank - 1] + recv_count[rank -1];
  }

  /* As data is sorted by increasing base global numbering, we do not
     need to build an extra array, but only to send the correct parts
     of the n_sub_entities[] array to the correct processors */

  n_ent_recv = recv_shift[size - 1] + recv_count[size - 1];

  BFT_MALLOC(recv_global_num, n_ent_recv, fvm_gnum_t);
  BFT_MALLOC(recv_order, n_ent_recv, fvm_lnum_t);

  /* Use the private array as send buffer when there is one; otherwise
     send a temporary copy of the shared (const) array
     (presumably because the MPI send buffer argument is not
     const-qualified -- TODO confirm) */

  if (this_io_num->_global_num != NULL)
    send_global_num = this_io_num->_global_num;
  else {
    BFT_MALLOC(send_global_num,
               this_io_num->global_num_size,
               fvm_gnum_t);
    memcpy(send_global_num,
           this_io_num->global_num,
           this_io_num->global_num_size * sizeof(fvm_gnum_t));
  }

  MPI_Alltoallv(send_global_num, send_count, send_shift, FVM_MPI_GNUM,
                recv_global_num, recv_count, recv_shift, FVM_MPI_GNUM, comm);

  if (send_global_num != this_io_num->_global_num)
    BFT_FREE(send_global_num);

  /* Do we have sub-entities ? */

  if (n_sub_entities != NULL)
    have_sub_loc = 1;

  MPI_Allreduce(&have_sub_loc, &have_sub_glob, 1, MPI_INT, MPI_MAX, comm);

  if (have_sub_glob > 0) {

    fvm_lnum_t  *send_n_sub;

    BFT_MALLOC(send_n_sub, this_io_num->global_num_size, fvm_lnum_t);
    BFT_MALLOC(recv_n_sub, n_ent_recv, fvm_lnum_t);

    if (n_sub_entities != NULL) {
      for (i = 0; i < (size_t)(this_io_num->global_num_size); i++)
        send_n_sub[i] = n_sub_entities[i];
    }
    else {
      /* Ranks without sub-entity information count 1 per entity */
      for (i = 0; i < (size_t)(this_io_num->global_num_size); i++)
        send_n_sub[i] = 1;
    }

    MPI_Alltoallv(send_n_sub, send_count, send_shift, FVM_MPI_LNUM,
                  recv_n_sub, recv_count, recv_shift, FVM_MPI_LNUM, comm);

    BFT_FREE(send_n_sub);
  }

  /* recv_n_sub is only allocated when at least one rank has sub-entities,
     so it must not be dereferenced otherwise (the count is then 0) */

  if (n_ent_recv > 0 && have_sub_glob > 0) {

    fvm_order_local_allocated(NULL,
                              recv_global_num,
                              recv_order,
                              n_ent_recv);

    /* Loop through the gathered slice by increasing initial number (slice
       blocks associated with each process are already sorted, but the whole
       "gathered" slice is not), accumulating the number of sub-entities of
       each distinct initial entity; an entity received from several ranks
       is counted only once */

    current_global_num = recv_n_sub[recv_order[0]];
    num_prev = recv_global_num[recv_order[0]];

    for (i = 1; i < n_ent_recv; i++) {
      num_cur = recv_global_num[recv_order[i]];
      if (num_cur > num_prev)
        current_global_num += recv_n_sub[recv_order[i]];
      num_prev = num_cur;
    }

  }

  /* Clean-up */

  BFT_FREE(recv_n_sub);
  BFT_FREE(recv_order);
  BFT_FREE(recv_global_num);

  BFT_FREE(send_count);
  BFT_FREE(recv_count);
  BFT_FREE(send_shift);
  BFT_FREE(recv_shift);

  /* At this stage, current_global_num indicates the total number of
     sub-entities handled by this process; the global count is the sum
     of these totals over all processes */

  MPI_Allreduce(&current_global_num, &retval, 1, FVM_MPI_GNUM, MPI_SUM, comm);

  return retval;
}

#endif /* defined(FVM_HAVE_MPI) */

/*=============================================================================
 * Public function definitions
 *============================================================================*/

/*----------------------------------------------------------------------------
 * Creation of an I/O numbering structure.
 *
 * The corresponding entities must be locally ordered.
 *
 * No structure is created (and NULL is returned) when running on fewer
 * than 2 processes, or when the library is built without MPI support.
 *
 * parameters:
 *   parent_entity_number <-- pointer to list of selected entities' parent
 *                            numbers (1 to n), or NULL if all first n_ent
 *                            entities are used
 *   parent_global_number <-- pointer to list of global (i.e. domain splitting
 *                            independent) parent entity numbers
 *   n_entities           <-- number of entities considered
 *   share_parent_global  <-- if non zero, try to share parent_global_number
 *                            instead of using a local copy
 *
 * returns:
 *  pointer to I/O numbering structure, or NULL
 *----------------------------------------------------------------------------*/

fvm_io_num_t *
fvm_io_num_create(const fvm_lnum_t  parent_entity_number[],
                  const fvm_gnum_t  parent_global_number[],
                  const size_t      n_entities,
                  const int         share_parent_global)
{
  /* Initialized so that a defined value is returned even when the
     MPI-specific section below is compiled out */

  fvm_io_num_t  *this_io_num = NULL;

  /* Initial checks */

  if (fvm_parall_get_size() < 2)
    return NULL;

  assert(fvm_order_local_test(parent_entity_number,
                              parent_global_number,
                              n_entities) == true);

#if defined(FVM_HAVE_MPI)

  /* Create structure */

  BFT_MALLOC(this_io_num, 1, fvm_io_num_t);

  this_io_num->global_num_size = n_entities;

  BFT_MALLOC(this_io_num->_global_num, n_entities, fvm_gnum_t);
  this_io_num->global_num = this_io_num->_global_num;

  if (n_entities > 0) {

    size_t  i;

    /* Assign initial global numbers */

    if (parent_entity_number != NULL) {
      /* parent_entity_number[] is 1-based, hence the -1 */
      for (i = 0 ; i < n_entities ; i++)
        this_io_num->_global_num[i]
          = parent_global_number[parent_entity_number[i]-1];
    }
    else {
      for (i = 0 ; i < n_entities ; i++)
        this_io_num->_global_num[i] = parent_global_number[i];
    }

  }

  /* Order globally; global_count is only a placeholder here, as it is
     recomputed by _fvm_io_num_global_order() */

  this_io_num->global_count = n_entities;

  _fvm_io_num_copy_on_write(this_io_num);
  _fvm_io_num_global_order(this_io_num,
                           NULL,
                           fvm_parall_get_mpi_comm());

  /* If the final numbering is identical to the parent's, the local copy
     may be dropped in favor of the parent array */

  if (share_parent_global != 0)
    _fvm_io_num_try_to_set_shared(this_io_num,
                                  parent_global_number);

#endif

  return this_io_num;
}

/*----------------------------------------------------------------------------
 * Creation of an I/O numbering structure,
 * sharing a given global numbering array.
 *
 * The corresponding entities must be locally ordered.
 *
 * The structure only references global_number[]: no copy is made and
 * the values are not modified here, so the array must remain valid for
 * as long as the structure is used.
 *
 * parameters:
 *   global_number <-- pointer to list of global (i.e. domain splitting
 *                     independent) entity numbers
 *   global_count  <-- global number of entities
 *   n_entities    <-- number of local entities considered
 *
 * returns:
 *  pointer to I/O numbering structure
 *----------------------------------------------------------------------------*/

fvm_io_num_t *
fvm_io_num_create_shared(const fvm_gnum_t  global_number[],
                         fvm_gnum_t        global_count,
                         const size_t      n_entities)
{
  fvm_io_num_t  *this_io_num = NULL;

  BFT_MALLOC(this_io_num, 1, fvm_io_num_t);

  /* Reference the caller's array; the structure owns no array of its own */

  this_io_num->_global_num = NULL;
  this_io_num->global_num = global_number;

  /* Sizes are taken as given by the caller */

  this_io_num->global_num_size = n_entities;
  this_io_num->global_count = global_count;

  return this_io_num;
}

/*----------------------------------------------------------------------------
 * Creation of an I/O numbering structure based on an initial
 * I/O numbering and a number of new entities per base entity.
 *
 * This is useful for example to create an I/O numbering for
 * triangles based on split polygons, whose I/O numbering is defined.
 *
 * NULL is returned if base_io_num is NULL, or if the library is built
 * without MPI support.
 *
 * parameters:
 *   base_io_num    <-- pointer to base I/O numbering structure
 *   n_sub_entities <-- number of new entities per base entity
 *
 * returns:
 *  pointer to I/O numbering structure
 *----------------------------------------------------------------------------*/

fvm_io_num_t *
fvm_io_num_create_from_sub(const fvm_io_num_t  *base_io_num,
                           const fvm_lnum_t     n_sub_entities[])
{
  fvm_io_num_t  *this_io_num = NULL;

  /* Nothing to build upon */

  if (base_io_num == NULL)
    return NULL;

  assert(fvm_parall_get_size() > 1); /* Otherwise, base_io_num should be NULL */

#if defined(FVM_HAVE_MPI)
  {
    const fvm_lnum_t  n_base = base_io_num->global_num_size;
    fvm_lnum_t        ent_id = 0;

    /* New structure, starting with as many entries as the base numbering;
       global_count is a placeholder until the global ordering is done */

    BFT_MALLOC(this_io_num, 1, fvm_io_num_t);

    this_io_num->global_num_size = n_base;
    this_io_num->global_count = n_base;

    BFT_MALLOC(this_io_num->_global_num, n_base, fvm_gnum_t);
    this_io_num->global_num = this_io_num->_global_num;

    /* Start from the base entities' global numbers */

    while (ent_id < n_base) {
      this_io_num->_global_num[ent_id] = base_io_num->global_num[ent_id];
      ent_id++;
    }

    /* Global ordering, expanding each base entity into its sub-entities */

    _fvm_io_num_copy_on_write(this_io_num);
    _fvm_io_num_global_order(this_io_num,
                             n_sub_entities,
                             fvm_parall_get_mpi_comm());
  }
#endif

  return this_io_num;
}

/*----------------------------------------------------------------------------
 * Destruction of an I/O numbering structure.
 *
 * Only the array owned by the structure (if any) is freed along with the
 * structure itself; a shared numbering array is left untouched.
 *
 * parameters:
 *   this_io_num <-- pointer to structure that should be destroyed
 *
 * returns:
 *   NULL pointer
 *----------------------------------------------------------------------------*/

fvm_io_num_t *
fvm_io_num_destroy(fvm_io_num_t  * this_io_num)
{
  if (this_io_num == NULL)
    return this_io_num;

  BFT_FREE(this_io_num->_global_num);
  BFT_FREE(this_io_num);

  /* NOTE(review): BFT_FREE is expected to reset its argument to NULL,
     which is what makes the documented return value hold -- confirm
     against bft_mem.h */

  return this_io_num;
}

/*----------------------------------------------------------------------------
 * Return local number of entities associated with an I/O numbering
 * structure.
 *
 * parameters:
 *   this_io_num <-- pointer to I/O numbering structure (must not be NULL)
 *
 * returns:
 *  local number of associated entities
 *----------------------------------------------------------------------------*/

fvm_lnum_t
fvm_io_num_get_local_count(const fvm_io_num_t  *const this_io_num)
{
  fvm_lnum_t  n_local;

  assert(this_io_num != NULL);

  /* Local count is the size of the local global numbering array */

  n_local = this_io_num->global_num_size;

  return n_local;
}

/*----------------------------------------------------------------------------
 * Return global number of entities associated with an I/O numbering
 * structure.
 *
 * parameters:
 *   this_io_num <-- pointer to I/O numbering structure (must not be NULL)
 *
 * returns:
 *  global number of associated entities
 *----------------------------------------------------------------------------*/

fvm_gnum_t
fvm_io_num_get_global_count(const fvm_io_num_t  *const this_io_num)
{
  fvm_gnum_t  n_global;

  assert(this_io_num != NULL);

  /* Stored value; nothing is recomputed or communicated here */

  n_global = this_io_num->global_count;

  return n_global;
}

/*----------------------------------------------------------------------------
 * Return global numbering associated with an I/O numbering structure.
 *
 * The returned array is the one referenced by the structure (no copy
 * is made), whether it is owned by the structure or shared.
 *
 * parameters:
 *   this_io_num <-- pointer to I/O numbering structure (must not be NULL)
 *
 * returns:
 *  pointer to array of global numbers associated with local entities
 *  (1 to n numbering)
 *----------------------------------------------------------------------------*/

const fvm_gnum_t *
fvm_io_num_get_global_num(const fvm_io_num_t  *const this_io_num)
{
  const fvm_gnum_t  *num_array;

  assert(this_io_num != NULL);

  num_array = this_io_num->global_num;

  return num_array;
}

/*----------------------------------------------------------------------------
 * Return the global number of sub-entities associated with an initial
 * entity whose global numbering is known, given the number of
 * sub-entities per initial entity.
 *
 * The MPI communicator used is the one returned by
 * fvm_parall_get_mpi_comm(). 0 is returned if this_io_num is NULL, if
 * no rank provides sub-entity counts, or if the library is built without
 * MPI support.
 *
 * parameters:
 *   this_io_num    <-- pointer to base io numbering
 *   n_sub_entities <-- number of sub-entities per initial entity
 *
 * returns:
 *   global number of sub-entities
 *----------------------------------------------------------------------------*/

fvm_gnum_t
fvm_io_num_global_sub_size(const fvm_io_num_t  *this_io_num,
                           const fvm_lnum_t     n_sub_entities[])
{
  fvm_gnum_t  n_sub_glob = 0;

  if (this_io_num != NULL) {

    assert(fvm_parall_get_size() > 1); /* Otherwise, base_io_num should be NULL */

#if defined(FVM_HAVE_MPI)
    {
      /* Caution: we may have sub-entities on some ranks and not on others,
         so the presence flag is reduced over all ranks */

      int  sub_flag_loc = (n_sub_entities != NULL) ? 1 : 0;
      int  sub_flag_glob = 0;

      MPI_Allreduce(&sub_flag_loc, &sub_flag_glob, 1, MPI_INT, MPI_MAX,
                    fvm_parall_get_mpi_comm());

      if (sub_flag_glob > 0)
        n_sub_glob = _fvm_io_num_global_sub_size(this_io_num,
                                                 n_sub_entities,
                                                 fvm_parall_get_mpi_comm());
    }
#endif

  }

  return n_sub_glob;
}

/*----------------------------------------------------------------------------
 * Dump printout of a I/O numbering structure.
 *
 * parameters:
 *   this_io_num <-- pointer to structure that should be dumped
 *----------------------------------------------------------------------------*/

void
fvm_io_num_dump(const fvm_io_num_t  *const this_io_num)
{
  fvm_lnum_t i;

  if (this_io_num == NULL) {
    bft_printf(_("  global numbering: nil\n"));
    return;
  }

  bft_printf(_("  global numbering size:            %u\n"),
             (unsigned)this_io_num->global_num_size);

  bft_printf(_("\n"
               "  pointer to shareable array:\n"
               "    global_num:                     %p\n"),
             this_io_num->global_num);

  bft_printf(_("\n"
               "  pointer to local array:\n"
               "    _global_num:                    %p\n"),
             this_io_num->global_num);

  if (this_io_num->global_num_size > 0) {

    bft_printf(_("\n  global number:\n\n"));
    for (i = 0 ; i < this_io_num->global_num_size ; i++)
      bft_printf("  %10u : %10lu\n",
                 (unsigned)i + 1,
                 (unsigned long)this_io_num->global_num[i]);
  }
}

/*----------------------------------------------------------------------------*/

#ifdef __cplusplus
}
#endif /* __cplusplus */
