/*
 * Copyright (c) 2004-2010 The Trustees of Indiana University.
 *                         All rights reserved.
 * Copyright (c) 2004-2011 The Trustees of the University of Tennessee.
 *                         All rights reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007      Evergrid, Inc. All rights reserved.
 *
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#include "orte_config.h"

#include <sys/types.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif  /* HAVE_UNISTD_H */
#include <string.h>

#include "opal/include/opal/prefetch.h"
#include "opal/util/output.h"
#include "opal/util/opal_environ.h"
#include "opal/util/basename.h"
#include "opal/util/show_help.h"
#include "orte/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"

#include "orte/util/name_fns.h"
#include "orte/util/proc_info.h"
#include "orte/runtime/orte_globals.h"
#include "opal/dss/dss.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/plm/plm.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/runtime/orte_wait.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/errmgr/base/base.h"

#include "orte/mca/snapc/snapc.h"
#include "orte/mca/snapc/base/base.h"

#include "snapc_full.h"

#include MCA_timer_IMPLEMENTATION_HEADER

/************************************
 * Locally Global vars & functions :)
 ************************************/
/*
 * Advance the global snapshot sequence number.
 *
 * When orte_snapc_base_store_only_one_seq is set, the sequence number is
 * pinned to 0; otherwise it is incremented by one.
 *
 * NOTE(review): this expands to a bare brace block rather than
 * do { } while (0), so "INC_SEQ_NUM();" leaves an empty statement behind.
 * Be careful when using it as the unbraced body of an if/else.
 */
#define INC_SEQ_NUM()                         \
 {                                            \
   if(orte_snapc_base_store_only_one_seq) {   \
     orte_snapc_base_snapshot_seq_number = 0; \
   } else {                                   \
     orte_snapc_base_snapshot_seq_number++;   \
   }                                          \
 }

/* Job currently handled by the global coordinator.  Set by
 * global_coord_setup_job(); ORTE_JOBID_INVALID means "no job yet". */
static orte_jobid_t current_global_jobid = ORTE_JOBID_INVALID;
/* Tracking structure for the job: its local_snapshots list holds one entry
 * per daemon.  Constructed in global_init_job_structs() and destructed in
 * global_coord_release_job(). */
static orte_snapc_base_global_snapshot_t global_snapshot;
static int current_total_orteds = 0;
static bool updated_job_to_running;
/* Current checkpoint state of the job (ORTE_SNAPC_CKPT_STATE_*).  Polled by
 * global_coord_start_ckpt() and global_coord_end_ckpt(). */
static int current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE;
static bool cleanup_on_establish = false;
/* Set to true while building/refreshing the job structures when a mapped
 * daemon compares equal to ORTE_PROC_MY_NAME; consulted by
 * global_coord_setup_job() before becoming a local coordinator as well. */
static bool global_coord_has_local_children = false;

/* Migration bookkeeping: flag is raised in global_coord_start_ckpt() and
 * lowered in global_coord_end_ckpt(); the list holds private copies of the
 * procs named in the quiesce request. */
static bool currently_migrating = false;
static opal_list_t *migrating_procs = NULL;

/* Build / rebuild the per-daemon tracking lists inside global_snapshot */
static int global_init_job_structs(void);
static int global_refresh_job_structs(void);

/* True once the ORTE_RML_TAG_SNAPC_FULL receive has been posted */
static bool snapc_orted_recv_issued = false;
/* When true, global_coord_release_job() sends an ERROR state update to
 * orte_checkpoint_sender before tearing down the listeners. */
static bool is_orte_checkpoint_connected = false;
static bool is_app_checkpointable = false;
/* Post / cancel the coordinator command receive (ORTE_RML_TAG_SNAPC_FULL) */
static int snapc_full_global_start_listener(void);
static int snapc_full_global_stop_listener(void);
/* RML callback registered by snapc_full_global_start_listener() */
static void snapc_full_global_orted_recv(int status,
                                         orte_process_name_t* sender,
                                         opal_buffer_t* buffer,
                                         orte_rml_tag_t tag,
                                         void* cbdata);

static void snapc_full_process_restart_proc_info_cmd(orte_process_name_t* sender,
                                                     opal_buffer_t* buffer);

static void snapc_full_process_request_op_cmd(orte_process_name_t* sender,
                                              opal_buffer_t* buffer);

/*** Command Line Interactions */
/* Name of the checkpoint requester; reset to orte_name_invalid by
 * global_coord_init() and global_coord_start_ckpt(). */
static orte_process_name_t orte_checkpoint_sender;
/* True once the ORTE_RML_TAG_CKPT receive has been posted */
static bool snapc_cmdline_recv_issued = false;
/* Post / cancel the command line receive (ORTE_RML_TAG_CKPT) */
static int snapc_full_global_start_cmdline_listener(void);
static int snapc_full_global_stop_cmdline_listener(void);
/* RML callback registered by snapc_full_global_start_cmdline_listener() */
static void snapc_full_global_cmdline_recv(int status,
                                           orte_process_name_t* sender,
                                           opal_buffer_t* buffer,
                                           orte_rml_tag_t tag,
                                           void* cbdata);

static int snapc_full_establish_snapshot_dir(bool empty_metadata);

/*** Checkpoint control and state-update helpers (defined later in this file) */
/* Called by global_coord_start_ckpt() to kick off a checkpoint */
static int snapc_full_global_checkpoint(opal_crs_base_ckpt_options_t *options);
static int snapc_full_global_notify_checkpoint(orte_jobid_t jobid,
                                               opal_crs_base_ckpt_options_t *options);
/* Called by global_coord_end_ckpt() with ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL
 * when a migration is in progress */
static int orte_snapc_full_global_set_job_ckpt_info( orte_jobid_t jobid,
                                                     int ckpt_state,
                                                     orte_sstore_base_handle_t handle,
                                                     bool quick,
                                                     opal_crs_base_ckpt_options_t *options);
/* Not static: visible outside this translation unit */
int global_coord_job_state_update(orte_jobid_t jobid,
                                  int job_ckpt_state,
                                  orte_sstore_base_handle_t handle,
                                  opal_crs_base_ckpt_options_t *options);
static void snapc_full_process_job_update_cmd(orte_process_name_t* sender,
                                              opal_buffer_t* buffer,
                                              bool quick);
static int snapc_full_process_orted_update_cmd(orte_process_name_t* sender,
                                               opal_buffer_t* buffer,
                                               bool quick);
static orte_snapc_full_orted_snapshot_t *find_orted_snapshot(orte_process_name_t *name );

static int snapc_full_global_get_min_state(void);
static int write_out_global_metadata(void);

static int orte_snapc_full_global_reset_coord(void);

/*
 * Timer stuff
 *
 * The helpers below are reached through the SNAPC_FULL_*TIMER* wrapper
 * macros, which only call them when orte_snapc_full_timing_enabled is set.
 */
static void snapc_full_set_time(int idx);
static void snapc_full_display_all_timers(void);
static void snapc_full_display_recovered_timers(void);
static void snapc_full_clear_timers(void);

static double snapc_full_get_time(void);
static void snapc_full_display_indv_timer_core(double diff, char *str);

/* Indices into timer_start[]; SNAPC_FULL_TIMER_MAX is the array length */
#define SNAPC_FULL_TIMER_START     0
#define SNAPC_FULL_TIMER_RUNNING   1
#define SNAPC_FULL_TIMER_FIN_LOCAL 2
#define SNAPC_FULL_TIMER_SS_SYNC   3
#define SNAPC_FULL_TIMER_ESTABLISH 4
#define SNAPC_FULL_TIMER_RECOVERED 5
#define SNAPC_FULL_TIMER_MAX       6

/* One slot per SNAPC_FULL_TIMER_* index */
static double timer_start[SNAPC_FULL_TIMER_MAX];

/*
 * Timing wrappers: each one forwards to its snapc_full_* helper only when
 * orte_snapc_full_timing_enabled is set, so call sites cost a single test
 * when timing is off.  OPAL_UNLIKELY is presumably a branch-prediction hint
 * from opal/prefetch.h -- confirm there.
 *
 * NOTE(review): these expand to bare brace blocks rather than
 * do { } while (0); "MACRO();" therefore leaves an empty statement behind.
 */

/* Clear the recorded timers (via snapc_full_clear_timers()) */
#define SNAPC_FULL_CLEAR_TIMERS()                                       \
    {                                                                   \
        if(OPAL_UNLIKELY(orte_snapc_full_timing_enabled)) {             \
            snapc_full_clear_timers();                                  \
        }                                                               \
    }

/* Record the timer for index idx (one of SNAPC_FULL_TIMER_*) */
#define SNAPC_FULL_SET_TIMER(idx)                                       \
    {                                                                   \
        if(OPAL_UNLIKELY(orte_snapc_full_timing_enabled)) {             \
            snapc_full_set_time(idx);                                   \
        }                                                               \
    }

/* Display all timers (via snapc_full_display_all_timers()) */
#define SNAPC_FULL_DISPLAY_ALL_TIMERS()                                 \
    {                                                                   \
        if(OPAL_UNLIKELY(orte_snapc_full_timing_enabled)) {             \
            snapc_full_display_all_timers();                            \
        }                                                               \
    }
/* Display the recovery timers (via snapc_full_display_recovered_timers()) */
#define SNAPC_FULL_DISPLAY_RECOVERED_TIMER()                            \
    {                                                                   \
        if(OPAL_UNLIKELY(orte_snapc_full_timing_enabled)) {             \
            snapc_full_display_recovered_timers();                      \
        }                                                               \
    }

/*
 * Progress
 *
 * Progress reporting is only performed when orte_snapc_full_progress_meter
 * is greater than zero (see SNAPC_FULL_REPORT_PROGRESS below).
 */
static void snapc_full_report_progress(orte_snapc_full_orted_snapshot_t *orted_snapshot,
                                       int total,
                                       int min_state);
/* Progress counters; both are reset to 0 by global_coord_end_ckpt() after a
 * migration completes. */
static int    report_progress_cur_loc_finished = 0;
static double report_progress_last_reported_loc_finished = 0;
/* Forward to snapc_full_report_progress() when the progress meter is on.
 * NOTE(review): expands to a bare brace block, not do { } while (0). */
#define SNAPC_FULL_REPORT_PROGRESS(orted, total, min_state)             \
    {                                                                   \
        if(OPAL_UNLIKELY(orte_snapc_full_progress_meter > 0)) {         \
            snapc_full_report_progress(orted, total, min_state);        \
        }                                                               \
    }

/************************
 * Function Definitions
 ************************/
/*
 * Initialize the global coordinator's file-level state.
 *
 * Forgets any recorded checkpoint requester, invalidates the snapshot
 * sequence number, marks no job as being tracked, and clears the timers
 * (only when timing is enabled).
 *
 * Returns ORTE_SUCCESS unconditionally.
 */
int global_coord_init(void)
{
    /* No checkpoint request has been received yet */
    orte_checkpoint_sender = orte_name_invalid;

    /* Sequence numbering starts just below the first valid value */
    orte_snapc_base_snapshot_seq_number = -1;

    /* No job registered with the coordinator */
    current_global_jobid = ORTE_JOBID_INVALID;

    SNAPC_FULL_CLEAR_TIMERS();

    return ORTE_SUCCESS;
}

/*
 * Tear down the global coordinator's file-level state.
 *
 * Invalidates the snapshot sequence number, marks no job as being tracked,
 * and clears the timers (only when timing is enabled).
 *
 * Returns ORTE_SUCCESS unconditionally.
 */
int global_coord_finalize(void)
{
    /* Back to "just below the first" sequence number */
    orte_snapc_base_snapshot_seq_number = -1;

    /* Stop tracking any job */
    current_global_jobid = ORTE_JOBID_INVALID;

    SNAPC_FULL_CLEAR_TIMERS();

    return ORTE_SUCCESS;
}

/*
 * Register a job with the coordinator.
 *
 * Only one job is handled at a time, but this function may legitimately be
 * entered twice for the same job because the HNP can also act as a local
 * daemon (global and local coordinator at once):
 *   Global: orte_plm_base_setup_job()
 *   Local : odls_default_module.c
 *
 * First call (no job registered yet): record the job, build the tracking
 * structures, and start the coordinator and command line listeners.
 *
 * Later call with the same jobid: on a restarted job refresh the tracking
 * structures, then hand off to local_coord_setup_job() when this process is
 * (or should become) a local coordinator.
 *
 * Call with a different jobid: rejected with ORTE_ERROR.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND when the job data object cannot
 * be looked up, ORTE_ERROR for a second job, or the error code of the
 * failing helper.
 */
int global_coord_setup_job(orte_jobid_t jobid) {
    int ret;
    int exit_status = ORTE_SUCCESS;
    orte_job_t *job_obj = NULL;

    if( ORTE_JOBID_INVALID != current_global_jobid ) {
        /* A job is already registered - only that same job may come back */
        if( jobid != current_global_jobid ) {
            opal_output(mca_snapc_full_component.super.output_handle,
                        "Global) Setup of job %s Failed! Already setup job %s\n",
                        ORTE_JOBID_PRINT(jobid), ORTE_JOBID_PRINT(current_global_jobid));
            ORTE_ERROR_LOG(ORTE_ERROR);
            return ORTE_ERROR;
        }

        /*
         * Local Coordinator pass - always happens after the global
         * coordinator pass.  Start by looking up the job data object.
         */
        job_obj = orte_get_job_data_object(current_global_jobid);
        if( NULL == job_obj ) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }

        if( ORTE_FLAG_TEST(job_obj, ORTE_JOB_FLAG_RESTART) ) {
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) Restarting Job %s...",
                                 ORTE_JOBID_PRINT(jobid)));
            SNAPC_FULL_CLEAR_TIMERS();
            SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_START);

            ret = global_refresh_job_structs();
            if( ORTE_SUCCESS != ret ) {
                ORTE_ERROR_LOG(ret);
                return ret;
            }

            /* Not (already) a local coordinator: nothing more to do */
            if( ORTE_SNAPC_LOCAL_COORD_TYPE != (orte_snapc_coord_type & ORTE_SNAPC_LOCAL_COORD_TYPE) ) {
                return ORTE_SUCCESS;
            }
            return local_coord_setup_job(jobid);
        }

        /* Without local children there is no reason to become a local coordinator */
        if( !global_coord_has_local_children ) {
            return ORTE_SUCCESS;
        }

        OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                             "Global) Setup job %s as the Local Coordinator\n",
                             ORTE_JOBID_PRINT(jobid)));
        orte_snapc_coord_type |= ORTE_SNAPC_LOCAL_COORD_TYPE;
        return local_coord_setup_job(jobid);
    }

    /*
     * Global Coordinator pass - first time we see a job
     */
    current_global_jobid = jobid;
    OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                         "Global) Setup job %s as the Global Coordinator\n",
                         ORTE_JOBID_PRINT(jobid)));

    SNAPC_FULL_CLEAR_TIMERS();
    SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_START);

    /*
     * The sequence number starts just below the first value; it is
     * incremented when a checkpoint is taken.
     */
    orte_snapc_base_snapshot_seq_number = -1;

    /* Allocate the structures that track node status */
    ret = global_init_job_structs();
    if( ORTE_SUCCESS != ret ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /* Global Coordinator command processing listener */
    ret = snapc_full_global_start_listener();
    if( ORTE_SUCCESS != ret ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /* Command line tool checkpoint request listener */
    ret = snapc_full_global_start_cmdline_listener();
    if( ORTE_SUCCESS != ret ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * If requested pre-establish the global snapshot directory
     * (currently compiled out)
     */
#if 0
    if(orte_snapc_base_establish_global_snapshot_dir) {
        opal_output(0, "Global) Error: Pre-establishment of snapshot directory currently not supported!");
        ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);

        OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                             "Global) Pre-establish the global snapshot directory\n"));
        if( ORTE_SUCCESS != (ret = snapc_full_establish_snapshot_dir(true))) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }
#endif

    OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                         "Global) Finished setup of job %s ",
                         ORTE_JOBID_PRINT(jobid)));

 cleanup:
    return exit_status;
}

/*
 * Release the coordinator's resources for a job.
 *
 * If a checkpoint requester is still connected it is sent an ERROR state
 * update (a failure to send is logged but does not affect the return
 * value).  Both listeners are then cancelled and global_snapshot is
 * destructed.
 *
 * jobid is not consulted.
 *
 * Returns ORTE_SUCCESS, or the error from the last listener shutdown that
 * failed.
 */
int global_coord_release_job(orte_jobid_t jobid) {
    int ret;
    int exit_status = ORTE_SUCCESS;

    /* Do not leave a requester waiting on a checkpoint that will never complete */
    if( is_orte_checkpoint_connected ) {
        ret = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender,
                                                           global_snapshot.ss_handle,
                                                           ORTE_SNAPC_CKPT_STATE_ERROR);
        if( ORTE_SUCCESS != ret ) {
            ORTE_ERROR_LOG(ret);
        }
    }

    /* Shut down the command line listener ... */
    ret = snapc_full_global_stop_cmdline_listener();
    if( ORTE_SUCCESS != ret ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
    }

    /* ... and the coordinator listener */
    ret = snapc_full_global_stop_listener();
    if( ORTE_SUCCESS != ret ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
    }

    OBJ_DESTRUCT(&global_snapshot);

    return exit_status;
}

/*
 * Start an internally requested checkpoint (optionally as part of a
 * migration) and block until it has finished locally on all nodes.
 *
 * datum  Quiesce request.  If datum->migrating is set, the procs listed in
 *        datum->migrating_procs are copied into the file-level
 *        migrating_procs list.  On success datum->snapshot,
 *        datum->ss_handle, datum->ss_snapshot and datum->epoch are filled
 *        in.
 *
 * The function spins on opal_progress() until current_job_ckpt_state
 * reaches MIGRATING (when migrating) or FINISHED_LOCAL (otherwise), or any
 * of ESTABLISHED / RECOVERED / ERROR / NONE.
 *
 * Returns ORTE_SUCCESS, the error code of the failing helper, or
 * ORTE_ERR_NOT_FOUND when the snapshot sequence attribute is unavailable.
 */
int global_coord_start_ckpt(orte_snapc_base_quiesce_t *datum)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_std_cntr_t i_proc;
    orte_proc_t *proc = NULL;
    orte_proc_t *new_proc = NULL;
    opal_list_item_t *item = NULL;
    opal_crs_base_ckpt_options_t *options = NULL;
    char *tmp_str = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                         "Global) Starting checkpoint (internally requested)"));

    /* Internally requested: there is no command line tool to answer */
    orte_checkpoint_sender = orte_name_invalid;

    /*
     * If migrating
     */
    if( datum->migrating ) {
        currently_migrating = true;
        if( NULL != migrating_procs ) {
            /* Reuse the list: drop entries left over from a previous request */
            while( NULL != (item = opal_list_remove_first(migrating_procs)) ) {
                proc = (orte_proc_t*)item;
                OBJ_RELEASE(proc);
            }
        } else {
            migrating_procs = OBJ_NEW(opal_list_t);
        }

        /*
         * Copy over the procs into a list
         */
        for(i_proc = 0; i_proc < opal_pointer_array_get_size(&(datum->migrating_procs)); ++i_proc) {
            proc = (orte_proc_t*)opal_pointer_array_get_item(&(datum->migrating_procs), i_proc);
            if( NULL == proc ) {
                continue;
            }

            new_proc = OBJ_NEW(orte_proc_t);
            new_proc->name.jobid = proc->name.jobid;
            new_proc->name.vpid  = proc->name.vpid;
            new_proc->node = OBJ_NEW(orte_node_t);
            /* NOTE(review): the name pointer is shared with the caller's
             * node, not duplicated - confirm nothing frees it through the
             * copy. */
            new_proc->node->name = proc->node->name;
            opal_list_append(migrating_procs, &new_proc->super);
            /* NOTE(review): this adds a second reference, while the list
             * draining code releases each entry only once - looks like the
             * copies are never destroyed; confirm whether that is
             * intentional. */
            OBJ_RETAIN(new_proc);
        }

        OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                             "Global) SnapC Migrating Processes: (%d procs) [Updated]\n",
                             (int)opal_list_get_size(migrating_procs) ));
        for (item  = opal_list_get_first(migrating_procs);
             item != opal_list_get_end(migrating_procs);
             item  = opal_list_get_next(item)) {
            new_proc = (orte_proc_t*)item;
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "\t\"%s\" [%s]\n",
                                 ORTE_NAME_PRINT(&new_proc->name),new_proc->node->name));
        }
    }

    /*************************
     * Kick off the checkpoint (local coord will release the processes)
     *************************/
    options = OBJ_NEW(opal_crs_base_ckpt_options_t);
    if( ORTE_SUCCESS != (ret = snapc_full_global_checkpoint(options) ) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Wait for checkpoint to locally finish on all nodes
     */
    while(((currently_migrating  && current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_MIGRATING) ||
           (!currently_migrating && current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL)) &&
          current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_ESTABLISHED &&
          current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_RECOVERED &&
          current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_ERROR &&
          current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_NONE ) {
        opal_progress();
    }

    /*
     * Update the quiesce structure with the handle
     */
    datum->snapshot = OBJ_NEW(orte_snapc_base_global_snapshot_t);

    datum->ss_handle = global_snapshot.ss_handle;
    datum->ss_snapshot = OBJ_NEW(orte_sstore_base_global_snapshot_info_t);
    if( ORTE_SUCCESS != (ret = orte_sstore.request_global_snapshot_data(&(datum->ss_handle), datum->ss_snapshot)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /* JJH Is the snapc structure useful with the sstore structure ??? */
    orte_sstore.get_attr(global_snapshot.ss_handle,
                         SSTORE_METADATA_GLOBAL_SNAP_SEQ,
                         &tmp_str);
    /* The attribute may be missing; atoi(NULL) is undefined behavior, so
     * report the problem instead of dereferencing a NULL string. */
    if( NULL == tmp_str ) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        exit_status = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }
    datum->epoch = atoi(tmp_str);

    free(tmp_str);
    tmp_str = NULL;

 cleanup:
    if( NULL != options ) {
        OBJ_RELEASE(options);
        options = NULL;
    }

    return exit_status;
}

/*
 * Finish an internally requested checkpoint.
 *
 * When a migration is in progress the job is first moved to the
 * FINISHED_LOCAL state.  The function then spins on opal_progress() until
 * the job checkpoint state is RECOVERED, ERROR or NONE, refreshes the job
 * tracking structures (processes may have moved), and - for a migration -
 * resets the checkpoint state and the progress counters.
 *
 * On every exit path the migration flag is cleared and the migrating_procs
 * list is drained and released.
 *
 * datum is not consulted.
 *
 * Returns ORTE_SUCCESS or the error code of the failing helper.
 */
int global_coord_end_ckpt(orte_snapc_base_quiesce_t *datum)
{
    int ret;
    int exit_status = ORTE_SUCCESS;
    opal_list_item_t *entry = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                         "Global) Finishing checkpoint (internally requested) [%3d]",
                         current_job_ckpt_state));

    if( currently_migrating ) {
        OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                             "Global) End Ckpt: Flush the modex cached data\n"));

        /* TODO: You can't pass NULL as the identifier - what you'll need to do is
         * close all open dstore handles, and then open the ones you need
         */
#if 0
        if (OPAL_SUCCESS != (ret = opal_dstore.remove(NULL, NULL))) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
#endif

        SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_ESTABLISH);
        ret = orte_snapc_full_global_set_job_ckpt_info(current_global_jobid,
                                                       ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL,
                                                       global_snapshot.ss_handle,
                                                       true, NULL);
        if( ORTE_SUCCESS != ret ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }

    /* Drive progress until the job has recovered, failed, or gone idle */
    for(;;) {
        if( ORTE_SNAPC_CKPT_STATE_RECOVERED == current_job_ckpt_state ||
            ORTE_SNAPC_CKPT_STATE_ERROR     == current_job_ckpt_state ||
            ORTE_SNAPC_CKPT_STATE_NONE      == current_job_ckpt_state ) {
            break;
        }
        opal_progress();
    }

    /* Processes may have moved around: bring the job structure up to date */
    ret = global_refresh_job_structs();
    if( ORTE_SUCCESS != ret ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                         "Global) Finished checkpoint (internally requested) [%d]",
                         current_job_ckpt_state));

    if( currently_migrating ) {
        /* Migration done: return to the idle state and restart progress accounting */
        report_progress_last_reported_loc_finished = 0;
        report_progress_cur_loc_finished = 0;

        cleanup_on_establish = false;
        current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE;
    }

 cleanup:

    currently_migrating = false;
    if( NULL != migrating_procs ) {
        /* Drain the list, then drop the list object itself */
        entry = opal_list_remove_first(migrating_procs);
        while( NULL != entry ) {
            OBJ_RELEASE(entry);
            entry = opal_list_remove_first(migrating_procs);
        }
        OBJ_RELEASE(migrating_procs);
        migrating_procs = NULL;
    }

    return exit_status;
}

/******************
 * Local functions
 ******************/
/*
 * Build the per-daemon tracking structures for the current job.
 *
 * Constructs global_snapshot and, for every node in the job map, appends
 * one orted snapshot (named after the node's daemon) holding one app
 * snapshot per process on that node.  A daemon that already has an entry
 * is skipped.
 *
 * Side effect: global_coord_has_local_children is set to true when one of
 * the mapped daemons is this very process (full name match against
 * ORTE_PROC_MY_NAME).
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_NOT_FOUND when the job data object for
 * current_global_jobid cannot be looked up.
 */
static int global_init_job_structs(void)
{
    orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL;
    orte_snapc_base_local_snapshot_t *app_snapshot = NULL;
    opal_list_item_t* orted_item = NULL;
    orte_node_t *cur_node = NULL;
    orte_job_map_t *map = NULL;
    orte_job_t *jdata = NULL;
    orte_proc_t **procs = NULL;
    orte_std_cntr_t i = 0;
    orte_vpid_t p = 0;
    orte_ns_cmp_bitmask_t mask;
    bool found = false;

    /* look up job data object */
    if (NULL == (jdata = orte_get_job_data_object(current_global_jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    OBJ_CONSTRUCT(&global_snapshot, orte_snapc_base_global_snapshot_t);

    map = jdata->map;

    for (i=0; i < map->nodes->size; i++) {
        /* The node array may contain empty slots */
        if (NULL == (cur_node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            continue;
        }

        procs = (orte_proc_t**)cur_node->procs->addr;

        /*
         * Look out for duplicates
         * JJH: Should not happen, but does if rmaps get a bug in setting up the map.
         */
        found = false;
        for(orted_item  = opal_list_get_first(&(global_snapshot.local_snapshots));
            orted_item != opal_list_get_end(&(global_snapshot.local_snapshots));
            orted_item  = opal_list_get_next(orted_item) ) {
            orted_snapshot = (orte_snapc_full_orted_snapshot_t*)orted_item;
            /*
             * Is in list?
             */
            if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                           &(cur_node->daemon->name),
                                                           &(orted_snapshot->process_name) )) {
                found = true;
                break;
            }
        }
        if( found ) {
            OPAL_OUTPUT_VERBOSE((1, mca_snapc_full_component.super.output_handle,
                                 "Global) [%d] Found Daemon %s with %d procs - Duplicate!! - Should not happen!",
                                 i, ORTE_NAME_PRINT(&(cur_node->daemon->name)), cur_node->num_procs));
            continue;
        }

        OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                             "Global) [%d] Found Daemon %s with %d procs",
                             i, ORTE_NAME_PRINT(&(cur_node->daemon->name)), cur_node->num_procs));

        orted_snapshot = OBJ_NEW(orte_snapc_full_orted_snapshot_t);

        orted_snapshot->process_name.jobid  = cur_node->daemon->name.jobid;
        orted_snapshot->process_name.vpid   = cur_node->daemon->name.vpid;

        /*
         * Compare the complete name.  Matching on the jobid alone would
         * treat every daemon of this job family as "us" and wrongly report
         * local children; global_refresh_job_structs() performs the same
         * test with ORTE_NS_CMP_ALL.
         */
        mask = ORTE_NS_CMP_ALL;

        if (OPAL_EQUAL ==
                orte_util_compare_name_fields(mask, &orted_snapshot->process_name, ORTE_PROC_MY_NAME)) {
            global_coord_has_local_children = true;
        }

        for(p = 0; p < cur_node->num_procs; ++p) {
            /* Skip empty slots, as global_refresh_job_structs() does */
            if( NULL == procs[p] ) {
                continue;
            }

            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) \t [%d] Found Process %s on Daemon %s",
                                 p, ORTE_NAME_PRINT(&(procs[p]->name)), ORTE_NAME_PRINT(&(cur_node->daemon->name)) ));

            app_snapshot = OBJ_NEW(orte_snapc_base_local_snapshot_t);

            app_snapshot->process_name.jobid = procs[p]->name.jobid;
            app_snapshot->process_name.vpid = procs[p]->name.vpid;

            opal_list_append(&(orted_snapshot->super.local_snapshots), &(app_snapshot->super));
        }


        opal_list_append(&global_snapshot.local_snapshots, &(orted_snapshot->super.super));
    }

    return ORTE_SUCCESS;
}

/*
 * Bring the per-daemon tracking structures in line with the current job
 * map (used after a restart or migration, when processes may have moved).
 *
 * Steps:
 *  1. For every proc in migrating_procs, drop its app snapshot from the
 *     daemon that currently tracks it.
 *  2. For every tracked daemon that is no longer in the map, empty its
 *     process list (the daemon entry itself is kept for possible reuse).
 *  3. For every node in the map, either rebuild the process list of the
 *     already tracked daemon, or create and append a new daemon entry.
 *
 * Side effect: global_coord_has_local_children is set to true when a newly
 * added daemon is this very process.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_NOT_FOUND when the job data object for
 * current_global_jobid cannot be looked up.
 */
static int global_refresh_job_structs(void)
{
    orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL;
    orte_snapc_base_local_snapshot_t *app_snapshot = NULL;
    opal_list_item_t* orted_item = NULL;
    opal_list_item_t* app_item = NULL;
    opal_list_item_t* item = NULL;
    orte_node_t *cur_node = NULL;
    orte_job_map_t *map = NULL;
    orte_job_t *jdata = NULL;
    orte_proc_t **procs = NULL;
    orte_proc_t *new_proc = NULL;
    orte_std_cntr_t i = 0;
    orte_vpid_t p = 0;
    bool found = false;
    orte_ns_cmp_bitmask_t mask;

    /* look up job data object */
    if (NULL == (jdata = orte_get_job_data_object(current_global_jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                         "Global) Refreshing Job Structures... [%3d]",
                         current_job_ckpt_state));

    if( NULL != migrating_procs ) {
        for (item  = opal_list_get_first(migrating_procs);
             item != opal_list_get_end(migrating_procs);
             item  = opal_list_get_next(item)) {
            new_proc = (orte_proc_t*)item;

            /*
             * Look through all daemons
             */
            found = false;
            for(orted_item  = opal_list_get_first(&(global_snapshot.local_snapshots));
                orted_item != opal_list_get_end(&(global_snapshot.local_snapshots));
                orted_item  = opal_list_get_next(orted_item) ) {
                orted_snapshot = (orte_snapc_full_orted_snapshot_t*)orted_item;

                /*
                 * Look through all processes tracked by this daemon
                 */
                for(app_item  = opal_list_get_first(&(orted_snapshot->super.local_snapshots));
                    app_item != opal_list_get_end(&(orted_snapshot->super.local_snapshots));
                    app_item  = opal_list_get_next(app_item) ) {
                    app_snapshot = (orte_snapc_base_local_snapshot_t*)app_item;

                    if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                                   &(new_proc->name),
                                                                   &(app_snapshot->process_name) )) {
                        found = true;
                        opal_list_remove_item(&(orted_snapshot->super.local_snapshots), app_item);
                        /* The list held the only reference to this entry;
                         * release it like every other removal below does,
                         * otherwise the snapshot is leaked.  We leave the
                         * loop right away, so app_item is not used again. */
                        OBJ_RELEASE(app_item);
                        break;
                    }
                }

                if( found ) {
                    break;
                }
            }
        }
    }

    /*
     * First make sure that all of the orted's have the proper number of
     * children, if no children, then stop tracking.
     */
    map = jdata->map;
    for(orted_item  = opal_list_get_first(&(global_snapshot.local_snapshots));
        orted_item != opal_list_get_end(&(global_snapshot.local_snapshots));
        orted_item  = opal_list_get_next(orted_item) ) {
        orted_snapshot = (orte_snapc_full_orted_snapshot_t*)orted_item;

        /* Make sure this orted is in the map */
        found = false;
        for (i=0; i < map->nodes->size; i++) {
            if (NULL == (cur_node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                continue;
            }

            if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                           &(cur_node->daemon->name),
                                                           &(orted_snapshot->process_name) )) {
                found = true;
                break;
            }
        }
        /* If not, then remove all processes, keep ref. we might reuse it later */
        if( !found ) {
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) Found Empty Daemon %s not in map (Refresh)",
                                 ORTE_NAME_PRINT(&(orted_snapshot->process_name)) ));
            while( NULL != (item = opal_list_remove_first(&(orted_snapshot->super.local_snapshots))) ) {
                OBJ_RELEASE(item);
            }
        }
    }

    /*
     * Look for new nodes
     */
    for (i=0; i < map->nodes->size; i++) {
        if (NULL == (cur_node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            continue;
        }

        procs = (orte_proc_t**)cur_node->procs->addr;

        /*
         * See if we are already tracking it, if so refresh it
         * (This daemon could have been restarted, and processes migrated back to it)
         */
        found = false;
        for(orted_item  = opal_list_get_first(&(global_snapshot.local_snapshots));
            orted_item != opal_list_get_end(&(global_snapshot.local_snapshots));
            orted_item  = opal_list_get_next(orted_item) ) {
            orted_snapshot = (orte_snapc_full_orted_snapshot_t*)orted_item;

            if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                           &(cur_node->daemon->name),
                                                           &(orted_snapshot->process_name) )) {
                found = true;
                break;
            }
        }
        if( found ) {
            /* orted_snapshot still points at the matching entry from the search above */
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) [%d] Found Daemon %s with %d procs (Refresh)",
                                 i, ORTE_NAME_PRINT(&(cur_node->daemon->name)), cur_node->num_procs));

            /* Remove all old processes */
            while( NULL != (item = opal_list_remove_first(&(orted_snapshot->super.local_snapshots))) ) {
                OBJ_RELEASE(item);
            }

            /* Add back new processes (a bit of overkill, sure, but it works) */
            for(p = 0; p < cur_node->num_procs; ++p) {
                if( NULL == procs[p] ) {
                    continue;
                }

                OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                     "Global) \t [%d] Found Process %s on Daemon %s",
                                     p, ORTE_NAME_PRINT(&(procs[p]->name)), ORTE_NAME_PRINT(&(cur_node->daemon->name)) ));

                app_snapshot = OBJ_NEW(orte_snapc_base_local_snapshot_t);

                app_snapshot->process_name.jobid = procs[p]->name.jobid;
                app_snapshot->process_name.vpid = procs[p]->name.vpid;

                opal_list_append(&(orted_snapshot->super.local_snapshots), &(app_snapshot->super));
            }

            continue;
        }

        /* Not tracked yet: create a fresh daemon entry for this node */
        OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                             "Global) [%d] Found Daemon %s with %d procs",
                             i, ORTE_NAME_PRINT(&(cur_node->daemon->name)), cur_node->num_procs));

        orted_snapshot = OBJ_NEW(orte_snapc_full_orted_snapshot_t);

        orted_snapshot->process_name.jobid  = cur_node->daemon->name.jobid;
        orted_snapshot->process_name.vpid   = cur_node->daemon->name.vpid;

        mask = ORTE_NS_CMP_ALL;

        /* Is the new daemon this very process? Then we have local children. */
        if (OPAL_EQUAL ==
                orte_util_compare_name_fields(mask, &orted_snapshot->process_name, ORTE_PROC_MY_NAME)) {
            global_coord_has_local_children = true;
        }
        for(p = 0; p < cur_node->num_procs; ++p) {
            if( NULL == procs[p] ) {
                continue;
            }

            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) \t [%d] Found Process %s on Daemon %s",
                                 p, ORTE_NAME_PRINT(&(procs[p]->name)), ORTE_NAME_PRINT(&(cur_node->daemon->name)) ));

            app_snapshot = OBJ_NEW(orte_snapc_base_local_snapshot_t);

            app_snapshot->process_name.jobid = procs[p]->name.jobid;
            app_snapshot->process_name.vpid = procs[p]->name.vpid;

            opal_list_append(&(orted_snapshot->super.local_snapshots), &(app_snapshot->super));
        }

        opal_list_append(&global_snapshot.local_snapshots, &(orted_snapshot->super.super));
    }

    return ORTE_SUCCESS;
}

/*****************
 * Setup listeners
 *****************/
/*
 * Post the persistent, non-blocking receive on ORTE_RML_TAG_SNAPC_FULL that
 * delivers coordinator commands to snapc_full_global_orted_recv().
 *
 * Nothing is posted when this process is the HNP and the receive has already
 * been issued. Always returns ORTE_SUCCESS.
 */
static int snapc_full_global_start_listener(void)
{
    if (!snapc_orted_recv_issued || !(ORTE_PROC_IS_HNP)) {
        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) Startup Coordinator Channel"));

        /* Accept coordinator commands from any peer */
        orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                ORTE_RML_TAG_SNAPC_FULL,
                                ORTE_RML_PERSISTENT,
                                snapc_full_global_orted_recv,
                                NULL);

        snapc_orted_recv_issued = true;
    }

    return ORTE_SUCCESS;
}

/*
 * Cancel the receive posted on ORTE_RML_TAG_SNAPC_FULL and mark it as no
 * longer issued.
 *
 * Nothing is cancelled when this process is the HNP and no receive is
 * currently marked as issued. Always returns ORTE_SUCCESS.
 */
static int snapc_full_global_stop_listener(void)
{
    if (snapc_orted_recv_issued || !(ORTE_PROC_IS_HNP)) {
        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) Shutdown Coordinator Channel"));

        orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SNAPC_FULL);

        snapc_orted_recv_issued = false;
    }

    return ORTE_SUCCESS;
}

/*
 * Post a non-blocking receive on ORTE_RML_TAG_CKPT that delivers command line
 * requests to snapc_full_global_cmdline_recv().
 *
 * The receive is posted with flags == 0 (not ORTE_RML_PERSISTENT), and the
 * callback clears snapc_cmdline_recv_issued when it fires, so this function
 * is called again to re-arm the channel.
 *
 * Nothing is posted when this process is the HNP and the receive has already
 * been issued. Always returns ORTE_SUCCESS.
 */
static int snapc_full_global_start_cmdline_listener(void)
{
    if (!snapc_cmdline_recv_issued || !(ORTE_PROC_IS_HNP)) {
        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) Startup Command Line Channel"));

        /* Command line tool listener */
        orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                ORTE_RML_TAG_CKPT,
                                0,
                                snapc_full_global_cmdline_recv,
                                NULL);

        snapc_cmdline_recv_issued = true;
    }

    return ORTE_SUCCESS;
}

/*
 * Cancel the receive posted on ORTE_RML_TAG_CKPT and mark it as no longer
 * issued.
 *
 * Nothing is cancelled when this process is the HNP and no receive is
 * currently marked as issued. Always returns ORTE_SUCCESS.
 */
static int snapc_full_global_stop_cmdline_listener(void)
{
    if (snapc_cmdline_recv_issued || !(ORTE_PROC_IS_HNP)) {
        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) Shutdown Command Line Channel"));

        orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_CKPT);

        snapc_cmdline_recv_issued = false;
    }

    return ORTE_SUCCESS;
}

/*****************
 * Listener Callbacks
 *****************/
/*
 * RML callback for messages arriving on ORTE_RML_TAG_CKPT (the command line
 * channel posted by snapc_full_global_start_cmdline_listener()).
 *
 * status  - not examined by this function
 * sender  - name of the process that sent the command
 * buffer  - packed message; the first element is an ORTE_SNAPC_CMD flag
 * tag     - must be ORTE_RML_TAG_CKPT, otherwise the message is rejected
 * cbdata  - not examined by this function
 *
 * For ORTE_SNAPC_GLOBAL_INIT_CMD the remaining request is unpacked via
 * orte_snapc_base_global_coord_ckpt_init_cmd(), the sender is recorded as the
 * connected checkpoint tool, and snapc_full_global_checkpoint() is started.
 * ORTE_SNAPC_GLOBAL_TERM_CMD and any other command are logged as
 * ORTE_ERR_NOT_SUPPORTED.
 *
 * Errors are reported through ORTE_ERROR_LOG; nothing is returned.
 */
static void snapc_full_global_cmdline_recv(int status,
                                           orte_process_name_t* sender,
                                           opal_buffer_t* buffer,
                                           orte_rml_tag_t tag,
                                           void* cbdata)
{
    int ret;
    orte_snapc_cmd_flag_t command;
    orte_std_cntr_t count = 1;
    orte_jobid_t jobid;
    opal_crs_base_ckpt_options_t *options = NULL;

    if( ORTE_RML_TAG_CKPT != tag ) {
        opal_output(mca_snapc_full_component.super.output_handle,
                    "Global) Error: Unknown tag: Received a command message from %s (tag = %d).",
                    ORTE_NAME_PRINT(sender), tag);
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return;
    }

    OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                         "Global) Command Line: Start a checkpoint operation [Sender = %s]",
                         ORTE_NAME_PRINT(sender)));

    /*
     * The receive was posted without ORTE_RML_PERSISTENT, so mark it as
     * consumed; snapc_full_global_start_cmdline_listener() must be called
     * again before another command can be received.
     */
    snapc_cmdline_recv_issued = false; /* Not a persistent RML message */

    /* Released at the cleanup label on every path below */
    options = OBJ_NEW(opal_crs_base_ckpt_options_t);

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_SNAPC_CMD))) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    /*
     * orte_checkpoint has requested that a checkpoint be taken
     */
    if (ORTE_SNAPC_GLOBAL_INIT_CMD == command) {
        OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                             "Global) Command line requested a checkpoint [command %d]\n",
                             command));

        /*
         * Unpack the buffer from the orte_checkpoint command
         */
        if( ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_init_cmd(sender,
                                                                              buffer,
                                                                              options,
                                                                              &jobid)) ) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        /* Remember who to send state updates to */
        orte_checkpoint_sender = *sender;
        is_orte_checkpoint_connected = true;

        /*
         * If the application is not ready for a checkpoint,
         * then send back an error.
         */
        if( !is_app_checkpointable ) {
            OPAL_OUTPUT_VERBOSE((1, mca_snapc_full_component.super.output_handle,
                                 "Global) request_cmd(): Checkpointing currently disabled, rejecting request"));
            if( ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender,
                                                                                    0,
                                                                                    ORTE_SNAPC_CKPT_STATE_ERROR))) {
                ORTE_ERROR_LOG(ret);
            }

            /* Drop the connection to the tool that was just rejected */
            orte_checkpoint_sender = orte_name_invalid;
            is_orte_checkpoint_connected = false;

            /* Reset the listener */
            if( ORTE_SUCCESS != (ret = snapc_full_global_start_cmdline_listener() ) ){
                ORTE_ERROR_LOG(ret);
            }

            goto cleanup;
        }

        /*
         * If the jobid was specified, and does not match the current job, then fail
         *
         * NOTE(review): unlike the "not checkpointable" path above, this path
         * does not notify the sender, does not clear
         * is_orte_checkpoint_connected / orte_checkpoint_sender, and does not
         * re-post the command line listener - confirm this is intended.
         */
        if( ORTE_JOBID_INVALID != jobid && jobid != current_global_jobid) {
            opal_output(mca_snapc_full_component.super.output_handle,
                        "Global) Error: Jobid %s does not match the current jobid %s",
                        ORTE_JOBID_PRINT(jobid), ORTE_JOBID_PRINT(current_global_jobid));
            ORTE_ERROR_LOG(ORTE_ERROR);
            goto cleanup;
        }

        /*************************
         * Kick off the checkpoint
         *************************/
        SNAPC_FULL_CLEAR_TIMERS();
        SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_START);

        if( ORTE_SUCCESS != (ret = snapc_full_global_checkpoint(options) ) ) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

    }
    /*
     * Terminate the connection (Not currently implemented)
     */
    else if (ORTE_SNAPC_GLOBAL_TERM_CMD == command) {
        OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                             "Global) Command line requested to terminate connection (command %d)\n",
                             command));
        ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
        goto cleanup;
    }
    /*
     * Unknown command
     */
    else {
        OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                             "Global) Command line sent an unknown command (command %d)\n",
                             command));
        ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
        goto cleanup;
    }

 cleanup:
    /* Drop this function's reference to the options object */
    if( NULL != options ) {
        OBJ_RELEASE(options);
        options = NULL;
    }

    return;
}

/*
 * RML callback for messages arriving on ORTE_RML_TAG_SNAPC_FULL (the
 * coordinator channel posted by snapc_full_global_start_listener()).
 *
 * status  - not examined by this function
 * sender  - name of the process that sent the command
 * buffer  - packed message; the first element is an ORTE_SNAPC_FULL_CMD flag,
 *           the remainder is consumed by the per-command handler
 * tag     - must be ORTE_RML_TAG_SNAPC_FULL, otherwise the message is rejected
 * cbdata  - not examined by this function
 *
 * The command flag is unpacked and the message is handed to the matching
 * snapc_full_process_*() handler. Unknown commands are logged as
 * ORTE_ERR_VALUE_OUT_OF_BOUNDS. Nothing is returned; values returned by the
 * handlers are not examined here.
 */
void snapc_full_global_orted_recv(int status,
                                  orte_process_name_t* sender,
                                  opal_buffer_t* buffer,
                                  orte_rml_tag_t tag,
                                  void* cbdata)
{
    int ret;
    orte_snapc_full_cmd_flag_t command;
    orte_std_cntr_t count;

    if( ORTE_RML_TAG_SNAPC_FULL != tag ) {
        opal_output(mca_snapc_full_component.super.output_handle,
                    "Global) Error: Unknown tag: Received a command message from %s (tag = %d).",
                    ORTE_NAME_PRINT(sender), tag);
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return;
    }

    /*
     * This is a message from a Local Coordinator
     */
    OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                         "Global) Receive a command message from %s.",
                         ORTE_NAME_PRINT(sender)));

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_SNAPC_FULL_CMD))) {
        ORTE_ERROR_LOG(ret);
        return;
    }

    /*
     * Dispatch on the command flag. The "quick" variants share a handler with
     * the long form and differ only in the boolean passed to it.
     */
    switch (command) {
        case ORTE_SNAPC_FULL_UPDATE_JOB_STATE_QUICK_CMD:
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) Command: Job State Update (quick)"));

            snapc_full_process_job_update_cmd(sender, buffer, true);
            break;

        case ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD:
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) Command: Job State Update"));

            snapc_full_process_job_update_cmd(sender, buffer, false);
            break;

        case ORTE_SNAPC_FULL_UPDATE_ORTED_STATE_QUICK_CMD:
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) Command: Daemon State Update (quick)"));

            snapc_full_process_orted_update_cmd(sender, buffer, true);
            break;

        case ORTE_SNAPC_FULL_UPDATE_ORTED_STATE_CMD:
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) Command: Daemon State Update"));

            snapc_full_process_orted_update_cmd(sender, buffer, false);
            break;

        case ORTE_SNAPC_FULL_RESTART_PROC_INFO:
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) Command: Update hostname/pid associations"));

            snapc_full_process_restart_proc_info_cmd(sender, buffer);
            break;

        case ORTE_SNAPC_FULL_REQUEST_OP_CMD:
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) Command: Request Op"));

            snapc_full_process_request_op_cmd(sender, buffer);
            break;

        default:
            ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
            break;
    }

    return;
}

/*
 * Handle an ORTE_SNAPC_FULL_REQUEST_OP_CMD message.
 *
 * sender   - process that issued the request; replies are sent back to it
 * sbuffer  - remainder of the received message: a jobid, an operation code,
 *            and (for restart / migrate) operation specific arguments
 *
 * The operation code selects one of:
 *   ORTE_SNAPC_OP_INIT          - mark the application as checkpointable
 *   ORTE_SNAPC_OP_FIN           - mark it as not checkpointable, wait for any
 *                                 checkpoint in flight, then send a FIN_ACK
 *   ORTE_SNAPC_OP_CHECKPOINT    - run a checkpoint, wait for it, and reply
 *                                 with op_state 0 (ok) or -1 (error)
 *   ORTE_SNAPC_OP_RESTART       - unpack seq_num + global handle and call
 *                                 orte_errmgr_base_restart_job()
 *   ORTE_SNAPC_OP_MIGRATE       - unpack the migration request, call
 *                                 orte_errmgr_base_migrate_job(), and reply
 *   ORTE_SNAPC_OP_QUIESCE_START - start an INC-prep-only checkpoint and wait
 *                                 until the job reaches INC_PREPED
 *   ORTE_SNAPC_OP_QUIESCE_END   - wait until the checkpoint state returns to
 *                                 NONE (or ERROR)
 * Any other code is ignored.
 *
 * The waits spin on opal_progress() until current_job_ckpt_state changes.
 * Errors are reported through ORTE_ERROR_LOG; nothing is returned.
 */
static void snapc_full_process_request_op_cmd(orte_process_name_t* sender,
                                              opal_buffer_t* sbuffer)
{
    int ret;
    orte_std_cntr_t count = 1;
    orte_jobid_t jobid;
    int op_event, op_state;
    opal_crs_base_ckpt_options_t *options = NULL;
    opal_buffer_t *buffer = NULL;
    orte_snapc_full_cmd_flag_t command = ORTE_SNAPC_FULL_REQUEST_OP_CMD;
    int seq_num = -1, i;
    char * global_handle = NULL, *tmp_str = NULL;
    orte_snapc_base_request_op_t *datum = NULL;

    /* Requests on this channel do not come from the command line tool */
    orte_checkpoint_sender = orte_name_invalid;

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(sbuffer, &jobid, &count, ORTE_JOBID))) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(sbuffer, &op_event, &count, OPAL_INT))) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                         "Global) process_request_op(): Op Code %2d\n",
                         op_event));

    /************************************
     * Application have been initialized, and are ready for checkpointing
     ************************************/
    if( ORTE_SNAPC_OP_INIT == op_event ) {
        OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle,
                             "Global) process_request_op(): Checkpointing Enabled (%2d)\n",
                             op_event));
        is_app_checkpointable = true;
    }
    /************************************
     * Application is finalizing, and no longer ready for checkpointing.
     ************************************/
    else if( ORTE_SNAPC_OP_FIN == op_event ) {
        OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle,
                             "Global) process_request_op(): Checkpointing Disabled (%2d)\n",
                             op_event));
        is_app_checkpointable = false;

        /*
         * Wait for any ongoing checkpoints to finish
         */
        if( current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_ERROR &&
            current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_NONE ) {
            OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle,
                                 "Global) process_request_op(): Wait for ongoing checkpoint to complete..."));
            while( current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_ERROR &&
                   current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_NONE ) {
                opal_progress();
            }
        }

        /*
         * Tell application that it is now ok to finalize
         */
        OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle,
                             "Global) process_request_op(): Send Finalize ACK to the job"));

        buffer = OBJ_NEW(opal_buffer_t);
        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &command, 1, ORTE_SNAPC_FULL_CMD))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        op_event = ORTE_SNAPC_OP_FIN_ACK;
        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &op_event, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(sender, buffer, ORTE_RML_TAG_SNAPC_FULL,
                                                           orte_rml_send_callback, NULL))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }
        /* buffer should not be released here; the callback releases it */
        buffer = NULL;
    }
    /************************************
     * Start a checkpoint operation
     ************************************/
    else if( ORTE_SNAPC_OP_CHECKPOINT == op_event ) {
        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) process_request_op(): Starting checkpoint (%2d)\n",
                             op_event));

        options = OBJ_NEW(opal_crs_base_ckpt_options_t);
        if( ORTE_SUCCESS != (ret = snapc_full_global_checkpoint(options) ) ) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        /*
         * Wait for the operation to complete
         */
        while( current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_ERROR &&
               current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_NONE ) {
            opal_progress();
        }

        if( ORTE_SNAPC_CKPT_STATE_ERROR == current_job_ckpt_state ) {
            op_state = -1;
        } else {
            op_state = 0;
        }

        /*
         * Tell the sender that the operation is finished
         */
        buffer = OBJ_NEW(opal_buffer_t);
        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &command, 1, ORTE_SNAPC_FULL_CMD))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &op_event, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &op_state, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(sender, buffer, ORTE_RML_TAG_SNAPC_FULL,
                                                           orte_rml_send_callback, NULL))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }
        /* buffer should not be released here; the callback releases it */
        buffer = NULL;
    }
    /************************************
     * Start the Restart operation
     ************************************/
    else if( ORTE_SNAPC_OP_RESTART == op_event ) {
        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) process_request_op(): Starting restart (%2d)\n",
                             op_event));

        count = 1;
        if (ORTE_SUCCESS != (ret = opal_dss.unpack(sbuffer, &seq_num, &count, OPAL_INT))) {
            ORTE_ERROR_LOG(ret);
            orte_snapc_ckpt_state_notify(ORTE_SNAPC_CKPT_STATE_NO_RESTART);
            goto cleanup;
        }

        /*
         * NOTE(review): global_handle is filled in by the OPAL_STRING unpack
         * and is never freed in this function. Whether
         * orte_errmgr_base_restart_job() keeps the pointer is not visible
         * here - confirm ownership before adding a free().
         */
        count = 1;
        if (ORTE_SUCCESS != (ret = opal_dss.unpack(sbuffer, &global_handle, &count, OPAL_STRING))) {
            ORTE_ERROR_LOG(ret);
            orte_snapc_ckpt_state_notify(ORTE_SNAPC_CKPT_STATE_NO_RESTART);
            goto cleanup;
        }

        /*
         * Kick off the restart
         */
        if( ORTE_SUCCESS != (ret = orte_errmgr_base_restart_job(current_global_jobid, global_handle, seq_num) ) ) {
            ORTE_ERROR_LOG(ret);
            orte_snapc_ckpt_state_notify(ORTE_SNAPC_CKPT_STATE_NO_RESTART);
            goto cleanup;
        }
    }
    /************************************
     * Start the Migration operation
     ************************************/
    else if( ORTE_SNAPC_OP_MIGRATE == op_event ) {
        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) process_request_op(): Starting migration (%2d)\n",
                             op_event));

        /*
         * NOTE(review): datum is never released in this function. Whether
         * orte_errmgr_base_migrate_job() retains it is not visible here -
         * confirm ownership before adding an OBJ_RELEASE().
         */
        datum = OBJ_NEW(orte_snapc_base_request_op_t);

        /*
         * Unpack migration information
         */
        count = 1;
        if (ORTE_SUCCESS != (ret = opal_dss.unpack(sbuffer, &(datum->mig_num), &count, OPAL_INT))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        /*
         * One entry per migrating process.
         * NOTE(review): mig_num comes straight off the wire and the
         * allocations below are not checked for failure.
         */
        datum->mig_vpids = malloc(sizeof(int) * datum->mig_num);
        datum->mig_host_pref = malloc(sizeof(char) * datum->mig_num * OPAL_MAX_PROCESSOR_NAME);
        datum->mig_vpid_pref = malloc(sizeof(int) * datum->mig_num);
        datum->mig_off_node  = malloc(sizeof(int) * datum->mig_num);

        for( i = 0; i < datum->mig_num; ++i ) {
            (datum->mig_vpids)[i] = 0;
            (datum->mig_host_pref)[i][0] = '\0';
            (datum->mig_vpid_pref)[i] = 0;
            (datum->mig_off_node)[i] = (int)false;
        }

        for( i = 0; i < datum->mig_num; ++i ) {
            count = 1;
            if (ORTE_SUCCESS != (ret = opal_dss.unpack(sbuffer, &((datum->mig_vpids)[i]), &count, OPAL_INT))) {
                ORTE_ERROR_LOG(ret);
                goto cleanup;
            }

            /* Drop the string unpacked on the previous iteration */
            if(NULL != tmp_str ) {
                free(tmp_str);
                tmp_str = NULL;
            }
            count = 1;
            if (ORTE_SUCCESS != (ret = opal_dss.unpack(sbuffer, &tmp_str, &count, OPAL_STRING))) {
                ORTE_ERROR_LOG(ret);
                goto cleanup;
            }
            /*
             * Copy the preferred host name into its fixed-size slot. The slot
             * was initialized to the empty string above, so a missing string
             * leaves it empty. strncpy() does not terminate the destination
             * when the source fills it, so terminate explicitly.
             */
            if( NULL != tmp_str ) {
                strncpy( ((datum->mig_host_pref)[i]), tmp_str, OPAL_MAX_PROCESSOR_NAME);
                (datum->mig_host_pref)[i][OPAL_MAX_PROCESSOR_NAME - 1] = '\0';
            }

            count = 1;
            if (ORTE_SUCCESS != (ret = opal_dss.unpack(sbuffer, &((datum->mig_vpid_pref)[i]), &count, OPAL_INT))) {
                ORTE_ERROR_LOG(ret);
                goto cleanup;
            }

            count = 1;
            if (ORTE_SUCCESS != (ret = opal_dss.unpack(sbuffer, &((datum->mig_off_node)[i]), &count, OPAL_INT))) {
                ORTE_ERROR_LOG(ret);
                goto cleanup;
            }

            OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                                 "Global) Migration %3d/%3d: Received Rank %3d - Requested <%s> (%3d) %c\n",
                                 datum->mig_num, i,
                                 (datum->mig_vpids)[i],
                                 (datum->mig_host_pref)[i],
                                 (datum->mig_vpid_pref)[i],
                                 (OPAL_INT_TO_BOOL((datum->mig_off_node)[i]) ? 'T' : 'F')
                                 ));
        }

        /*
         * Kick off the migration
         */
        OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                             "Global) ------ Kick Off Migration -----"));
        if( ORTE_SUCCESS != (ret = orte_errmgr_base_migrate_job(current_global_jobid, datum) ) ) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        /*
         * Tell the sender that the operation is finished
         */
        OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                             "Global) ------ Finished Migration. Release processes (%15s )-----",
                             ORTE_NAME_PRINT(sender) ));
        buffer = OBJ_NEW(opal_buffer_t);
        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &command, 1, ORTE_SNAPC_FULL_CMD))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &op_event, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        op_state = 0;
        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &op_state, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(sender, buffer, ORTE_RML_TAG_SNAPC_FULL,
                                                           orte_rml_send_callback, NULL))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }
        /*
         * buffer should not be released here; the callback releases it.
         * Clearing the pointer keeps the cleanup path from releasing it a
         * second time.
         */
        buffer = NULL;

        OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                             "Global) ------ Finished Migration. Released processes (%15s )-----",
                             ORTE_NAME_PRINT(sender) ));
    }
    /************************************
     * Start the Quiesce operation
     ************************************/
    else if( ORTE_SNAPC_OP_QUIESCE_START == op_event) {
        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) process_request_op(): Starting quiesce (%2d)\n",
                             op_event));

        options = OBJ_NEW(opal_crs_base_ckpt_options_t);
        options->inc_prep_only = true;
        if( ORTE_SUCCESS != (ret = snapc_full_global_checkpoint(options) ) ) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        /*
         * Wait for quiescence
         */
        while( current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_ERROR &&
               current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_INC_PREPED ) {
            opal_progress();
        }

        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) process_request_op(): Quiesce_start finished(%2d)\n",
                             op_event));
    }
    /************************************
     * End the Quiesce operation
     ************************************/
    else if( ORTE_SNAPC_OP_QUIESCE_END == op_event) {
        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) process_request_op(): Ending quiesce (%2d)\n",
                             op_event));

        /*
         * Wait for the checkpoint operation to finish
         */
        while( current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_ERROR &&
               current_job_ckpt_state != ORTE_SNAPC_CKPT_STATE_NONE ) {
            opal_progress();
        }

        OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                             "Global) process_request_op(): Quiesce_end finished(%2d)\n",
                             op_event));
    }

cleanup:
    /* Only non-NULL here when a reply was built but never handed to the RML */
    if (NULL != buffer) {
        OBJ_RELEASE(buffer);
        buffer = NULL;
    }

    if( NULL != options ) {
        OBJ_RELEASE(options);
        options = NULL;
    }

    if(NULL != tmp_str ) {
        free(tmp_str);
        tmp_str = NULL;
    }

    return;
}

/*
 * Handle a daemon (orted) state update sent to the global coordinator.
 *
 * sender  - name of the daemon reporting its state; it must match an entry
 *           returned by find_orted_snapshot()
 * buffer  - remainder of the received message; one OPAL_INT holding the
 *           daemon's new checkpoint state is unpacked from it
 * quick   - true for the "quick" message form; both forms currently carry
 *           the same payload (see the JJH note below)
 *
 * After recording the daemon's state, the value returned by
 * snapc_full_global_get_min_state() is compared against
 * current_job_ckpt_state, and the job-level state is advanced through
 * RUNNING, INC_PREPED, STOPPED, FINISHED_LOCAL (or MIGRATING) and RECOVERED,
 * notifying the connected orte-checkpoint tool where applicable.
 *
 * Returns ORTE_SUCCESS, ORTE_ERROR for an unknown daemon, or the error code
 * of the first failing call.
 */
static int snapc_full_process_orted_update_cmd(orte_process_name_t* sender,
                                               opal_buffer_t* buffer,
                                               bool quick)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_std_cntr_t count;
    int remote_ckpt_state;
    opal_list_item_t* item = NULL;
    opal_list_item_t* aitem = NULL;
    orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL;
    orte_snapc_base_local_snapshot_t *app_snapshot = NULL;
    int loc_min_state;
    char *state_str = NULL;

    orted_snapshot = find_orted_snapshot(sender);
    if( NULL == orted_snapshot ) {
        opal_output(mca_snapc_full_component.super.output_handle,
                    "Global) Error: Unknown Daemon %s",
                    ORTE_NAME_PRINT(sender) );
        exit_status = ORTE_ERROR;
        ORTE_ERROR_LOG(ORTE_ERROR);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                         "Global) Daemon %s: Changed state to:\n",
                         ORTE_NAME_PRINT(&(orted_snapshot->process_name)) ));

    /*
     * Unpack the data (quick)
     * - state
     * Unpack the data (long)
     * - state
     * - # procs
     * - Foreach proc
     *   - process name
     */
    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &remote_ckpt_state, &count, OPAL_INT))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }
    orted_snapshot->state = remote_ckpt_state;
    /* state_str is filled in by the call below and freed once it is logged */
    orte_snapc_ckpt_state_str(&state_str, orted_snapshot->state);
    OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                         "Global)   State:        %d (%s)\n",
                         (int)(orted_snapshot->state), state_str));
    free(state_str);
    state_str = NULL;

    /* JJH: Though there is currently no additional information sent in a long
     *      message versus a small message, keep this logic so that in the
     *      future it can be easily reused without substantially modifying
     *      the component.
     */
    if( quick ) {
        exit_status = ORTE_SUCCESS;
        goto post_process;
    }

 post_process:
    /* presumably the lowest state reported across all daemons - confirm in
     * snapc_full_global_get_min_state() */
    loc_min_state = snapc_full_global_get_min_state();

    SNAPC_FULL_REPORT_PROGRESS(orted_snapshot, current_total_orteds, loc_min_state);

    /*
     * Notify the orte-checkpoint command once we have everyone running.
     * No need to broadcast this to everyone since they already know.
     */
    if( ORTE_SNAPC_CKPT_STATE_RUNNING == loc_min_state &&
        ORTE_SNAPC_CKPT_STATE_RUNNING != current_job_ckpt_state) {
        current_job_ckpt_state = loc_min_state;

        SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_RUNNING);

        if( is_orte_checkpoint_connected &&
            ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender,
                                                                                global_snapshot.ss_handle,
                                                                                current_job_ckpt_state)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }

    /*
     * If we are just prep'ing the INC, then acknowledge the state change
     */
    if( ORTE_SNAPC_CKPT_STATE_INC_PREPED == loc_min_state &&
        ORTE_SNAPC_CKPT_STATE_INC_PREPED > current_job_ckpt_state) {
        current_job_ckpt_state = loc_min_state;

        OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                             "Global)    All Processes have finished the INC prep!\n"));
    }

    /*
     * Notify the orte-checkpoint command once we have everyone stopped.
     * No need to broadcast this to everyone since they already know.
     */
    if( ORTE_SNAPC_CKPT_STATE_STOPPED == loc_min_state &&
        ORTE_SNAPC_CKPT_STATE_STOPPED > current_job_ckpt_state) {
        current_job_ckpt_state = loc_min_state;

        OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                             "Global)    All Processes have been stopped!\n"));

        if( is_orte_checkpoint_connected &&
            ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender,
                                                                                global_snapshot.ss_handle,
                                                                                current_job_ckpt_state)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }

        /* orte-checkpoint detaches at this point */
        is_orte_checkpoint_connected = false;

        /*
         * Synchronize the checkpoint here
         */
        write_out_global_metadata();
    }

    /*
     * If all daemons have finished, let everyone know we are locally finished.
     */
    if( ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL == loc_min_state &&
        ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL > current_job_ckpt_state) {

        SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_FIN_LOCAL);

        /*
         * NOTE(review): given the enclosing condition (loc_min_state is
         * FINISHED_LOCAL and FINISHED_LOCAL > current_job_ckpt_state), the
         * inner equality test below looks unreachable - confirm.
         */
        if( ORTE_SNAPC_CKPT_STATE_NONE != current_job_ckpt_state ) {
            if( loc_min_state == current_job_ckpt_state) {
                opal_output(0, "Global) JJH WARNING!!: (%d) == (%d)", loc_min_state, current_job_ckpt_state);
            }
        }

        /* While migrating, the metadata is written now and the job is put in
         * the MIGRATING state rather than FINISHED_LOCAL */
        if( currently_migrating ) {
            write_out_global_metadata();
            current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_MIGRATING;
        }
        else {
            current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL;
        }

        if( NULL != state_str ) {
            free(state_str);
        }
        orte_snapc_ckpt_state_str(&state_str, current_job_ckpt_state);
        OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                             "Global) Job State Changed: %d (%s)\n",
                             (int)current_job_ckpt_state, state_str ));
        free(state_str);
        state_str = NULL;

        if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(current_global_jobid,
                                                                            current_job_ckpt_state,
                                                                            global_snapshot.ss_handle,
                                                                            true, NULL) ) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }

        /*
         * Now that we have finished locally,
         * - Write out the metadata
         * - Sync the snapshot to SStore
         * if we are stopping then we have already written out this data.
         */
        if( !(global_snapshot.options->stop) && !currently_migrating ) {
            write_out_global_metadata();
        }

        SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_ESTABLISH);

        /* Publish the ESTABLISHED state; current_job_ckpt_state is not
         * assigned directly in this function for this step */
        if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(current_global_jobid,
                                                                            ORTE_SNAPC_CKPT_STATE_ESTABLISHED,
                                                                            global_snapshot.ss_handle,
                                                                            true, NULL) ) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }

    /*
     * If all daemons have confirmed that their local processes are finished
     * and we have finished establishing the checkpoint,
     * then let the command line tool know and cleanup.
     */
    if( ORTE_SNAPC_CKPT_STATE_RECOVERED == loc_min_state &&
        ORTE_SNAPC_CKPT_STATE_RECOVERED > current_job_ckpt_state ) {

        /*
         * If this is a job restarting then we do something different
         */
        if( current_job_ckpt_state == ORTE_SNAPC_CKPT_STATE_NONE ) {
            OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle,
                                 "Global) Job has been successfully restarted"));

            /*current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_RECOVERED;*/
            orte_snapc_ckpt_state_notify(ORTE_SNAPC_CKPT_STATE_RECOVERED);

            /* Reset every daemon snapshot, and every application snapshot
             * under it, back to the NONE state */
            for(item  = opal_list_get_first(&(global_snapshot.local_snapshots));
                item != opal_list_get_end(&(global_snapshot.local_snapshots));
                item  = opal_list_get_next(item) ) {
                orted_snapshot = (orte_snapc_full_orted_snapshot_t*)item;

                orted_snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE;

                for(aitem  = opal_list_get_first(&(orted_snapshot->super.local_snapshots));
                    aitem != opal_list_get_end(&(orted_snapshot->super.local_snapshots));
                    aitem  = opal_list_get_next(aitem) ) {
                    app_snapshot = (orte_snapc_base_local_snapshot_t*)aitem;

                    app_snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE;
                }
            }

            SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_RECOVERED);
            SNAPC_FULL_DISPLAY_RECOVERED_TIMER();
            orte_snapc_base_has_recovered = true;
            is_app_checkpointable = true;

            exit_status = ORTE_SUCCESS;
            goto cleanup;
        }

        /*
         * If the checkpoint has not been established yet, then do not clear the
         * snapshot structure just yet.
         */
        if(ORTE_SNAPC_CKPT_STATE_ESTABLISHED != current_job_ckpt_state ) {
            cleanup_on_establish = true;
        }

        current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_RECOVERED;

        if( NULL != state_str ) {
            free(state_str);
        }
        orte_snapc_ckpt_state_str(&state_str, current_job_ckpt_state);
        OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                             "Global) Job State Changed: %d (%s)\n",
                             (int)current_job_ckpt_state, state_str ));
        free(state_str);
        state_str = NULL;

        /*
         * Notify the orte-checkpoint command
         */
        if( is_orte_checkpoint_connected &&
            ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender,
                                                                                global_snapshot.ss_handle,
                                                                                current_job_ckpt_state)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }

        SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_RECOVERED);

        /*
         * If the checkpoint has been established at this point, then cleanup.
         */
        if( !cleanup_on_establish && ORTE_SNAPC_CKPT_STATE_RECOVERED == current_job_ckpt_state) {
            if( ORTE_SUCCESS != (ret = orte_snapc_full_global_reset_coord()) ) {
                ORTE_ERROR_LOG(ret);
                exit_status = ret;
                goto cleanup;
            }
        }
    }

 cleanup:
    if( NULL != state_str ) {
        free(state_str);
        state_str = NULL;
    }

    return exit_status;
}

/*
 * Handle a "restart process info" message from a peer.
 *
 * The payload is unpacked in this order: one OPAL_STRING (the peer's
 * hostname), one OPAL_SIZE (number of PIDs that follow), then that many
 * OPAL_PID values.  Every (hostname, pid) pair is handed to
 * global_coord_restart_proc_info().
 *
 * sender: peer that sent the message; only used in diagnostics here.
 * buffer: message payload to unpack.
 *
 * If any unpack fails, a diagnostic is emitted and the remainder of the
 * message is dropped (pairs already reported stay reported, and stdout is
 * not flushed on that path).
 */
static void snapc_full_process_restart_proc_info_cmd(orte_process_name_t* sender,
                                                     opal_buffer_t* buffer)
{
    int ret;
    orte_std_cntr_t count;
    size_t num_vpids = 0, i;
    pid_t tmp_pid;
    char * tmp_hostname = NULL;

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &tmp_hostname, &count, OPAL_STRING))) {
        opal_output(mca_snapc_full_component.super.output_handle,
                    "Global) vpid_assoc: Failed to unpack process Hostname from peer %s\n",
                    ORTE_NAME_PRINT(sender));
        goto cleanup;
    }

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &num_vpids, &count, OPAL_SIZE))) {
        opal_output(mca_snapc_full_component.super.output_handle,
                    "Global) vpid_assoc: Failed to unpack num_vpids from peer %s\n",
                    ORTE_NAME_PRINT(sender));
        goto cleanup;
    }

    for(i = 0; i < num_vpids; ++i) {
        count = 1;
        if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &tmp_pid, &count, OPAL_PID))) {
            opal_output(mca_snapc_full_component.super.output_handle,
                        "Global) vpid_assoc: Failed to unpack process PID from peer %s\n",
                        ORTE_NAME_PRINT(sender));
            goto cleanup;
        }

        global_coord_restart_proc_info(tmp_pid, tmp_hostname);
    }

    /* stdout may be buffered by the C library so it needs to be flushed so
     * that the debugger can read the process info.
     */
    fflush(stdout);

 cleanup:
    /*
     * The unpacked hostname string is heap storage handed to us by the
     * unpack call; global_coord_restart_proc_info() only prints it, so it
     * must be released here on every path.
     */
    if( NULL != tmp_hostname ) {
        free(tmp_hostname);
        tmp_hostname = NULL;
    }

    return;
}

/*
 * Report one restarted process on stdout as
 * "MPIR_debug_info) <hostname>:<pid>" followed by a newline.
 *
 * local_pid:      process id to report.
 * local_hostname: host name to report; printed with %s, so it must be a
 *                 valid NUL-terminated string.
 *
 * Always returns 0.  The output is not flushed here.
 */
int global_coord_restart_proc_info(pid_t local_pid, char * local_hostname)
{
    /* pid_t is not guaranteed to be 'int'; convert so the argument matches %d */
    printf("MPIR_debug_info) %s:%d\n", local_hostname, (int)local_pid);
    return 0;
}

/*
 * Handle a job checkpoint-state update message and apply it through
 * global_coord_job_state_update().
 *
 * sender: peer the message came from (handed to the sstore handle unpack).
 * buffer: message payload, see the layout below.
 * quick:  true when the message is the short form.
 *
 * Payload layout, in unpack order:
 *   always    - jobid, ckpt_state
 *   long form - sstore_handle, ckpt_options, migrating flag and, when the
 *               flag is set, a process count followed by that many names
 *
 * Any unpack failure is logged and the update is dropped.
 */
static void snapc_full_process_job_update_cmd(orte_process_name_t* sender,
                                              opal_buffer_t* buffer,
                                              bool quick)
{
    int ret;
    orte_std_cntr_t count;
    orte_jobid_t jobid;
    int   job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE;
    opal_crs_base_ckpt_options_t *options = NULL;
    bool loc_migrating = false;
    size_t loc_num_procs = 0;
    orte_process_name_t proc_name;
    size_t i;
    /*
     * The short form carries no handle, so start from the handle cached in
     * the global snapshot instead of passing an indeterminate value on.
     */
    orte_sstore_base_handle_t ss_handle = global_snapshot.ss_handle;

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &jobid, &count, ORTE_JOBID))) {
        ORTE_ERROR_LOG(ret);
        return;
    }

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_state, &count, OPAL_INT))) {
        ORTE_ERROR_LOG(ret);
        return;
    }

    if( !quick ) {
        if (ORTE_SUCCESS != (ret = orte_sstore.unpack_handle(sender, buffer, &ss_handle)) ) {
            ORTE_ERROR_LOG(ret);
            return;
        }

        options = OBJ_NEW(opal_crs_base_ckpt_options_t);
        if( ORTE_SUCCESS != (ret = orte_snapc_base_unpack_options(buffer, options)) ) {
            ORTE_ERROR_LOG(ret);
            /* 'options' is already allocated: release it via cleanup */
            goto cleanup;
        }
        /* In this case we want to use the current_options that are cached
         * so that we do not have to send them every time.
         */
        opal_crs_base_copy_options(options, global_snapshot.options);

        count = 1;
        if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(loc_migrating), &count, OPAL_BOOL))) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }

        if( loc_migrating ) {
            count = 1;
            if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &loc_num_procs, &count, OPAL_SIZE))) {
                ORTE_ERROR_LOG(ret);
                goto cleanup;
            }

            for( i = 0; i < loc_num_procs; ++i ) {
                count = 1;
                /*
                 * The sender packs a process name per migrating process, so
                 * unpack into a name object (not into a pointer variable).
                 * The value is currently only consumed to advance the buffer.
                 */
                if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &proc_name, &count, ORTE_NAME))) {
                    ORTE_ERROR_LOG(ret);
                    goto cleanup;
                }
                /* JJH: Update local info as needed */
            }
        }
    }

    if( ORTE_SUCCESS != (ret = global_coord_job_state_update(jobid,
                                                             job_ckpt_state,
                                                             ss_handle,
                                                             global_snapshot.options) ) ) {
        ORTE_ERROR_LOG(ret);
    }

 cleanup:
    if( NULL != options ) {
        OBJ_RELEASE(options);
        options = NULL;
    }

    return;
}

/*
 * Advance the snapshot sequence number, obtain and register a stable
 * storage handle for the current global job, and record the AMCA and TUNE
 * parameter-file settings in the snapshot metadata when they have a value.
 *
 * empty_metadata: not consulted by this implementation.
 *
 * Always returns ORTE_SUCCESS; a failed variable lookup only produces a
 * help message.
 */
static int snapc_full_establish_snapshot_dir(bool empty_metadata)
{
    char **value = NULL;
    int idx = 0;

    (void)empty_metadata;

    /*********************
     * Contact the Stable Storage Framework to setup the storage directory
     *********************/
    INC_SEQ_NUM();
    orte_sstore.request_checkpoint_handle(&(global_snapshot.ss_handle),
                                          orte_snapc_base_snapshot_seq_number,
                                          current_global_jobid);
    if( currently_migrating ) {
        orte_sstore.set_attr(global_snapshot.ss_handle,
                             SSTORE_METADATA_GLOBAL_MIGRATING,
                             "1");
    }
    orte_sstore.register_handle(global_snapshot.ss_handle);

    /*
     * Save the AMCA parameter used into the metadata file.
     * A negative index means the variable was not found; every other
     * index (zero included) is looked up.
     */
    idx = mca_base_var_find("opal", "mca", "base", "param_file_prefix");
    if( 0 > idx ) {
        opal_show_help("help-orte-restart.txt", "amca_param_not_found", true);
    } else {
        /* start from NULL so a failed lookup cannot leave a stale pointer */
        value = NULL;
        mca_base_var_get_value (idx, &value, NULL, NULL);

        if( NULL != value && NULL != *value ) {
            orte_sstore.set_attr(global_snapshot.ss_handle,
                                 SSTORE_METADATA_GLOBAL_AMCA_PARAM,
                                 *value);

            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) AMCA Parameter Preserved: %s",
                                 *value));
        }
    }

    /*
     * Save the TUNE parameter used into the metadata file
     */
    idx = mca_base_var_find("opal", "mca", "base", "envar_file_prefix");
    if( 0 > idx ) {
        opal_show_help("help-orte-restart.txt", "tune_param_not_found", true);
    } else {
        value = NULL;
        mca_base_var_get_value (idx, &value, NULL, NULL);

        if( NULL != value && NULL != *value ) {
            orte_sstore.set_attr(global_snapshot.ss_handle,
                                 SSTORE_METADATA_GLOBAL_TUNE_PARAM,
                                 *value);

            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) TUNE Parameter Preserved: %s",
                                 *value));
        }
    }

    return ORTE_SUCCESS;
}

/*
 * Start a checkpoint of the current global job.
 *
 * Steps, in order: mark the job state as REQUEST, establish the snapshot
 * directory/handle, report the state to orte-checkpoint when it is
 * connected, then notify the Local Coordinators.
 *
 * options: checkpoint options forwarded to the Local Coordinators.
 *
 * Returns ORTE_SUCCESS, or the first error code encountered (which is also
 * logged); later steps are skipped after a failure.
 */
static int snapc_full_global_checkpoint(opal_crs_base_ckpt_options_t *options)
{
    int rc;

    OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                         "Global) Checkpoint of job %s has been requested\n",
                         ORTE_JOBID_PRINT(current_global_jobid)));

    current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_REQUEST;

    /* Generate the global snapshot directory and unique snapshot handle */
    rc = snapc_full_establish_snapshot_dir(false);
    if( ORTE_SUCCESS != rc ) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* Update handshake with the orte_checkpoint command, if one is attached */
    updated_job_to_running = false;
    if( is_orte_checkpoint_connected ) {
        rc = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender,
                                                          global_snapshot.ss_handle,
                                                          current_job_ckpt_state);
        if( ORTE_SUCCESS != rc ) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* Tell the Local Snapshot Coordinators about the checkpoint request */
    OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle,
                         "Global) Notifying the Local Coordinators\n"));

    rc = snapc_full_global_notify_checkpoint(current_global_jobid, options);
    if( ORTE_SUCCESS != rc ) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    return ORTE_SUCCESS;
}

/*
 * Move every tracked daemon snapshot to the PENDING state and send the
 * (long form) job state update for 'jobid' to the local daemons.
 *
 * jobid:   job being checkpointed.
 * options: checkpoint options; copied into global_snapshot.options and
 *          sent along with the update.
 *
 * Returns ORTE_SUCCESS, or the (logged) error from the job state update.
 */
static int  snapc_full_global_notify_checkpoint(orte_jobid_t jobid,
                                                opal_crs_base_ckpt_options_t *options)
{
    const int pending_state = ORTE_SNAPC_CKPT_STATE_PENDING;
    opal_list_item_t *cur = NULL;
    int rc;

    /* Cache the options in the global snapshot */
    opal_crs_base_copy_options(options, global_snapshot.options);

    /* Mark every daemon entry in the global structure as pending */
    cur = opal_list_get_first(&global_snapshot.local_snapshots);
    while( cur != opal_list_get_end(&global_snapshot.local_snapshots) ) {
        ((orte_snapc_full_orted_snapshot_t*)cur)->state = pending_state;
        cur = opal_list_get_next(cur);
    }

    /* Update the job state, and broadcast to all local daemons */
    rc = orte_snapc_full_global_set_job_ckpt_info(jobid,
                                                  pending_state,
                                                  global_snapshot.ss_handle,
                                                  false, options);
    if( ORTE_SUCCESS != rc ) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}

/**********************************
 * Job/Proc State Set/Get Routines
 **********************************/
/*
 * Build a job state update message and xcast it to all daemons on the
 * ORTE_RML_TAG_SNAPC_FULL tag.
 *
 * jobid:      job whose checkpoint state changed.
 * ckpt_state: new checkpoint state.
 * handle:     stable storage handle; packed in the long form only.
 * quick:      true  -> short form: command, jobid, ckpt_state
 *             false -> long form: the above plus handle, options, the
 *                      migrating flag and, when migrating, the number of
 *                      migrating processes followed by their names
 * options:    checkpoint options; packed in the long form only.
 *
 * Returns ORTE_SUCCESS, or the first pack/xcast error (also logged).
 */
static int orte_snapc_full_global_set_job_ckpt_info( orte_jobid_t jobid,
                                                     int    ckpt_state,
                                                     orte_sstore_base_handle_t handle,
                                                     bool quick,
                                                     opal_crs_base_ckpt_options_t *options)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_snapc_full_cmd_flag_t command;
    opal_buffer_t *buffer = NULL;
    char * state_str = NULL;
    orte_proc_t *proc = NULL;
    opal_list_item_t *item = NULL;
    size_t num_procs;
    /* Must start as NULL: pack failures reach 'cleanup' before it is created */
    orte_grpcomm_signature_t *sig = NULL;

    /*
     * Update all Local Coordinators (broadcast operation)
     */
    buffer = OBJ_NEW(opal_buffer_t);

    if( quick ) {
        command = ORTE_SNAPC_FULL_UPDATE_JOB_STATE_QUICK_CMD;
    } else {
        command = ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD;
    }

    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &command, 1, ORTE_SNAPC_FULL_CMD))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &jobid, 1, ORTE_JOBID))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &ckpt_state, 1, OPAL_INT))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /* The short form stops here */
    if( quick ) {
        goto process_msg;
    }

    if (ORTE_SUCCESS != (ret = orte_sstore.pack_handle(NULL, buffer, handle))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if(ORTE_SUCCESS != (ret = orte_snapc_base_pack_options(buffer, options))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &(currently_migrating), 1, OPAL_BOOL))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if( currently_migrating ) {
        num_procs = opal_list_get_size(migrating_procs);

        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &num_procs, 1, OPAL_SIZE))) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }

        for (item  = opal_list_get_first(migrating_procs);
             item != opal_list_get_end(migrating_procs);
             item  = opal_list_get_next(item)) {
            proc = (orte_proc_t*)item;
            if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &(proc->name), 1, ORTE_NAME))) {
                ORTE_ERROR_LOG(ret);
                exit_status = ret;
                goto cleanup;
            }
        }
    }

 process_msg:
    orte_snapc_ckpt_state_str(&state_str, ckpt_state);
    OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle,
                         "Global) Notify Local Coordinators of job %s state change to %d (%s)\n",
                         ORTE_JOBID_PRINT(jobid), (int)ckpt_state, state_str ));
    free(state_str);
    state_str = NULL;

    /* goes to all daemons */
    /* NOTE(review): only the single wildcard name is filled in here; confirm
     * whether the signature also needs an element count set for xcast. */
    sig = OBJ_NEW(orte_grpcomm_signature_t);
    sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
    sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
    sig->signature[0].vpid = ORTE_VPID_WILDCARD;
    if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast(sig, ORTE_RML_TAG_SNAPC_FULL, buffer))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * We will also receive the job update, and process in the RML callback
     */

 cleanup:
    if( NULL != state_str ) {
        free(state_str);
        state_str = NULL;
    }

    OBJ_RELEASE(buffer);
    /* Only release the signature if we got far enough to create it */
    if( NULL != sig ) {
        OBJ_RELEASE(sig);
    }

    return exit_status;
}

/*
 * Apply a job checkpoint-state change on the Global Coordinator.
 *
 * The new state is recorded in current_job_ckpt_state, reported to
 * orte-checkpoint when it is connected, forwarded to the local coordinator
 * logic when this process also acts as one, and finally handled per state:
 *   ESTABLISHED - reset the coordinator if cleanup_on_establish is set
 *   ERROR       - emit a failure message
 *   STOPPED     - nothing to do
 *   REQUEST     - not supported; an error is logged
 *
 * jobid:          job the update is for.
 * job_ckpt_state: new checkpoint state.
 * ss_handle:      stable storage handle passed to the local update.
 * options:        checkpoint options passed to the local update.
 *
 * Returns ORTE_SUCCESS, or the first (logged) error from a called routine.
 */
int global_coord_job_state_update(orte_jobid_t jobid,
                                  int    job_ckpt_state,
                                  orte_sstore_base_handle_t ss_handle,
                                  opal_crs_base_ckpt_options_t *options)
{
    int rc;
    char *state_name = NULL;

    orte_snapc_ckpt_state_str(&state_name, job_ckpt_state);
    OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle,
                         "Global) Job update command: jobid %s -> state %d (%s)\n",
                         ORTE_JOBID_PRINT(jobid), (int)job_ckpt_state, state_name ));
    free(state_name);
    state_name = NULL;

    /* Record the state and update the orte_checkpoint command */
    current_job_ckpt_state = job_ckpt_state;
    if( is_orte_checkpoint_connected ) {
        rc = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender,
                                                          global_snapshot.ss_handle,
                                                          current_job_ckpt_state);
        if( ORTE_SUCCESS != rc ) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* If also a Local coordinator then act locally before globally */
    if( ORTE_SNAPC_LOCAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_LOCAL_COORD_TYPE) ) {
        rc = local_coord_job_state_update(jobid, job_ckpt_state, ss_handle, options);
        if( ORTE_SUCCESS != rc ) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* Process the cmd */
    if( ORTE_SNAPC_CKPT_STATE_ESTABLISHED == job_ckpt_state ) {
        /*
         * When the processes recovered before the checkpoint was
         * established, the cleanup was deferred to this point.
         */
        if( !cleanup_on_establish ) {
            return ORTE_SUCCESS;
        }
        rc = orte_snapc_full_global_reset_coord();
        if( ORTE_SUCCESS != rc ) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        return ORTE_SUCCESS;
    }

    if( ORTE_SNAPC_CKPT_STATE_ERROR == job_ckpt_state ) {
        opal_output(mca_snapc_full_component.super.output_handle,
                    "Error: Checkpoint failed!");
        return ORTE_SUCCESS;
    }

    /* STOPPED is always handled locally, so there is nothing to do for it */
    if( ORTE_SNAPC_CKPT_STATE_STOPPED == job_ckpt_state ) {
        return ORTE_SUCCESS;
    }

    /* REQUEST is not expected here since this case is not handled */
    if( ORTE_SNAPC_CKPT_STATE_REQUEST == job_ckpt_state ) {
        opal_output(mca_snapc_full_component.super.output_handle,
                    "ERROR: Internal Checkpoint request not implemented.");
        ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
    }

    return ORTE_SUCCESS;
}

/*
 * Sync the stable storage for the current global snapshot, unless any
 * tracked daemon snapshot is in the ERROR state.
 *
 * Returns ORTE_ERROR (without syncing) if a daemon snapshot reports an
 * error, ORTE_SUCCESS otherwise.
 */
static int write_out_global_metadata(void)
{
    opal_list_item_t *cur = NULL;

    OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                         "Global) Updating Metadata"));

    /*
     * Bail out on the first daemon that reported an error.
     * JJH CLEANUP: Check might be good, but mostly unnecessary
     * JJH: Do we want to pass this along to the SStore? Probably
     */
    cur = opal_list_get_first(&(global_snapshot.local_snapshots));
    while( cur != opal_list_get_end(&(global_snapshot.local_snapshots)) ) {
        if( ORTE_SNAPC_CKPT_STATE_ERROR == ((orte_snapc_full_orted_snapshot_t*)cur)->state ) {
            return ORTE_ERROR;
        }
        cur = opal_list_get_next(cur);
    }

    /* No errors seen: sync the stable storage and stamp the timer */
    orte_sstore.sync(global_snapshot.ss_handle);

    SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_SS_SYNC);

    return ORTE_SUCCESS;
}

/*
 * Look up the daemon snapshot whose process name matches 'name' on all
 * fields.
 *
 * The list of known daemon snapshots is scanned once; if nothing matches,
 * the job structures are refreshed and the list is scanned a second time.
 *
 * name: process name to look for.
 *
 * Returns the matching entry, or NULL when the refresh fails (the error is
 * logged) or no entry matches after the refresh.
 */
static orte_snapc_full_orted_snapshot_t *find_orted_snapshot(orte_process_name_t *name )
{
    orte_snapc_full_orted_snapshot_t *candidate = NULL;
    opal_list_item_t *cur = NULL;
    int pass;
    int rc;

    for( pass = 0; pass < 2; ++pass ) {
        for(cur  = opal_list_get_first(&(global_snapshot.local_snapshots));
            cur != opal_list_get_end(&(global_snapshot.local_snapshots));
            cur  = opal_list_get_next(cur) ) {
            candidate = (orte_snapc_full_orted_snapshot_t*)cur;

            if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                            name,
                                                            &candidate->process_name) ) {
                return candidate;
            }
        }

        /* The second scan is the last attempt */
        if( 0 != pass ) {
            break;
        }

        /* First scan failed: refresh the job structure, and try again */
        OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
                             "Global) find_orted(%s) failed. Refreshing and trying again...",
                             ORTE_NAME_PRINT(name) ));

        rc = global_refresh_job_structs();
        if( ORTE_SUCCESS != rc ) {
            ORTE_ERROR_LOG(rc);
            return NULL;
        }
    }

    return NULL;
}

/*
 * Compute the smallest checkpoint state among the daemon snapshots that
 * have at least one local (child) snapshot.
 *
 * Side effect: current_total_orteds is reset and then counts the daemons
 * that were taken into account.
 *
 * Returns the minimum state found, or ORTE_SNAPC_CKPT_MAX when no daemon
 * qualified.
 */
static int snapc_full_global_get_min_state(void)
{
    int lowest = ORTE_SNAPC_CKPT_MAX;
    orte_snapc_full_orted_snapshot_t *daemon = NULL;
    opal_list_item_t *cur = NULL;
    char *daemon_state_str = NULL;
    char *lowest_state_str = NULL;

    current_total_orteds = 0;

    cur = opal_list_get_first(&(global_snapshot.local_snapshots));
    while( cur != opal_list_get_end(&(global_snapshot.local_snapshots)) ) {
        daemon = (orte_snapc_full_orted_snapshot_t*)cur;
        /* advance now so the 'continue' below cannot skip it */
        cur = opal_list_get_next(cur);

        /* Ignore orteds with no processes */
        if( 0 >= opal_list_get_size(&(daemon->super.local_snapshots)) ) {
            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) ... %s Skipping - (no children)",
                                 ORTE_NAME_PRINT(&daemon->process_name) ));
            continue;
        }

        current_total_orteds++;

        /* Drop the strings from the previous iteration before re-filling */
        free(daemon_state_str);
        daemon_state_str = NULL;
        free(lowest_state_str);
        lowest_state_str = NULL;

        orte_snapc_ckpt_state_str(&daemon_state_str, daemon->state);
        orte_snapc_ckpt_state_str(&lowest_state_str, lowest);

        OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                             "Global) ... %s Checking [%d %s] vs [%d %s]",
                             ORTE_NAME_PRINT(&daemon->process_name),
                             (int)daemon->state, daemon_state_str,
                             lowest, lowest_state_str ));

        if( (int)daemon->state < (int)lowest ) {
            lowest = daemon->state;

            OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                                 "Global) ... %s Update  --> Min State [%d %s]",
                                 ORTE_NAME_PRINT(&daemon->process_name),
                                 (int)lowest, daemon_state_str ));
        }
    }

    /* Report the final minimum */
    free(lowest_state_str);
    lowest_state_str = NULL;
    orte_snapc_ckpt_state_str(&lowest_state_str, lowest);
    OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
                         "Global) ... Min State [%d %s]",
                         (int)lowest, lowest_state_str ));

    free(daemon_state_str);
    daemon_state_str = NULL;
    free(lowest_state_str);
    lowest_state_str = NULL;

    return lowest;
}

/*
 * Return the Global Coordinator to its idle state after a checkpoint.
 *
 * In order: display the timers, terminate the job when the 'term' option
 * is set, clear the cached options, reset every daemon and application
 * snapshot state to NONE, restart the command line listener, and reset the
 * job state, the deferred-cleanup flag and the progress counters.
 *
 * Returns ORTE_SUCCESS, or the (logged) error from restarting the command
 * line listener; the remaining resets are performed either way.
 */
static int orte_snapc_full_global_reset_coord(void)
{
    int rc;
    int status = ORTE_SUCCESS;
    opal_list_item_t *daemon_item = NULL;
    opal_list_item_t *app_item = NULL;
    orte_snapc_full_orted_snapshot_t *daemon = NULL;

    /* The timers are shown whether or not termination was requested */
    SNAPC_FULL_DISPLAY_ALL_TIMERS();

    /********************************
     * Terminate the job if requested
     * At this point the application should have already exited, but do this
     * just to make doubly sure that the job is terminated.
     *********************************/
    if( global_snapshot.options->term ) {
        orte_plm.terminate_job(current_global_jobid);
    }

    /* Just cleanup, do not need to send out another message */
    opal_crs_base_clear_options(global_snapshot.options);

    /* Reset global data structures: daemons first, then their applications */
    daemon_item = opal_list_get_first(&(global_snapshot.local_snapshots));
    while( daemon_item != opal_list_get_end(&(global_snapshot.local_snapshots)) ) {
        daemon = (orte_snapc_full_orted_snapshot_t*)daemon_item;
        daemon->state = ORTE_SNAPC_CKPT_STATE_NONE;

        app_item = opal_list_get_first(&(daemon->super.local_snapshots));
        while( app_item != opal_list_get_end(&(daemon->super.local_snapshots)) ) {
            ((orte_snapc_base_local_snapshot_t*)app_item)->state = ORTE_SNAPC_CKPT_STATE_NONE;
            app_item = opal_list_get_next(app_item);
        }

        daemon_item = opal_list_get_next(daemon_item);
    }

    /* Set up the Command Line listener again */
    is_orte_checkpoint_connected = false;
    rc = snapc_full_global_start_cmdline_listener();
    if( ORTE_SUCCESS != rc ) {
        ORTE_ERROR_LOG(rc);
        status = rc;
    }

    current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE;
    cleanup_on_establish = false;

    report_progress_cur_loc_finished = 0;
    report_progress_last_reported_loc_finished = 0;

    return status;
}

/************************
 * Timing
 ************************/
/*
 * Stamp timer slot 'idx' with the current time, but only the first time:
 * a slot that already holds a positive value is left untouched.
 *
 * idx: timer slot; values outside [0, SNAPC_FULL_TIMER_MAX) are ignored.
 */
static void snapc_full_set_time(int idx)
{
    /* Reject negative as well as too-large slots before indexing */
    if( idx < 0 || idx >= SNAPC_FULL_TIMER_MAX ) {
        return;
    }

    if( timer_start[idx] <= 0.0 ) {
        timer_start[idx] = snapc_full_get_time();
    }
}

/*
 * Print the full checkpoint timing summary: one line per phase, framed by
 * "Summary Begin"/"Summary End" lines.
 *
 * Two layouts exist.  When the SS_SYNC stamp is not later than the
 * RECOVERED stamp, the phases are reported in the order sync, establish,
 * recover.  Otherwise (checkpoint established after the processes
 * recovered) the same phases are reported with a trailing '*' on the label
 * and with the differences taken relative to the recovery stamp.
 *
 * The labels are passed as string literals; the display routine only reads
 * them, so no heap copy is needed.
 */
static void snapc_full_display_all_timers(void)
{
    double diff = 0.0;

    opal_output(0, "Snapshot Coordination Timing: ******************** Summary Begin\n");

    /********** Startup time **********/
    diff = timer_start[SNAPC_FULL_TIMER_RUNNING]   - timer_start[SNAPC_FULL_TIMER_START];
    snapc_full_display_indv_timer_core(diff, "Running");

    /********** Time to finish locally **********/
    diff = timer_start[SNAPC_FULL_TIMER_FIN_LOCAL] - timer_start[SNAPC_FULL_TIMER_RUNNING];
    snapc_full_display_indv_timer_core(diff, "Finish Locally");

    if( timer_start[SNAPC_FULL_TIMER_SS_SYNC] <= timer_start[SNAPC_FULL_TIMER_RECOVERED] ) {
        /********** SStore Sync **********/
        diff = timer_start[SNAPC_FULL_TIMER_SS_SYNC]   - timer_start[SNAPC_FULL_TIMER_FIN_LOCAL];
        snapc_full_display_indv_timer_core(diff, "SStore Sync");

        /********** Establish Ckpt **********/
        diff = timer_start[SNAPC_FULL_TIMER_ESTABLISH]   - timer_start[SNAPC_FULL_TIMER_SS_SYNC];
        snapc_full_display_indv_timer_core(diff, "Establish");

        /********** Recover **********/
        diff = timer_start[SNAPC_FULL_TIMER_RECOVERED] - timer_start[SNAPC_FULL_TIMER_ESTABLISH];
        snapc_full_display_indv_timer_core(diff, "Continue/Recover");
    } else { /* Established after procs recovered */
        /********** SStore Sync **********/
        diff = timer_start[SNAPC_FULL_TIMER_SS_SYNC]   - timer_start[SNAPC_FULL_TIMER_RECOVERED];
        snapc_full_display_indv_timer_core(diff, "SStore Sync*");

        /********** Establish Ckpt **********/
        diff = timer_start[SNAPC_FULL_TIMER_ESTABLISH]   - timer_start[SNAPC_FULL_TIMER_SS_SYNC];
        snapc_full_display_indv_timer_core(diff, "Establish*");

        /********** Recover **********/
        diff = timer_start[SNAPC_FULL_TIMER_RECOVERED] - timer_start[SNAPC_FULL_TIMER_FIN_LOCAL];
        snapc_full_display_indv_timer_core(diff, "Continue/Recover*");
    }

    opal_output(0, "Snapshot Coordination Timing: ******************** Summary End\n");
}

/*
 * Print the recovery-only timing summary: a single "Recover" line (time
 * from the START stamp to the RECOVERED stamp) framed by
 * "Summary Begin"/"Summary End" lines.
 *
 * The label is passed as a string literal; the display routine only reads
 * it, so no heap copy is needed.
 */
static void snapc_full_display_recovered_timers(void)
{
    double diff = 0.0;

    opal_output(0, "Snapshot Coordination Timing: ******************** Summary Begin\n");

    /********** Recover **********/
    diff = timer_start[SNAPC_FULL_TIMER_RECOVERED] - timer_start[SNAPC_FULL_TIMER_START];
    snapc_full_display_indv_timer_core(diff, "Recover");

    opal_output(0, "Snapshot Coordination Timing: ******************** Summary End\n");
}

/*
 * Reset every timer slot (indices 0 .. SNAPC_FULL_TIMER_MAX-1) to 0.0.
 */
static void snapc_full_clear_timers(void)
{
    int slot = 0;

    while( slot < SNAPC_FULL_TIMER_MAX ) {
        timer_start[slot] = 0.0;
        ++slot;
    }
}

/*
 * Return the current time in seconds as a double.
 *
 * When OPAL_TIMER_USEC_NATIVE is set the value comes from the OPAL
 * microsecond timer; otherwise it is built from gettimeofday() as whole
 * seconds plus the microsecond fraction.
 */
static double snapc_full_get_time(void)
{
#if OPAL_TIMER_USEC_NATIVE
    return (double)opal_timer_base_get_usec() / 1000000.0;
#else
    struct timeval now;

    gettimeofday(&now, NULL);

    return (double)now.tv_sec + (double)now.tv_usec / 1000000.0;
#endif
}

/*
 * Print one timing line: the label, the phase duration, the overall
 * duration, and the phase's share of the overall duration in percent.
 *
 * diff: duration of the phase, in seconds.
 * str:  label for the phase; only read.
 *
 * The overall duration runs from the START stamp to the RECOVERED stamp
 * when the SS_SYNC stamp is not later than RECOVERED, and to the ESTABLISH
 * stamp otherwise.  When that overall duration is zero the percentage is
 * reported as 0 instead of dividing by zero.
 */
static void snapc_full_display_indv_timer_core(double diff, char *str)
{
    double total = 0;
    double perc  = 0;

    if( timer_start[SNAPC_FULL_TIMER_SS_SYNC] <= timer_start[SNAPC_FULL_TIMER_RECOVERED] ) {
        total = timer_start[SNAPC_FULL_TIMER_RECOVERED] - timer_start[SNAPC_FULL_TIMER_START];
    } else {
        total = timer_start[SNAPC_FULL_TIMER_ESTABLISH] - timer_start[SNAPC_FULL_TIMER_START];
    }

    /* The reference stamps may be unset or equal; avoid a zero divisor */
    if( 0.0 != total ) {
        perc = (diff/total) * 100;
    }

    opal_output(0,
                "snapc_full: timing: %-20s = %10.2f s\t%10.2f s\t%6.2f\n",
                str,
                diff,
                total,
                perc);
    return;
}

/*
 * Account for one daemon reaching FINISHED_LOCAL and print progress.
 *
 * Nothing happens unless 'orted_snapshot' is in the FINISHED_LOCAL state.
 * Otherwise the finished counter is incremented and the percentage of
 * finished daemons (relative to 'total') is computed.  A progress line is
 * printed when the percentage advanced by at least
 * orte_snapc_full_progress_meter since the last report, or when nothing
 * has been reported yet.  Above 95% the daemons that are not yet in the
 * FINISHED_LOCAL state are listed as well.
 *
 * orted_snapshot: daemon whose state just changed.
 * total:          number of daemons taken as 100%.
 * min_state:      not used by this routine.
 */
static void snapc_full_report_progress(orte_snapc_full_orted_snapshot_t *orted_snapshot, int total, int min_state)
{
    orte_snapc_full_orted_snapshot_t *daemon = NULL;
    opal_list_item_t *cur = NULL;
    double perc_done;

    if( ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL != orted_snapshot->state ) {
        return;
    }

    report_progress_cur_loc_finished++;

    /* remaining fraction first, then flipped into "percent finished" */
    perc_done = (total-report_progress_cur_loc_finished)/(total*1.0);
    perc_done = (perc_done-1)*(-100.0);

    if( perc_done >= (report_progress_last_reported_loc_finished + orte_snapc_full_progress_meter) ||
        report_progress_last_reported_loc_finished == 0.0 ) {
        report_progress_last_reported_loc_finished = perc_done;
        opal_output(0, "snapc_full: progress:   %10.2f %c Locally Finished\n",
                    perc_done, '%');
    }

    /* The list of stragglers is only shown near the end */
    if( !(perc_done > 95.0) ) {
        return;
    }

    opal_output(0, "snapc_full: progress:   Waiting on the following daemons (%10.2f %c):", perc_done, '%');

    cur = opal_list_get_first(&(global_snapshot.local_snapshots));
    while( cur != opal_list_get_end(&(global_snapshot.local_snapshots)) ) {
        daemon = (orte_snapc_full_orted_snapshot_t*)cur;

        if( ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL != daemon->state ) {
            opal_output(0, "snapc_full: progress:        Daemon %s",
                        ORTE_NAME_PRINT(&daemon->process_name));
        }

        cur = opal_list_get_next(cur);
    }

    return;
}
