Source

mpi3-fortran / ompi / mca / bcol / bcol.h

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
/*
 * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
 * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#ifndef MCA_BCOL_H
#define MCA_BCOL_H

#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/mca.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/sbgp/sbgp.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/op/op.h"
#include "ompi/include/ompi/constants.h"
#include "ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.h"

#include <limits.h>

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif

/* Forward declarations - please do not remove them.
 * These struct types are only referenced through pointers in this header;
 * their full definitions are not part of this file.
 */
struct ml_memory_block_desc_t;
struct mca_coll_ml_module_t;
struct ml_buffers_t;

/* Selection-logic structs; defined further down in this header. */
struct mca_bcol_base_coll_fn_comm_attributes_t;
struct mca_bcol_base_coll_fn_invoke_attributes_t;
struct mca_bcol_base_coll_fn_desc_t;

/*
 * Message-size range parameters and related limits.
 *
 * NUM_MSG_RANGES is used (as NUM_MSG_RANGES + 1) to size one dimension of
 * filtered_fns_table in struct mca_bcol_base_module_t.
 *
 * MSG_RANGE_INITIAL is fully parenthesized so that it behaves as a single
 * value inside larger expressions: with the previous expansion "(1024)*12",
 * an expression such as "x / MSG_RANGE_INITIAL" was evaluated as
 * "(x / 1024) * 12".
 */
#define NUM_MSG_RANGES      5
#define MSG_RANGE_INITIAL ((1024) * 12)
#define MSG_RANGE_INC      10
#define BCOL_THRESHOLD_UNLIMITED (INT_MAX)

#define BCOL_HEAD_ALIGN 32   /* will turn into an MCA parameter after debug */

/*
 * Identifiers of the collective functions a bcol can provide.
 *
 * The enumerators are numbered consecutively starting at zero, so
 * BCOL_NUM_OF_FUNCTIONS equals the number of identifiers; it is used
 * further down in this header to size the per-collective tables.
 */
typedef enum bcol_coll {
    /* --- blocking functions --- */
    BCOL_ALLGATHER = 0,
    BCOL_ALLGATHERV,
    BCOL_ALLREDUCE,
    BCOL_ALLTOALL,
    BCOL_ALLTOALLV,
    BCOL_ALLTOALLW,
    BCOL_BARRIER,
    BCOL_BCAST,
    BCOL_EXSCAN,
    BCOL_GATHER,
    BCOL_GATHERV,
    BCOL_REDUCE,
    BCOL_REDUCE_SCATTER,
    BCOL_SCAN,
    BCOL_SCATTER,
    BCOL_SCATTERV,
    BCOL_FANIN,
    BCOL_FANOUT,

    /* --- nonblocking functions --- */
    BCOL_IALLGATHER,
    BCOL_IALLGATHERV,
    BCOL_IALLREDUCE,
    BCOL_IALLTOALL,
    BCOL_IALLTOALLV,
    BCOL_IALLTOALLW,
    BCOL_IBARRIER,
    BCOL_IBCAST,
    BCOL_IEXSCAN,
    BCOL_IGATHER,
    BCOL_IGATHERV,
    BCOL_IREDUCE,
    BCOL_IREDUCE_SCATTER,
    BCOL_ISCAN,
    BCOL_ISCATTER,
    BCOL_ISCATTERV,
    BCOL_IFANIN,
    BCOL_IFANOUT,

    BCOL_SYNC,

    /* New function - needed for intermediate steps */
    BCOL_REDUCE_TO_LEADER,

    /* Count of identifiers above; keep this entry last. */
    BCOL_NUM_OF_FUNCTIONS
} bcol_coll;

/*
 * Element-count class passed as the elem_num argument of
 * mca_bcol_base_module_coll_support_fn_t.  BCOL_NUM_OF_ELEM_TYPES is the
 * number of classes and must stay last.
 */
enum bcol_elem_type {
    BCOL_SINGLE_ELEM_TYPE = 0,
    BCOL_MULTI_ELEM_TYPE,
    BCOL_NUM_OF_ELEM_TYPES
};
typedef enum bcol_elem_type bcol_elem_type;

/* Support-query callbacks, stored in the coll_support_all_types and
 * coll_support fields of the bcol component struct below.
 * NOTE(review): the meaning of the int return value is not visible in this
 * header - presumably non-zero means "supported"; confirm against the
 * component implementations.
 */
/* Query that takes only a collective identifier. */
typedef int (*mca_bcol_base_module_coll_support_all_types_fn_t)(bcol_coll coll_name);
/* Query that takes an operation, a datatype and an element-count class. */
typedef int (*mca_bcol_base_module_coll_support_fn_t)(int op, int dtype, bcol_elem_type elem_num);

/*
 * Collective function status.
 *
 * Each value is a fixed offset below OMPI_ERR_MAX, which is declared outside
 * this header.  Presumably this keeps the status values distinct from the
 * regular OMPI return codes - TODO confirm.
 * BCOL_FN_NOT_STARTED is the value returned by MCA_BCOL_CHECK_ORDER when a
 * function is invoked out of order.
 */
enum {
    BCOL_FN_NOT_STARTED = (OMPI_ERR_MAX - 1),
    BCOL_FN_STARTED     = (OMPI_ERR_MAX - 2),
    BCOL_FN_COMPLETE    = (OMPI_ERR_MAX - 3)
};

/*
 * Reduction operation types.
 *
 * This enum originally lived in ompi/op/op.h and should move back there once
 * we are ready to lobby for its inclusion.  Only bcast and barrier are being
 * released initially, and this enum exists to support allreduce, so the move
 * is deferred for now.  The "enum ompi_op_type op_type" field that was
 * introduced into struct ompi_op_t in op.h needs to be resolved at the same
 * time.
 *
 * OMPI_OP_NUM_OF_TYPES is the number of operation types and must stay last.
 */
enum ompi_op_type {
    OMPI_OP_NULL = 0,

    OMPI_OP_MAX,
    OMPI_OP_MIN,
    OMPI_OP_SUM,
    OMPI_OP_PROD,

    OMPI_OP_LAND,
    OMPI_OP_BAND,
    OMPI_OP_LOR,
    OMPI_OP_BOR,
    OMPI_OP_LXOR,
    OMPI_OP_BXOR,

    OMPI_OP_MAXLOC,
    OMPI_OP_MINLOC,

    OMPI_OP_REPLACE,

    OMPI_OP_NUM_OF_TYPES
};


/**
 * Collective component initialization
 *
 * Initialize the given collective component.  This function should
 * initialize any component-level data.  It will be called exactly
 * once during MPI_INIT.
 *
 * @note The component framework is not lazily opened, so attempts
 * should be made to minimize the amount of memory allocated during
 * this function.
 *
 * @param[in] enable_progress_threads True if the component needs to
 *                                support progress threads
 * @param[in] enable_mpi_threads  True if the component needs to
 *                                support MPI_THREAD_MULTIPLE
 *
 * @retval OMPI_SUCCESS Component successfully initialized
 * @retval ORTE_ERROR   An unspecified error occurred
 *                      (NOTE(review): an ORTE_ constant is listed here
 *                      although this is an OMPI framework - confirm the
 *                      intended error code)
 */
typedef int (*mca_bcol_base_component_init_query_fn_t)
    (bool enable_progress_threads, bool enable_mpi_threads);

/**
 * Query whether a component is available for the given sub-group
 *
 * Query whether the component is available for the given
 * sub-group.  If the component is available, an array of module pointers
 * should be allocated and returned (with refcount at 1).  A module will not
 * be used for collective operations until module_enable() is called
 * on the module, but may be destroyed (via OBJ_RELEASE) either before
 * or after module_enable() is called.  If the module needs to release
 * resources obtained during query(), it should do so in the module
 * destructor.
 *
 * A component may provide NULL to this function to indicate it does
 * not wish to run or return an error during module_enable().
 *
 * @note The communicator is available for point-to-point
 * communication, but other functionality is not available during this
 * phase of initialization.
 *
 * @param[in] sbgp         Pointer to sub-group module.
 * @param[out] num_modules Number of modules that were generated
 *                         for the sub-group module.
 *
 * NOTE(review): this comment used to document a "priority" output
 * parameter as well; the typedef below takes only sbgp and num_modules.
 *
 * @returns An array of pointers to initialized module structures if the
 * component can provide modules with the requested functionality, or NULL
 * if the component should not be used on the given communicator.
 */
typedef struct mca_bcol_base_module_t **(*mca_bcol_base_component_comm_query_fn_t)
    (mca_sbgp_base_module_t *sbgp, int *num_modules);


/* Barrier initialization callback: receives the bcol module and the
 * sub-group module.  The return-value convention is not stated in this
 * header - presumably an OMPI status code; TODO confirm.
 */
typedef int (*mca_bcol_barrier_init_fn_t)(struct mca_bcol_base_module_t *bcol_module,
        mca_sbgp_base_module_t *sbgp_module);



/*
 * Macro for use in components that are of type bcol v2.0.0.
 * Expands to the MCA base version followed by the framework name "bcol"
 * and the version triple 2, 0, 0.
 */
#define MCA_BCOL_BASE_VERSION_2_0_0 \
    MCA_BASE_VERSION_2_0_0, \
      "bcol", 2, 0, 0


/* This is really an abstraction violation, but it is the easiest way to get
 * started.  For memory management we need to know which bcol components
 * have compatible memory management schemes.  Such compatibility can
 * be used to eliminate memory copies between levels in the collective
 * operation hierarchy, by having the output buffer of one level be the
 * input buffer to the next level.
 *
 * BCOL_SIZE is the number of schemes; it dimensions the
 * bcol_mpool_compatibility and bcol_mpool_index tables.
 */
enum {
    BCOL_SHARED_MEMORY_UMA    = 0,
    BCOL_SHARED_MEMORY_SOCKET = 1,
    BCOL_POINT_TO_POINT       = 2,
    BCOL_IB_OFFLOAD           = 3,
    BCOL_SIZE
};

/* BCOL_SIZE x BCOL_SIZE tables, defined outside this header.
 * NOTE(review): presumably indexed by a pair of the memory-scheme
 * identifiers from the enum that defines BCOL_SIZE (compatibility flag and
 * mpool index for that pair) - confirm against the definitions.
 */
OMPI_DECLSPEC extern int bcol_mpool_compatibility[BCOL_SIZE][BCOL_SIZE];
OMPI_DECLSPEC extern int bcol_mpool_index[BCOL_SIZE][BCOL_SIZE];

/* Memory registration function.
 * Open question from the original author: what are the input parameters?
 * There are too many void * pointers here.
 * NOTE(review): presumably registers "size" bytes starting at "base" for the
 * context identified by "context_data" and hands back an opaque descriptor
 * through "reg_desc" - confirm against the implementations.
 */
typedef int (*bcol_register_mem_fn_t)(void *context_data, void *base,
        size_t size, void **reg_desc);
/* Memory deregistration function; takes the context data and a
 * registration descriptor. */
typedef int (*bcol_deregister_mem_fn_t)(void *context_data, void *reg_desc);

/* Bcol network context definition.
 * Bundles an id, an opaque data pointer and the pair of memory
 * (de)registration callbacks.
 */
struct bcol_base_network_context_t {
    /* base object */
    opal_object_t super;
    /* Context id - defined by the upper layer (ML) */
    int context_id;
    /* Any context information that the bcol wants to use */
    void *context_data;

    /* registration function */
    bcol_register_mem_fn_t register_memory_fn;
    /* deregistration function */
    bcol_deregister_mem_fn_t deregister_memory_fn;
};
typedef struct bcol_base_network_context_t bcol_base_network_context_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(bcol_base_network_context_t);

/*
 * Primitive function types
 */

/* Broadcast primitives.  BCOL_NUM_BCAST_FUNCTIONS is the number of entries
 * and sizes the bcast_functions array of the bcol component struct. */
enum {
    BCOL_BCAST_SMALL_DATA = 0,       /* small data function */
    BCOL_BCAST_SMALL_DATA_DYNAMIC,   /* small data - dynamic decision making supported */
    BCOL_NUM_BCAST_FUNCTIONS         /* number of functions */
};


/**
 *  BCOL instance.
 */

/* Sentinel meaning "no limit on fragment size"; this supports using user
 * buffers rather than library buffers.
 */
#define FRAG_SIZE_NO_LIMIT (-1)

/* forward declaration - defined further down in this header */
struct coll_bcol_collective_description_t;

/* bcol component descriptor (framework version 2.0.0): the base MCA
 * component description plus the query callbacks, flags and
 * fragmentation settings of a bcol component.
 */
struct mca_bcol_base_component_2_0_0_t {

    /** Base component description */
    mca_base_component_t bcol_version;

    /** Component initialization function */
    mca_bcol_base_component_init_query_fn_t collm_init_query;

    /** Query whether the component is usable for a given sub-group;
        returns the generated modules */
    mca_bcol_base_component_comm_query_fn_t collm_comm_query;

    /** Support query taking (op, dtype, elem_num).
        NOTE(review): this field was previously described as "if bcol
        supports all possible data types", which does not match the
        callback's arguments - confirm the intended meaning */
    mca_bcol_base_module_coll_support_fn_t coll_support;

    /** If bcol supports all possible data types for given collective operation */
    mca_bcol_base_module_coll_support_all_types_fn_t coll_support_all_types;

    /** Use this flag to prevent multiple init_query calls
        in case the same bcol is used on more than a single level */
    bool init_done;

    /** If collective calls with bcols of this type need to be ordered */
    bool need_ordering;

    /** MCA parameter: Priority of this component */
    int priority;

    /** Bcast function descriptions, one slot per bcast primitive */
    struct coll_bcol_collective_description_t *
        bcast_functions[BCOL_NUM_BCAST_FUNCTIONS];

    /** Number of network contexts - need this for resource management */
    int n_net_contexts;

    /** List of network contexts (array of pointers) */
    bcol_base_network_context_t **network_contexts;

    /*
     * Fragmentation support
     */

    /** Minimum fragment size */
    size_t min_frag_size;

    /** Maximum fragment size.
        NOTE(review): presumably FRAG_SIZE_NO_LIMIT is the "unlimited"
        value for this field - confirm */
    int32_t max_frag_size;

    /** Supports direct use of user-buffers */
    int can_use_user_buffers;

    /** Support pipelining */
    int use_pipeline;
};
typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_2_0_0_t;
typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_component_t);

/* Forward declarations of ML-level types that struct bcol_function_args_t
 * refers to through pointers only. */
struct mca_coll_ml_descriptor_t;
struct ml_payload_buffer_desc_t;
struct mca_coll_ml_route_info_t;

/* Bookkeeping used to keep collectives on ordered bcols in sequence;
 * read and updated by MCA_BCOL_CHECK_ORDER and
 * MCA_BCOL_UPDATE_ORDER_COUNTER. */
typedef struct {
    /* Sequence number of the collective fragment */
    int order_num;

    /* How many of the bcols that need ordering have been started */
    int bcols_started;

    /* Number of functions that are called for bcols that need ordering */
    int n_fns_need_ordering;
} mca_bcol_base_order_info_t;

/* Argument bundle handed to every bcol collective primitive (see
 * mca_bcol_base_module_collective_fn_primitives_t).  One struct is used for
 * all collectives so that they can share a single function signature.
 */
struct bcol_function_args_t {
    /* full message sequence number */
    int64_t sequence_num;
    /* full message descriptor - single copy of fragment invariant
     * parameters */
    /* Pasha: We don't need this one for the new flow - remove it */
    struct mca_coll_ml_descriptor_t *full_message_descriptor;
    /* route information for the root (type defined at the ML level) */
    struct mca_coll_ml_route_info_t *root_route;
    /* function status */
    int function_status;
    /* root, for rooted operations */
    int root;
    /* input buffer */
    void *sbuf;
    /* presumably the output (receive) buffer, see result_in_rbuf - confirm */
    void *rbuf;
    /* presumably the caller's own buffer - confirm */
    void *userbuf;
    /* payload buffer descriptors for source and destination */
    struct ml_payload_buffer_desc_t *src_desc;
    struct ml_payload_buffer_desc_t *dst_desc;
    /* ml buffer size */
    uint32_t buffer_size;
    /* index of buffer in ml payload cache */
    int buffer_index;
    /* element count, datatype and reduction operation */
    int count;
    struct ompi_datatype_t *dtype;
    struct ompi_op_t *op;
    /* offsets associated with sbuf and rbuf (units not stated here) */
    int sbuf_offset;
    int rbuf_offset;
    /* for bcol opaque data */
    void *bcol_opaque_data;
    /* An output argument that will be used by the BCOL function to tell ML
     * that the result of the BCOL is in rbuf */
    bool result_in_rbuf;
    bool root_flag; /* True if the rank is root of operation */
    int status; /* Used for non-blocking collective completion */
    uint32_t frag_size; /* fragment size for large messages */
    int hier_factor; /* factor used when bcast is invoked as a service function
                      * back down the tree (in allgather, for example), where
                      * the "pacl_len" (sic - presumably pack_len) is not the
                      * actual length of the data needing bcasting
                      */
    /* ordering bookkeeping, see MCA_BCOL_CHECK_ORDER */
    mca_bcol_base_order_info_t order_info;
};

typedef struct bcol_function_args_t bcol_function_args_t;


/* The collective operation is defined by a series of collective operations
 * invoked through a function pointer.  Each function may be different,
 * so the arguments are stored in a struct and a pointer to that struct is
 * passed, as a way to hide the different function signatures.
 *
 * @param[in] input_args  Structure with function arguments
 * @param[in] const_args  Pointer to a struct coll_ml_function_t (declared
 *                        outside this header)
 *
 * @retval OMPI_SUCCESS successful completion
 * @retval OMPI_ERROR function returned error
 *
 * NOTE(review): this comment used to list "bcol_desc" and "status"
 * parameters and the status values MCA_BCOL_COMPLETE / MCA_BCOL_IN_PROGRESS,
 * none of which appear in the typedefs below or elsewhere in this header.
 * Presumably the BCOL_FN_* status values are what implementations report -
 * confirm.
 */
/* forward declaration */
struct mca_bcol_base_module_t;

/* collective function prototype - all functions have the same interface
 * so that we can call them via a function pointer */
struct coll_ml_function_t;
typedef int (*mca_bcol_base_module_collective_fn_primitives_t)
    (bcol_function_args_t *input_args, struct coll_ml_function_t *const_args);

/* per-collective initialization prototype; takes only the bcol module */
typedef int (*mca_bcol_base_module_collective_init_fn_primitives_t)
    (struct mca_bcol_base_module_t *bcol_module);

    /**
     *  Function to query for collective function attributes
     *
     *  @param attribute (IN) the attribute of interest
     *  @param algorithm_parameters (OUT) the value of attribute for this
     *         function.  If this attribute is not supported,
     *         OMPI_ERR_NOT_FOUND is returned.
     */
    typedef int (*mca_bcol_get_collective_attributes)(int attribute,
            void *algorithm_parameters);

/* Data structure for tracking the relevant data needed for ml level
 * algorithm construction (e.g., function selection), initialization, and
 * usage.
 */
struct coll_bcol_collective_description_t {
    /* collective initiation function - first function called */
    mca_bcol_base_module_collective_fn_primitives_t coll_fn;

    /* collective progress function (same prototype as coll_fn) */
    mca_bcol_base_module_collective_fn_primitives_t progress_fn;

    /* attribute query function for this collective */
    mca_bcol_get_collective_attributes get_attributes;

    /* attributes supported - bit map */
    uint64_t attribute;

};
typedef struct coll_bcol_collective_description_t
coll_bcol_collective_description_t;

/* Collective operation attributes */
enum {
    /* Supports dynamic decisions - i.e., the collective operation does not
     * need to be fully defined before it can be started.
     */
    BCOL_ATTRIBUTE_DYNAMIC = 0,

    /* Number of attributes; keep last. */
    BCOL_NUM_ATTRIBUTES
};

/* For rooted collectives: does the algorithm know its data source?
 * DATA_SRC_TYPES is the number of choices and sizes the first dimension of
 * filtered_fns_table in struct mca_bcol_base_module_t.
 */
enum {
    DATA_SRC_KNOWN   = 0,
    DATA_SRC_UNKNOWN = 1,
    DATA_SRC_TYPES
};

/* Waiting semantics of a collective function. */
enum {
    BLOCKING     = 0,
    NON_BLOCKING = 1
};
/* gvm: For selection logic.
 * Communicator-level attributes of a collective function.
 */
typedef struct mca_bcol_base_coll_fn_comm_attributes_t {
    /* collective type - presumably a bcol_coll value; confirm */
    int bcoll_type;

    /* communicator size range */
    int comm_size_min;
    int comm_size_max;

    /* presumably DATA_SRC_KNOWN or DATA_SRC_UNKNOWN; confirm */
    int data_src;

    /* presumably BLOCKING or NON_BLOCKING; confirm */
    int waiting_semantics;
} mca_bcol_base_coll_fn_comm_attributes_t;

/* Invocation-level attributes of a collective function: message-size range
 * plus bit maps of the datatypes and operation types it supports.
 */
typedef struct mca_bcol_base_coll_fn_invoke_attributes_t {
    /* message size range */
    int bcol_msg_min;
    int bcol_msg_max;

    /* Max is OMPI_DATATYPE_MAX_PREDEFINED defined to be 45 */
    uint64_t datatype_bitmap;

    /* bit map of optypes supported */
    uint32_t op_types_bitmap;
} mca_bcol_base_coll_fn_invoke_attributes_t;

/* List item describing one collective function for the selection logic:
 * its communicator-level and invocation-level attributes plus the
 * initiation and progress entry points.
 */
struct mca_bcol_base_coll_fn_desc_t {
    /* list linkage (the struct is stored in opal lists, e.g. bcol_fns_table) */
    opal_list_item_t super;
    /* communicator-level attributes */
    struct mca_bcol_base_coll_fn_comm_attributes_t *comm_attr;
    /* invocation-level attributes */
    struct mca_bcol_base_coll_fn_invoke_attributes_t *inv_attr;
    /* collective initiation function */
    mca_bcol_base_module_collective_fn_primitives_t coll_fn;
    /* collective progress function */
    mca_bcol_base_module_collective_fn_primitives_t progress_fn;
};

typedef struct mca_bcol_base_coll_fn_desc_t mca_bcol_base_coll_fn_desc_t;
/* NOTE(review): unlike the other class declarations in this header, this one
 * is not prefixed with OMPI_DECLSPEC - confirm whether the class needs to be
 * visible outside the library. */
OBJ_CLASS_DECLARATION(mca_bcol_base_coll_fn_desc_t);

/* end selection logic */

/* collective initialization function: receives the bcol module and its
 * sub-group module */
typedef int (*mca_bcol_base_module_collective_init_fn_t)
    (struct mca_bcol_base_module_t *bcol_module,
     mca_sbgp_base_module_t *sbgp_module);

/* per communicator memory initialization function.
 * NOTE(review): the second parameter is named "module" but its type is the
 * component type (mca_bcol_base_component_t) - confirm which is intended. */
typedef  int (*mca_bcol_module_mem_init)(struct ml_buffers_t *registered_buffers,
 mca_bcol_base_component_t *module);

/* Initialize memory block - ml_memory_block initialization interface function
 *
 * Invoked at the ml level, used to pass bcol specific registration information
 * for the "ml_memory_block".
 *
 * @param[in] ml_module        Pointer to the ml module.
 *
 * @param[in] bcol_module      Pointer to the bcol module.
 *
 * @param[in] reg_data         bcol specific registration data.
 *
 * @returns   On Success: OMPI_SUCCESS
 *            On Failure: OMPI_ERROR
 *
 * NOTE(review): an earlier variant of this typedef took
 * (struct ml_memory_block_desc_t *ml_block, void *reg_data), where the
 * memory block carried the bcol specific registration information and a
 * call back function used for resource recycling.  Presumably that block is
 * now reached through ml_module - confirm.
 */

typedef int (*mca_bcol_base_init_memory_fn_t)
    (struct mca_coll_ml_module_t *ml_module,
     struct mca_bcol_base_module_t *bcol_module,
     void *reg_data);

/* netpatterns initialization hook; stored in the k_nomial_tree field of
 * struct mca_bcol_base_module_t */
typedef int (*mca_common_allgather_init_fn_t)
    (struct mca_bcol_base_module_t *bcol_module);

/* hook that sets the small_message_thresholds array of a bcol module;
 * stored in its set_small_msg_thresholds field */
typedef void (*mca_bcol_base_set_thresholds_fn_t)
    (struct mca_bcol_base_module_t *bcol_module);

/* Single-bit feature flags.  NOTE(review): presumably these are the bits
 * stored in the supported_mode field of struct mca_bcol_base_module_t -
 * confirm. */
enum {
    MCA_BCOL_BASE_ZERO_COPY                  = 1 << 0,
    MCA_BCOL_BASE_NO_ML_BUFFER_FOR_LARGE_MSG = 1 << 1,
    MCA_BCOL_BASE_NO_ML_BUFFER_FOR_BARRIER   = 1 << 2
};

/* Base bcol module.
 * Holds the per-module state shared by all bcol implementations: the owning
 * component, the network context, the sub-group partner, the collective
 * function tables and the ordering / threshold settings.
 */
struct mca_bcol_base_module_t {
    /* base object */
    opal_object_t super;

    /* bcol component (Pasha: Do we really need to cache the component?) */
    mca_bcol_base_component_t *bcol_component;

    /* network context that is used by this bcol;
     * only one context per bcol is allowed */
    bcol_base_network_context_t *network_context;

    /* We are going to use the context index a lot.  In order to decrease
     * the number of dereferences of bcol->network_context->index we cache
     * the value on the bcol.
     * NOTE(review): the network context struct in this header has a
     * context_id field, not "index" - confirm which value is cached. */
    int context_index;

    /* Set of flags that describe features supported by bcol */
    uint64_t supported_mode;

    /* per communicator memory initialization function */
    mca_bcol_module_mem_init init_module;

    /* sub-grouping module partner */
    mca_sbgp_base_module_t *sbgp_partner_module;

    /* size of subgroup - cache this, so we can have access when
     * sbgp_partner_module no longer exists */
    int size_of_subgroup;

    /* sequence number offset - want to make sure that we start
     * id'ing collectives with id 0, so we can have simple
     * resource management.
     */
    int64_t squence_number_offset;


    /* number of times to poll for operation completion before
     * breaking out of a non-blocking collective operation
     */
    int n_poll_loops;

    /* size of header that will go in data buff, should not include
     * any info regarding alignment, let the ml level handle this
     */
    uint32_t header_size;


    /* Each bcol is assigned a unique value;
     * see if we can get away with a 16-bit id
     */
    int16_t bcol_id;

    /* FIXME:
     * Since mca_bcol_base_module_t is the only parameter which will be passed
     * into the bcol_basesmuma_bcast_init(), add the flag to indicate whether
     * the hdl-based algorithms will get enabled.
     */
    bool use_hdl;
    /*
     * Collective function pointers
     */
    /* changing function signature - will replace bcol_functions */
    mca_bcol_base_module_collective_fn_primitives_t bcol_function_table[BCOL_NUM_OF_FUNCTIONS];

    /* Tables hold pointers to functions */
    mca_bcol_base_module_collective_init_fn_primitives_t bcol_function_init_table[BCOL_NUM_OF_FUNCTIONS];
    /* one list per collective (presumably of mca_bcol_base_coll_fn_desc_t
     * items - confirm) */
    opal_list_t bcol_fns_table[BCOL_NUM_OF_FUNCTIONS];
    /* function descriptors indexed by
     * [data source][2][collective][message range][op type][datatype];
     * NOTE(review): the dimension of size 2 presumably selects
     * BLOCKING / NON_BLOCKING - confirm */
    struct mca_bcol_base_coll_fn_desc_t*
    filtered_fns_table[DATA_SRC_TYPES][2][BCOL_NUM_OF_FUNCTIONS][NUM_MSG_RANGES+1][OMPI_OP_NUM_OF_TYPES][OMPI_DATATYPE_MAX_PREDEFINED];

    /*
     * Bcol interface function to pass bcol specific
     * info and memory recycling call back
     */
    mca_bcol_base_init_memory_fn_t bcol_memory_init;

    /*
     * netpatterns interface function, would like to invoke this
     * on the ml level
     */
    mca_common_allgather_init_fn_t k_nomial_tree;
    /* Each bcol caches a list which describes how many ranks
     * are "below" each rank in this bcol
     */
    int *list_n_connected;

    /* offsets for scatter/gather */
    int hier_scather_offset;

    /* Small message threshold for each collective */
    int small_message_thresholds[BCOL_NUM_OF_FUNCTIONS];

    /* Set small_message_thresholds array */
    mca_bcol_base_set_thresholds_fn_t set_small_msg_thresholds;

    /* Pointer to the order counter on the upper layer,
       used if the bcol needs to be ordered */
    int *next_inorder;
};
typedef struct mca_bcol_base_module_t mca_bcol_base_module_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_module_t);

/* bcol descriptor: currently only a free-list item with no additional
 * fields. */
struct mca_bcol_base_descriptor_t {
    ompi_free_list_item_t super;
/* Vasily: will be described in the future */
};
typedef struct mca_bcol_base_descriptor_t mca_bcol_base_descriptor_t;

/*
 * Ordering guard for bcol functions.
 *
 * Compares the counter that (module)->next_inorder points to with
 * (bcol_function_args)->order_info.order_num.  When they differ, the macro
 * makes the *enclosing function* return BCOL_FN_NOT_STARTED; otherwise
 * execution falls through.
 *
 * The expansion deliberately does not end in a semicolon, so that
 * "MCA_BCOL_CHECK_ORDER(m, a);" is exactly one statement and can be used as
 * the unbraced body of an if/else.  Callers must supply the semicolon.
 *
 * @param module              pointer to a struct with an "int *next_inorder"
 *                            member
 * @param bcol_function_args  pointer to a struct with an
 *                            "order_info.order_num" member
 */
#define MCA_BCOL_CHECK_ORDER(module, bcol_function_args)                     \
    do {                                                                     \
        if (*((module)->next_inorder) !=                                     \
                               (bcol_function_args)->order_info.order_num) { \
            return BCOL_FN_NOT_STARTED;                                      \
        }                                                                    \
    } while (0)

/*
 * Record that one more ordered bcol function has been started.
 *
 * Increments (order_info)->bcols_started; once it equals
 * (order_info)->n_fns_need_ordering, the counter that
 * (module)->next_inorder points to is incremented as well.
 *
 * The expansion deliberately does not end in a semicolon, so that
 * "MCA_BCOL_UPDATE_ORDER_COUNTER(m, oi);" is exactly one statement and can
 * be used as the unbraced body of an if/else.  Callers must supply the
 * semicolon.  Note that order_info is evaluated more than once.
 *
 * @param module      pointer to a struct with an "int *next_inorder" member
 * @param order_info  pointer to the ordering bookkeeping (bcols_started,
 *                    n_fns_need_ordering)
 */
#define MCA_BCOL_UPDATE_ORDER_COUNTER(module, order_info) \
    do {                                                  \
       (order_info)->bcols_started++;                     \
        if ((order_info)->n_fns_need_ordering ==          \
                        (order_info)->bcols_started) {    \
            ++(*((module)->next_inorder));                \
        }                                                 \
    } while (0)

#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_BCOL_H */