KMR
kmr.h
Go to the documentation of this file.
1 /* kmr.h (2014-02-04) */
2 /* Copyright (C) 2012-2018 RIKEN R-CCS */
3 
4 #ifndef _KMR_H
5 #define _KMR_H
6 
7 #define KMR_H 20201116
8 
9 /** \file kmr.h KMR Interface. GENERAL NOTES. (1) The sizes of
10  key-value fields are rounded up to 8-byte boundary. */
11 
12 /** \mainpage KMR
13  \htmlinclude kmr-overview.html */
14 
15 #include <stdio.h>
16 #include <stddef.h>
17 #include <inttypes.h>
18 #include <sys/types.h>
19 #include <string.h>
20 #include <assert.h>
21 
22 /* Poor guess of K or Fujitsu FX10. */
23 #if defined(__sparc) && defined(__HPC_ACE__)
24 #define __K 1
25 #endif
26 
27 #define KMR_BR0 {
28 #define KMR_BR1 }
29 #ifdef __cplusplus
30 extern "C" KMR_BR0
31 #endif
32 
33 #ifdef __cplusplus
34 #ifdef _Bool
35 #undef _Bool
36 #endif
37 typedef unsigned char _Bool;
38 #endif
39 
40 /* KMR version number. */
41 
42 #define KMR_API_ID0(X) KMR_API_ID1(X)
43 #define KMR_API_ID1(X) kmr_api_ ## X
44 #define KMR_API_ID KMR_API_ID0(KMR_H)
45 
46 extern int KMR_API_ID;
47 extern const int kmr_version;
48 
49 /* MPI tags used as replies in kmr_map_via_spawn(). */
50 
51 #define KMR_TAG_SPAWN_REPLY 500
52 #define KMR_TAG_SPAWN_REPLY1 501
53 
54 struct kmr_ctx;
55 union kmr_kvs;
56 
57 typedef struct kmr_ctx KMR;
58 typedef union kmr_kvs KMR_KVS;
59 
60 #define KMR_JOB_NAME_LEN 256
61 
62 /** Makes a new key-value stream (of type KMR_KVS) with the specified
63  field datatypes. */
64 
65 #define kmr_create_kvsx(MR,KF,VF,OPT) \
66  kmr_create_kvs7((MR), (KF), (VF), (OPT), __FILE__, __LINE__, __func__)
67 
68 /** Makes a new key-value stream (of type KMR_KVS) with the specified
69  field datatypes. */
70 
71 #define kmr_create_kvs(MR,KF,VF) \
72  kmr_create_kvs7((MR), (KF), (VF), kmr_noopt, __FILE__, __LINE__, __func__)
73 
74 /** Makes a new key-value stream (of type KMR_KVS). */
75 
76 #define kmr_create_kvs_(MR,IGNORE) \
77  kmr_create_kvs7((MR), KMR_KV_BAD, KMR_KV_BAD, kmr_noopt, \
78  __FILE__, __LINE__, __func__)
79 
80 /** Maps simply. See kmr_map9(). */
81 
82 #define kmr_map(KVI, KVO, ARG, OPT, M) \
83  kmr_map9(0, (KVI), (KVO), (ARG), (OPT), (M), \
84  __FILE__, __LINE__, __func__)
85 
86 /** Reduces key-value pairs. See kmr_reduce9(). */
87 
88 #define kmr_reduce(KVI, KVO, ARG, OPT, R) \
89  kmr_reduce9(0, (KVI), (KVO), (ARG), (OPT), (R), \
90  __FILE__, __LINE__, __func__)
91 
92 /* Link of Key-Value Streams. */
93 
94 struct kmr_kvs_list { /* link record embedded in each key-value stream as its LINK field */
95  KMR_KVS *prev, *next; /* neighbouring key-value streams in the list */
96 };
97 
99  KMR_KVS *head, *tail;
100 };
101 
102 struct kmr_ckpt_ctx;
103 struct kmr_trace;
104 
105 /** Information of Source Code Line. */
106 
107 struct kmr_code_line { const char *file; const char *func; int line; };
108 
109 /** KMR Context. Structure KMR is a common record of key-value
110  streams. It records a few internal states and many options.
111 
112  KVSES is a linked-list recording all active key-value streams. It
113  is used to warn about unfreed key-value streams.
114 
115  CKPT_KVS_ID_COUNTER and CKPT_CTX record checkpointing states.
116 
117  LOG_TRACES is a file stream, when it is non-null, which records
118  times taken by each call to map/reduce-functions. Note that trace
119  routines call MPI_Wtime() in OMP parallel regions (although it may
120  be non-threaded). ATWORK indicates the caller of the current work
121  of mapping or reducing (or null if it is not associated), which is
122  used in logging traces.
123 
124  SPAWN_SIZE and SPAWN_COMMS temporarily holds an array of
125  inter-communicators for kmr_map_via_spawn(), so that a
126  communicator can be obtained by kmr_get_spawner_communicator() in
127  a map-function.
128 
129  MAPPER_PARK_SIZE is the number of entries pooled before calling a
130  map-function. Entries are aggregated to try to call a
131  map-function with threads. PRESET_BLOCK_SIZE is the default
132  allocation size of a buffer of key-values. It is used as a
133  block-size of key-value streams after trimmed by the amount of the
134  malloc overhead. MALLOC_OVERHEAD (usually an amount of one
135  pointer) is reduced from the allocation size, to keep good
136  alignment boundary.
137 
138  ATOA_THRESHOLD makes the choice of algorithms of all-to-all-v
139  communication by the sizes of messages (set to zero to use
140  all-to-all-v of MPI).
141 
142  ATOA_SIZE_LIMIT is normally 0. It is mainly for tests. It lowers
143  the limit of the data size of using MPI all-to-all-v from 16GB to
144  the specified value. When the data size exceeds the value, a
145  naive method using isend/irecv is used instead of MPI
146  all-to-all-v.
147 
148  ATOA_REQUESTS_LIMIT is the limit of the number of isend/irecv
149  requests which are pending in a naive all-to-all-v algorithm, that
150  is used when the data size exceeds the 16GB. It is normally 0,
151  which sets it to 4096.
152 
153  SORT_TRIVIAL determines the sorter to run on a single node when
154  data size is this value or smaller. SORT_THRESHOLD determines the
155  sorter to use full sampling of a sampling-sort, or pseudo sampling
156  when data size is small. SORT_SAMPLE_FACTOR determines the
157  number of samples of a sampling-sort. SORT_THREADS_DEPTH controls
158  the local sorter. The quick-sort uses Open MP threads until
159  recursion depth reaches this value (set to zero for sequential
160  run).
161 
162  FILE_IO_BLOCK_SIZE is a block size of file reading, used when the
163  striping information is not available.
164 
165  PUSHOFF_BLOCK_SIZE is a block size of a push-off key-value stream.
166  It is a communication block size and should be equal on all
167  ranks. PUSHOFF_POLL_RATE gives a hint to a polling interval of a
168  push-off key-value stream.
169 
170  KMR_INSTALLATION_PATH records the installation path, which is
171  taken from the configuration. SPAWN_WATCH_PROGRAM is a
172  watch-program name, which is used in spawning processes which do
173  not communicate to the parent. The variable is a file-path which
174  may be set in advance or may be set to one where the watch-program
175  is copied (usually in the user's home directory).
176  SPAWN_WATCH_PREFIX is a location where a watch-program is to be
177  installed (instead of the home directory). SPAWN_WATCH_HOST_NAME
178  is a name of a host-name of a spawner. It may be set when there
179  is a difficulty in connecting a socket. SPAWN_MAX_PROCESSES
180  limits the number of processes simultaneously spawned without
181  regard to the universe size. SPAWN_WATCH_AF is 0, 4, or 6 as the
182  preferred IP address format used by the watch-program.
183  SPAWN_WATCH_PORT_RANGE[2] is a range of IP port number used by the
184  watch-program (values are inclusive). SPAWN_GAP_MSEC[2] is the
185  time given between spawning calls needed by the MPI runtime to
186  clean-up the resource management. The value is scaled to the log
187  of the universe size, corresponding the 1st value to 0 processes
188  and the 2nd value to 1,000 processes (the default is 1 second to
189  one process and 10 seconds for 1,000 processes).
190 
191  SPAWN_SELF holds the communicator used in spawning. KMR retries
192  MPI_Comm_spawn() because it can fail due to the race between an
193  issue and a delay in job scheduling. SPAWN_RETRY_LIMIT and
194  SPAWN_RETRY_GAP_MSEC control retries of MPI_Comm_spawn(). It
195  retries MPI_Comm_spawn() by SPAWN_RETRY_LIMIT times taking a
196  SPAWN_RETRY_GAP_MSEC sleep in between (300 seconds in total by
197  default).
198 
199  SPAWN_WATCH_ACCEPT_ONHOLD_MSEC is the time given to wait for the
200  watch-program to connect back by a socket.
201 
202  VERBOSITY is the verbosity of warning messages; default 5 is good
203  for typical use.
204 
205  ONK enables the features on K or FX10. SINGLE_THREAD makes imply
206  the nothreading option for mapper/shuffler/reducer. ONE_STEP_SORT
207  disables a prior sorting step which sort on (packed/hashed)
208  integer keys in local sorting. STEP_SYNC is to call a barrier at
209  each operation step for debugging. TRACE_FILE_IO, TRACE_MAP_MS,
210  and TRACE_MAP_SPAWN let dump trace output for debugging.
211  (TRACE_ALLTOALL lets dump trace output on communication for
212  debugging internals). TRACE_KMRDP lets dump timing information of
213  run of KMR-DP. STD_ABORT lets use abort() instead of MPI_Abort()
214  on errors, to let cores dumped on some MPI implementations.
215  (FILE_IO_DUMMY_STRIPING is for debugging internals, and assigns
216  dummy striping information on not Lustre file-systems).
217  (FILE_IO_ALWAYS_ALLTOALLV is for debugging internals).
218  MAP_MS_USE_EXEC forces KMR use fork-execing instead of system(3C)
219  to start a subprocess in kmr_map_ms_commands(). KMR also uses
220  fork-execing when command strings include null characters (not at
221  the end). MAP_MS_ABORT_ON_SIGNAL makes KMR abort when a
222  subprocess is killed in kmr_map_ms_commands().
223  SPAWN_DISCONNECT_EARLY (useless) lets the spawner free the
224  inter-communicator immediately after spawning.
225  SPAWN_DISCONNECT_BUT_FREE lets the spawner use
226  MPI_Comm_disconnect() instead of MPI_Comm_free() (It is only used
227  with buggy Intel MPI (4.x)). (SPAWN_PASS_INTERCOMM_IN_ARGUMENT
228  changes the behavior to the old API). MPI_THREAD_SUPPORT records
229  the thread support level. CKPT_ENABLE is a checkpointing enable.
230  CKPT_SELECTIVE enables users to specify which kmr functions take
231  ckpt files of the output key-value stream. To take ckpt files
232  with this option enabled, users should specify TAKE_CKPT option
233  enabled when calling a kmr function. CKPT_NO_FSYNC does not call
234  fsync syscall on writing ckpt files. Both CKPT_SELECTIVE and
235  CKPT_NO_FSYNC should be specified with CKPT_ENABLE.
236  STOP_AT_SOME_CHECK_GLOBALLY forces global checking of stop-at-some
237  state in mapping (not implemented). Mapping with stop-at-some
238  should be stopped when some key-value is added on any rank, but
239  the check is performed only locally by default. PUSHOFF_HANG_OUT
240  makes communication of push-off continue on after a finish of
241  mapping/reducing. PUSHOFF_FAST_NOTICE enables use of RDMA put for
242  event notification in push-off key-value streams. PUSHOFF_STAT
243  enables collecting statistics of communication in push-off
244  key-value streams. KMRVIZ_TRACE enables tracing kmr function
245  calls for KMRViz. IDENTIFYING_NAME is just a note. */
246 
247 struct kmr_ctx {
248  MPI_Comm comm;
249  int nprocs;
250  int rank;
251  MPI_Info conf;
252  struct kmr_kvs_list_head kvses; /* KVSES: list of all active key-value streams */
253 
254  long ckpt_kvs_id_counter; /* checkpointing state */
255  struct kmr_ckpt_ctx *ckpt_ctx; /* checkpointing state */
256 
257  struct kmr_trace *kvt_ctx; /* NOTE(review): not described in the header comment; presumably KMRViz trace state -- confirm */
258 
259  FILE *log_traces; /* when non-null, records times taken by map/reduce-function calls */
260  struct kmr_code_line *atwork; /* caller of the current mapping/reducing work, or null */
261 
262  int spawn_size; /* with SPAWN_COMMS: inter-communicators held temporarily for kmr_map_via_spawn() */
263  MPI_Comm **spawn_comms;
264 
265  long mapper_park_size; /* number of entries pooled before calling a map-function */
266  size_t preset_block_size; /* default allocation size of a key-value buffer */
267  size_t malloc_overhead; /* amount subtracted from the allocation size (usually one pointer) */
268 
269  long atoa_threshold; /* message-size threshold choosing the all-to-all-v algorithm; 0 uses MPI's */
270  long atoa_size_limit; /* normally 0; lowers the data-size limit for MPI all-to-all-v (mainly for tests) */
271  int atoa_requests_limit; /* limit of pending isend/irecv requests; 0 means 4096 */
272 
273  long sort_trivial; /* at or below this data size, sorting runs on a single node */
274  long sort_threshold; /* selects full sampling or pseudo sampling of the sampling-sort */
275  long sort_sample_factor; /* determines the number of samples of the sampling-sort */
276  int sort_threads_depth; /* recursion depth up to which quick-sort uses threads; 0 is sequential */
277 
278  long file_io_block_size; /* block size of file reading when striping information is unavailable */
279 
280  int rlimit_nofile; /* NOTE(review): not described in the header comment */
281 
282  char *kmr_installation_path; /* installation path taken from the configuration */
283  char *spawn_watch_program; /* file-path of the watch-program */
284  char *spawn_watch_prefix; /* location where the watch-program is to be installed */
285  char *spawn_watch_host_name; /* host-name of a spawner */
286  int spawn_max_processes; /* limit of simultaneously spawned processes */
287  int spawn_watch_af; /* 0, 4, or 6: preferred IP address format of the watch-program */
288  int spawn_watch_port_range[2]; /* inclusive range of IP port numbers of the watch-program */
289  int spawn_gap_msec[2]; /* gap between spawning calls, scaled between the two values */
290  int spawn_watch_accept_onhold_msec; /* time to wait for the watch-program to connect back */
291 
292  MPI_Comm spawn_self; /* communicator used in spawning */
293  int spawn_retry_limit; /* number of retries of MPI_Comm_spawn() */
294  int spawn_retry_gap_msec; /* sleep between retries of MPI_Comm_spawn() */
295 
296  void *simple_workflow; /* NOTE(review): the swf_ fields are not described in the header comment */
297  void *swf_spawner_so;
298  char *swf_spawner_library;
299  size_t swf_args_size;
300 
301  size_t pushoff_block_size; /* communication block size of push-off streams; same on all ranks */
302  int pushoff_poll_rate; /* hint to the polling interval of push-off streams */
303 
304  uint8_t verbosity; /* verbosity of warning messages; default 5 */
305 
306  _Bool onk : 1;
307  _Bool single_thread : 1;
308  _Bool one_step_sort : 1;
309  _Bool step_sync : 1;
310  _Bool trace_sorting : 1; /* NOTE(review): not described in the header comment */
311  _Bool trace_file_io : 1;
312  _Bool trace_map_ms : 1;
313  _Bool trace_map_spawn : 1;
314  _Bool trace_alltoall : 1;
315  _Bool trace_kmrdp : 1;
316  _Bool trace_iolb : 1; /* NOTE(review): not described in the header comment */
317  _Bool std_abort : 1;
318  _Bool file_io_dummy_striping : 1;
319  _Bool file_io_always_alltoallv : 1;
320  _Bool map_ms_use_exec : 1;
321  _Bool map_ms_abort_on_signal : 1;
322  _Bool spawn_sync_at_startup : 1; /* NOTE(review): not described in the header comment */
323  _Bool spawn_watch_all : 1; /* NOTE(review): not described in the header comment */
324  _Bool spawn_disconnect_early : 1;
325  _Bool spawn_disconnect_but_free : 1;
326  _Bool spawn_pass_intercomm_in_argument : 1;
327  _Bool keep_fds_at_fork : 1; /* NOTE(review): not described in the header comment */
328 
329  _Bool mpi_thread_support : 1;
330 
331  _Bool ckpt_enable : 1;
332  _Bool ckpt_selective : 1;
333  _Bool ckpt_no_fsync : 1;
334 
335  _Bool stop_at_some_check_globally : 1;
336 
337  _Bool pushoff_hang_out : 1;
338  _Bool pushoff_fast_notice : 1;
339  _Bool pushoff_stat : 1;
340 
341  _Bool swf_exec_so; /* the three swf_ flags are whole _Bool objects, not 1-bit fields */
342  _Bool swf_record_history;
343  _Bool swf_debug_master;
344 
345  struct {
346  double times[4];
347  long counts[10];
348  } pushoff_statistics; /* presumably filled when PUSHOFF_STAT is set -- confirm */
349 
350  _Bool kmrviz_trace : 1; /* enables tracing kmr function calls for KMRViz */
351 
352  char identifying_name[KMR_JOB_NAME_LEN]; /* just a note */
353 };
354 
355 /** Datatypes of Keys or Values. It indicates the field data of keys
356  or values. KMR_KV_OPAQUE is a variable-sized byte vector, and
357  KMR_KV_CSTRING is a non-wide C string, and they are dealt with in
358  exactly the same way. KMR_KV_INTEGER is a long integer, and
359  KMR_KV_FLOAT8 is a double. The datatypes are mostly uninterpreted
360  in mapping/reducing, except for in sorting. There are two other
361  types for pointers. Pointers can be stored as they are (unlike
362  opaque data, which are embedded in the field), but converted to
363  opaque ones before communication. KMR_KV_POINTER_OWNED is an
364  allocated pointer, and the data will be freed on consuming a
365  key-value stream. KMR_KV_POINTER_UNMANAGED is a pointer to a
366  possibly shared data. */
367 
enum kmr_kv_field {
    KMR_KV_BAD,                 /* placeholder passed when no field datatype is given */
    KMR_KV_OPAQUE,              /* variable-sized byte vector */
    KMR_KV_CSTRING,             /* non-wide C string; handled exactly like OPAQUE */
    KMR_KV_INTEGER,             /* long integer */
    KMR_KV_FLOAT8,              /* double */
    KMR_KV_POINTER_OWNED,       /* allocated pointer; data is freed on consuming the stream */
    KMR_KV_POINTER_UNMANAGED    /* pointer to possibly shared data */
};
377 
378 /** Unit-Sized Storage. A key or a value is passed around as
379  unit-sized data. It is accessed by ".i" for KMR_KV_INTEGER, ".d"
380  for KMR_KV_FLOAT8, and ".p" KMR_KV_OPAQUE or other pointer cases.
381  It appears in struct kmr_kv_box. */
382 
383 union kmr_unit_sized {const char *p; long i; double d;};
384 
385 /* Storage of a Key-Value Entry. They are stored as key-length;
386  value-length; key-content[]; value-content[]. Occupied lengths are
387  rounded up to the alignment, thus the size of the entry is:
388  (2*sizeof(int)+KMR_ALIGN(klen)+KMR_ALIGN(vlen)). Flexible-array
389  member is avoided for C++. */
390 
struct kmr_kvs_entry {
    int klen;               /* key length */
    int vlen;               /* value length */
    unsigned char c[1];     /* key content followed by value content; a
                               one-element array stands in for a
                               flexible-array member, which is avoided
                               for C++ */
};
396 
397 /** Handy Copy of a Key-Value Field. It holds a pair of a key and a
398  value. The field may either be a copy (for an integer or a real
399  value) or a pointer to the storage. */
400 
401 struct kmr_kv_box {
402  int klen; /* key length */
403  int vlen; /* value length */
404  union kmr_unit_sized k; /* key: a copy (integer or real) or a pointer to the storage */
405  union kmr_unit_sized v; /* value: a copy (integer or real) or a pointer to the storage */
406 };
407 
408 #define kmr_kv_cake kmr_kv_box
409 
410 /** Keyed-Record for Sorting. A ranking key V is associated with an
411  entry for sorting. An array of this record is held during local
412  sorting. The ranking keys are integers respecting the ordering of
413  integers, doubles, and opaque bytes. */
414 
/* NOTE(review): the opening "struct <tag> {" line of this record
   (listing line 415) is missing from this listing; restore it from
   the upstream header before compiling. */
416  long v; /* ranking key */
417  struct kmr_kvs_entry *e; /* the entry the ranking key is associated with */
418 };
419 
420 /** Size of an Entry Header. It is the size of the length fields of a
421  key-value. It is also the size of the slack space for an
422  end-of-block marker, where the end of entries in a block is marked
423  by klen=-1 and vlen=-1. */
424 
425 static const size_t kmr_kvs_entry_header = offsetof(struct kmr_kvs_entry, c);
426 
427 /* Types of Key-Value Stream. It is a tag stored in the header.
428  KMR_KVS_ONCORE corresponds to struct kmr_kvs_oncore, and
429  KMR_KVS_PUSHOFF to struct kmr_kvs_pushoff. (KMR_KVS_STREAM is
430  undefined currently). PACKED is a marker after packing (marshaling
431  to byte arrays). */
432 
433 enum kmr_kvs_magic {
434  KMR_KVS_BAD, /* value 0; rejected by KMR_KVS_MAGIC_OK() */
435  KMR_KVS_ONCORE, /* corresponds to struct kmr_kvs_oncore */
436  /*KMR_KVS_STREAM,*/
437  KMR_KVS_PUSHOFF, /* corresponds to the push-off key-value stream */
438  KMR_KVS_ONCORE_PACKED /* marker after packing (marshaling to byte arrays) */
439  /*KMR_KVS_STREAM_PACKED*/
440 };
441 
442 #define KMR_KVS_MAGIC_OK(X) \
443  ((X) == KMR_KVS_ONCORE || (X) == KMR_KVS_ONCORE_PACKED \
444  || (X) == KMR_KVS_PUSHOFF) /* true for every tag except KMR_KVS_BAD; X is evaluated up to three times */
445 
446 /** State during kmr_map_ms(). It is kept in a key-value stream only
447  during kmr_map_ms() on rank0. (IT SHOULD BE INCLUDED IN THE STATE
448  OF kmr_save_kvs() AND kmr_restore_kvs(), BUT NOT YET). IDLES is
449  the count of ranks waiting for finish, KICKS is the count of jobs
450  started, and DONES is the count of jobs finished. Flexible-array
451  member is avoided for C++. */
452 
struct kmr_map_ms_state {
    int idles;          /* count of ranks waiting for finish */
    int kicks;          /* count of jobs started */
    int dones;          /* count of jobs finished */
    char states[1];     /* trailing array; one element stands in for a
                           flexible-array member, avoided for C++ */
};
459 
460 /** Key-Value Stream. MAGIC and MR holds the relevant data. LINK is
461  used to list all allocated key-value streams in the context to
462  detect unfreed ones. KEY_DATA is a datatype of a key field.
463  VALUE_DATA is a datatype of a value field. INFO_LINE0 records the
464  location of its allocation in a program. ELEMENT_COUNT is the
465  local count of the number of elements.
466 
467  STOWED is set when adding key-value is finished. ONCORE is always
468  true (currently ignored). NOGROW is set when the buffer is
469  preallocated by an enough size. SORTED indicates the key-value
470  stream is locally sorted. SHUFFLED_IN_PUSHOFF is true when its
471  contents are already shuffled. (UNIFORMLY_SIZED is set when all
472  key-values are of the same size. (currently ignored)).
473  (SHUFFLE_TO_SINGLE is set when the shuffling target is a single rank,
474  which only checked locally at each rank. (currently ignored)).
475 
476  Fields start from CKPT_ are used only when checkpoint/restart is
477  enabled. CKPT_KVS_ID stores a sequence number starts from 0 which
478  is assigned to Key-Value Stream and used to distinguish each other.
479  CKPT_GENERATED_OP and CKPT_CONSUMED_OP store sequence numbers of
480  operations performed to this Key-Value Stream. When this Key-Value
481  Stream is used as an output, the sequence number of the operation
482  is set to CKPT_GENERATED_OP. When this Key-Value Stream is used as
483  an input, the sequence number of the operation is set to
484  CKPT_CONSUMED_OP.
485 
486  BLOCK_SIZE is an unit of memory allocation invoked at adding a
487  key-value pair. It is PRESET_BLOCK_SIZE in the KMR-context by
488  default. ELEMENT_SIZE_LIMIT restricts the size of one key-value
489  pair (for error detection), and it is set to the 1/4 of BLOCK_SIZE
490  by default.
491 
492  STORAGE_NETSIZE is the local total size used to hold key-value
493  pairs. The size is the sum of kmr_kvs_entry_netsize()
494  corresponding to opaque fields, and is much larger than actually
495  occupied when the data fields are pointers. FIRST_BLOCK points to
496  the chain of blocks of data.
497 
498  MS field holds a state during master-worker mapping.
499 
500  A pair of transient fields CURRENT_BLOCK and ADDING_POINT points
501  to the current insertion point. */
502 
504  enum kmr_kvs_magic magic;
505  KMR *mr;
506  struct kmr_kvs_list link;
507  enum kmr_kv_field key_data;
508  enum kmr_kv_field value_data;
509  struct kmr_code_line info_line0;
510  long element_count;
511 
512  _Bool stowed : 1;
513  _Bool oncore : 1;
514  _Bool nogrow : 1;
515  _Bool sorted : 1;
516  _Bool shuffled_in_pushoff : 1;
517  _Bool _uniformly_sized_ : 1;
518  //_Bool _shuffle_to_single_ : 1;
519 
520  long ckpt_kvs_id;
521  long ckpt_generated_op;
522  long ckpt_consumed_op;
523 
524  size_t block_size;
525  size_t element_size_limit;
526 
527  size_t storage_netsize;
528  long block_count;
529  struct kmr_kvs_block *first_block;
530 
531  /* Transient fields: */
532 
533  struct kmr_map_ms_state *ms;
534 
535  _Bool under_threaded_operation;
536  struct kmr_kvs_block *current_block;
537  struct kmr_kvs_entry *adding_point;
538  void *temporary_data;
539 
540  //#ifdef _OPENMP
541  //omp_lock_t mutex;
542  //#endif /*_OPENMP*/
543 };
544 
545 /* Storage of Key-Value Pairs. The storage is managed as blocks, each
546  containing entries terminated by an end-of-block marker. A marker
547  is of klen=-1 and vlen=-1 with no data. A MARKER IS MANDATORILY
548  PLACED TO CHECK THE END (the early design did not have FILL_SIZE
549  field). SIZE is the whole structure (not just the data part).
550  PARTIAL_ELEMENT_COUNT is the count in this block. FILL_SIZE is a
551  next fill-point. Flexible-array member is avoided for C++. */
552 
554  struct kmr_kvs_block *next;
555  size_t size;
556  long partial_element_count;
557  size_t fill_size;
558  struct kmr_kvs_entry data[1];
559 };
560 
561 /* Size of a Block Header. */
562 
563 static const size_t kmr_kvs_block_header = offsetof(struct kmr_kvs_block, data);
564 
565 /** Record of Push-Off Key-Value Stream for a Rank. FILLBUF is for
566  fill-in and RECVBUF receiving. SENDBUFS[2] is a list of blocks,
567  where SENDBUFS[0] points to the head and SENDBUFS[1] to the
568  tail. */
569 
struct kmr_pushoff_buffers {
    struct kmr_kvs_entry *adding_point;
    struct kmr_kvs_block *fillbuf;      /* buffer for fill-in */
    struct kmr_kvs_block *recvbuf;      /* buffer for receiving */
    struct kmr_kvs_block *sendbufs[2];  /* list of blocks: [0] is the head, [1] the tail */
    int sends;
    _Bool closed[2];
};
578 
579 /** Key-Value Stream with Shuffling at Addition of Key-Values.
580  ELEMENT_COUNT counts local calls of kmr_add_kv(), and it is not
581  the result count which is the count of the elements received.
582  STORAGE keeps the true contents temporarily, and later it will
583  replace this key-value stream. PEERS and REQS are used for
584  communication. REQS is of length (2*nprocs), the first half for
585  sends and the second half for receives. */
586 
588  enum kmr_kvs_magic magic;
589  KMR *mr;
590  struct kmr_kvs_list link;
591  enum kmr_kv_field key_data;
592  enum kmr_kv_field value_data;
593  struct kmr_code_line info_line0;
594  long element_count;
595 
596  _Bool stowed : 1;
597  _Bool oncore : 1;
598  _Bool nogrow : 1;
599  _Bool sorted : 1;
600  _Bool shuffled_in_pushoff : 1;
601  _Bool _uniformly_sized_ : 1;
602 
603  int seqno;
604  KMR_KVS *storage;
605  struct kmr_pushoff_buffers *peers;
606  MPI_Request *reqs;
607  int *indexes;
608  MPI_Status *statuses;
609 };
610 
611 /** Key-Value Stream (DUMMY); Mandatory Entries. */
612 
614  enum kmr_kvs_magic magic;
615  KMR *mr;
616  struct kmr_kvs_list link;
617  enum kmr_kv_field key_data;
618  enum kmr_kv_field value_data;
619  struct kmr_code_line info_line0;
620  long element_count;
621 
622  _Bool stowed : 1;
623  _Bool oncore : 1;
624  _Bool nogrow : 1;
625  _Bool sorted : 1;
626  _Bool shuffled_in_pushoff : 1;
627  _Bool _uniformly_sized_ : 1;
628 };
629 
630 /** Key-Value Stream (abstract). */
631 
632 union kmr_kvs {
633  struct kmr_kvs_dummy m;
634  struct kmr_kvs_oncore c;
635  struct kmr_kvs_pushoff o;
636 };
637 
638 /** Options to Mapping, Shuffling, and Reduction. Options must be the
639  same on all ranks. NOTHREADING tells a mapper and a reducer to
640  suppress threading on loops calling a map-function or a
641  reduce-function. It should be set when a map-function or a
642  reduce-function is multi-threaded itself. INSPECT tells a mapper
643  that a map-function just inspects the entries, and does not
644  consume the input key-value stream. KEEP_OPEN tells a mapper not
645  to close the output key-value stream for further additions of
646  key-value pairs. KEY_AS_RANK tells a shuffler to use a key as a
647  rank. TAKE_CKPT tells kmr functions that support
648  Checkpoint/Restart to take checkpoints of the output key-value
649  stream when CKPT_SELECTIVE global option is enabled. RANK_ZERO
650  tells kmr_replicate() to gather pairs on rank0 only. (COLLAPSE
651  tells a mapper to convert pointer data to opaque. Conversions of
652  data fields are needed in advance to exchanging data in shuffling.
653  It is mainly intended internal use only). The padding fields make
654  the option size as long, for the Fortran binding whose options are
655  passed by integers (SPARC-V9 compilers put bit-fields on
656  arguments as 64 bits). */
657 
658 struct kmr_option {
659  _Bool nothreading : 1; /* suppress threading on loops calling a map/reduce-function */
660  _Bool inspect : 1; /* map-function only inspects; the input stream is not consumed */
661  _Bool keep_open : 1; /* do not close the output stream, for further additions */
662  _Bool key_as_rank : 1; /* shuffler uses a key as a rank */
663  _Bool rank_zero : 1; /* kmr_replicate() gathers pairs on rank0 only */
664  _Bool collapse : 1; /* mapper converts pointer data to opaque (mainly internal use) */
665  _Bool take_ckpt : 1; /* take checkpoints of the output stream when CKPT_SELECTIVE is enabled */
666  /*_Bool guess_pattern : 1;*/
667  int : 16; /* padding: makes the option size as long, for the Fortran binding */
668  int : 32; /* padding, as above */
669 };
670 
671 /* Initializers are not by designators for C++ below. */
672 
673 static const struct kmr_option kmr_noopt = {0, 0, 0, 0, 0, 0, 0};
674 static const union {struct kmr_option o; unsigned long bits;}
675  kmr_optmask = {{1, 1, 1, 1, 1, 1, 1}};
676 
677 /** Options to Mapping on Files. See the description on
678  kmr_map_file_names() and kmr_map_file_contents() for the meaning.
679  The padding fields make the option size as long, for the Fortran
680  binding whose options are passed by integers (SPARC-V9 compilers
681  put bit-fields on arguments as 64 bits). */
682 
struct kmr_file_option {
    _Bool each_rank : 1;
    _Bool subdirectories : 1;
    _Bool list_file : 1;
    _Bool shuffle_names : 1;
    int : 16;   /* padding: makes the option size as long, for the Fortran binding */
    int : 32;   /* padding, as above */
};
691 
692 /* Initializers are not by designators for C++ below. */
693 
694 static const struct kmr_file_option kmr_fnoopt = {0, 0, 0, 0};
695 static const union {struct kmr_file_option o; unsigned long bits;}
696  kmr_foptmask = {{1, 1, 1, 1}};
697 
698 /** Options to Mapping by Spawns. See the description on
699  kmr_map_via_spawn() for the meaning. The padding fields make the
700  option size as long, for the Fortran binding whose options are
701  passed by integers (SPARC-V9 compilers put bit-fields on
702  arguments as 64 bits). ONE_BY_ONE is no longer used (since
703  ver.178); NO_SET_INFOS took up ONE_BY_ONE bit (since 2016-05-17).
704  TAKE_CKPT tells kmr functions that support
705  Checkpoint/Restart to take checkpoints of the output key-value
706  stream when CKPT_SELECTIVE global option is enabled. */
707 
struct kmr_spawn_option {
    _Bool separator_space : 1;
    _Bool reply_each : 1;
    _Bool reply_root : 1;
    /*_Bool one_by_one : 1;*/
    _Bool no_set_infos : 1;     /* took up the bit of the no longer used ONE_BY_ONE */
    _Bool take_ckpt : 1;        /* take checkpoints of the output stream when
                                   CKPT_SELECTIVE is enabled */
    int : 16;   /* padding: makes the option size as long, for the Fortran binding */
    int : 32;   /* padding, as above */
};
718 
719 /* Initializers are not by designators for C++ below. */
720 
721 static const struct kmr_spawn_option kmr_snoopt = {0, 0, 0, 0, 0};
722 static const union {struct kmr_spawn_option o; unsigned long bits;}
723  kmr_soptmask = {{1, 1, 1, 1, 1}};
724 
725 /** Map-function Type. A map-function gets a key-value pair as struct
726  kmr_kv_box KV. KVI is the input key-value stream, but it can be
727  usually ignored (its potential usage is to check the content type
728  of the key and value fields). KVO is the output key-value stream.
729  The pointer ARG is one just passed to kmr_map(), which has no
730  specific purpose and is used to pass any argument to a
731  map-function. INDEX is the count of map-function calls, and it
732  usually equals to the index of a key-value pair in the input. It
733  is assured distinct, and can be used for race-free accesses to the
734  pointer ARG. */
735 
736 typedef int (*kmr_mapfn_t)(const struct kmr_kv_box kv,
737  const KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
738  const long index);
739 
740 /** Reduce-function Type. A reduce-function gets key-value pairs as
741  an array KV of struct kmr_kv_box. N is the number of key-value
742  pairs. KVI is the input key-value stream, but it can be
743  usually ignored. KVO is the output key-value stream. The pointer
744  ARG is one just passed to kmr_reduce(), which has no specific
745  purpose and is used to pass any argument to a reduce-function. */
746 
747 typedef int (*kmr_redfn_t)(const struct kmr_kv_box kv[], const long n,
748  const KMR_KVS *kvi, KMR_KVS *kvo, void *arg);
749 
750 /** Spawning Info. (OBSOLETE: USE IS ENABLED BY SETTING
751  SPAWN_PASS_INTERCOMM_IN_ARGUMENT=1 IN THE KMR STRUCTURE). It is
752  passed to a map-function in kmr_map_via_spawn(). MAPARG is the
753  pointer argument to kmr_map_via_spawn(). ICOMM is the
754  inter-communicator, and ICOMM_FF is for Fortran. REPLY_ROOT is a
755  boolean value of the option REPLY_ROOT. Union is taken for icomm
756  to make it independent from its representation (integer or
757  pointer). */
758 
/* NOTE(review): the opening "struct <tag> {" line of this record
   (listing line 759) is missing from this listing; restore it from
   the upstream header before compiling. */
760  void *maparg; /* pointer argument given to kmr_map_via_spawn() */
761  union {MPI_Comm icomm; long i_;} u; /* inter-communicator; the union keeps it independent of its representation */
762  int icomm_ff; /* inter-communicator for Fortran */
763  int reply_root; /* boolean value of the option REPLY_ROOT */
764 };
765 
766 /** N-Tuple. An n-tuple is a list of opaque values, intended to store
767  a list of data items as a key or a value in a key-value stream.
768  It is for the utility purpose. N is the number of entries.
769  MARKER can be any value and it is used as a tag for a direct-sum,
770  which identifies which key-value stream an n-tuple originally
771  belonged. Use kmr_reset_ntuple() and kmr_put_ntuple() to create
772  n-tuples, and kmr_add_ntuple() to add n-tuples in key-value
773  stream. The data items in an n-tuple are a sequence of pairs -- a
774  length followed by opaque bytes. Note the marker and the item
775  length be kept the same when n-tuples are used as keys for
776  sorting. */
777 
778 struct kmr_ntuple {
779  int marker; /* any value; a tag identifying the originating key-value stream */
780  unsigned short n; /* number of entries */
781  unsigned short index; /* NOTE(review): not described in the header comment */
782  unsigned short len[1]; /* start of the data items, each a length followed by opaque bytes */
783 };
784 
785 /** N-Tuple Argument. */
786 
/* NOTE(review): the opening "struct <tag> {" line of this record
   (listing line 787) is missing from this listing; restore it from
   the upstream header before compiling. */
788  void *p;
789  unsigned short len;
790 };
791 
792 /** Sets up the environment. Currently it does nothing. */
793 
794 #define kmr_init() kmr_init_2(KMR_API_ID)
795 
796 extern int kmr_init_2(int ignore);
797 extern int kmr_fin(void);
798 extern int kmr_initialize_mpi(int *refargc, char ***refargv);
799 extern KMR *kmr_create_context(const MPI_Comm comm, const MPI_Info conf,
800  const char *name);
801 extern int kmr_free_context(KMR *mr);
802 extern KMR *kmr_get_context_of_kvs(KMR_KVS const *kvs);
803 
804 extern KMR_KVS *kmr_create_kvs7(KMR *mr, enum kmr_kv_field k,
805  enum kmr_kv_field v,
806  struct kmr_option opt,
807  const char *, const int, const char *);
808 extern int kmr_free_kvs(KMR_KVS *kvs);
809 extern int kmr_move_kvs(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
810 extern int kmr_concatenate_kvs(KMR_KVS *kvs[], int nkvs, KMR_KVS *kvo,
811  struct kmr_option opt);
812 
813 extern int kmr_add_kv(KMR_KVS *kvs, const struct kmr_kv_box kv);
814 extern int kmr_add_kv1(KMR_KVS *kvs, void *k, int klen, void *v, int vlen);
815 extern int kmr_add_kv_space(KMR_KVS *kvs, const struct kmr_kv_box kv,
816  void **keyp, void **valuep);
817 extern int kmr_add_kv_quick_(KMR_KVS *kvs, const struct kmr_kv_box kv);
818 extern int kmr_add_kv_done(KMR_KVS *kvs);
819 extern int kmr_add_string(KMR_KVS *kvs, const char *k, const char *v);
820 
821 extern int kmr_map9(_Bool stop_when_some_added,
822  KMR_KVS *kvi, KMR_KVS *kvo,
823  void *arg, struct kmr_option opt, kmr_mapfn_t m,
824  const char *, const int, const char *);
825 extern int kmr_map_skipping(long from, long stride, long limit,
826  _Bool stop_when_some_added,
827  KMR_KVS *kvi, KMR_KVS *kvo,
828  void *arg, struct kmr_option opt, kmr_mapfn_t m);
829 extern int kmr_map_once(KMR_KVS *kvo, void *arg, struct kmr_option opt,
830  _Bool rank_zero_only, kmr_mapfn_t m);
831 extern int kmr_map_on_rank_zero(KMR_KVS *kvo, void *arg, struct kmr_option opt,
832  kmr_mapfn_t m);
833 extern int kmr_map_rank_by_rank(KMR_KVS *kvi, KMR_KVS *kvo,
834  void *arg, struct kmr_option opt,
835  kmr_mapfn_t m);
836 extern int kmr_map_ms(KMR_KVS *kvi, KMR_KVS *kvo,
837  void *arg, struct kmr_option opt,
838  kmr_mapfn_t m);
839 extern int kmr_map_ms_commands(KMR_KVS *kvi, KMR_KVS *kvo,
840  void *arg, struct kmr_option opt,
841  struct kmr_spawn_option sopt,
842  kmr_mapfn_t m);
843 extern int kmr_map_for_some(KMR_KVS *kvi, KMR_KVS *kvo,
844  void *arg, struct kmr_option opt, kmr_mapfn_t m);
845 
846 extern int kmr_map_via_spawn(KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
847  MPI_Info info, struct kmr_spawn_option opt,
848  kmr_mapfn_t mapfn);
849 extern int kmr_reply_to_spawner(KMR *mr);
850 extern MPI_Comm *kmr_get_spawner_communicator(KMR *mr, long index);
851 
852 extern int kmr_map_processes(_Bool nonmpi,
853  KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
854  MPI_Info info, struct kmr_spawn_option opt,
855  kmr_mapfn_t mapfn);
856 extern int kmr_map_parallel_processes(KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
857  MPI_Info info,
858  struct kmr_spawn_option opt,
859  kmr_mapfn_t mapfn);
860 extern int kmr_map_serial_processes(KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
861  MPI_Info info,
862  struct kmr_spawn_option opt,
863  kmr_mapfn_t mapfn);
864 
865 extern KMR *kmr_create_context_world(void);
866 extern KMR *kmr_create_dummy_context(void);
867 extern int kmr_send_kvs_to_spawner(KMR *mr, KMR_KVS *kvs);
868 extern int kmr_receive_kvs_from_spawned_fn(const struct kmr_kv_box kv,
869  const KMR_KVS *kvi,
870  KMR_KVS *kvo, void *arg,
871  const long index);
872 
873 #define kmr_sort_a_batch(X0,X1,X2,X3) kmr_sort_locally(X0,X1,X2,X3)
874 
875 extern int kmr_sort_locally(KMR_KVS *kvi, KMR_KVS *kvo, _Bool shuffling,
876  struct kmr_option opt);
877 
878 extern int kmr_shuffle(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
879 extern int kmr_replicate(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
880 
881 extern int kmr_reduce9(_Bool stop_when_some_added,
882  KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
883  struct kmr_option opt, kmr_redfn_t r,
884  const char *, const int, const char *);
885 extern int kmr_reduce_as_one(KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
886  struct kmr_option opt, kmr_redfn_t r);
887 extern int kmr_reduce_for_some(KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
888  struct kmr_option opt, kmr_redfn_t r);
889 
890 extern int kmr_file_enumerate(KMR *mr, char **names, int n, KMR_KVS *kvo,
891  struct kmr_file_option fopt);
892 extern int kmr_map_file_names(KMR *mr, char **names, int n,
893  struct kmr_file_option fopt,
894  KMR_KVS *kvo, void *arg,
895  struct kmr_option opt, kmr_mapfn_t m);
896 extern int kmr_map_getline(KMR *mr, FILE *f, long limit, _Bool largebuffering,
897  KMR_KVS *kvo,
898  void *arg, struct kmr_option opt, kmr_mapfn_t m);
899 extern int kmr_map_getline_in_memory_(KMR *mr, void *b, size_t sz, long limit,
900  KMR_KVS *kvo, void *arg,
901  struct kmr_option opt, kmr_mapfn_t m);
902 
903 extern int kmr_take_one(KMR_KVS *kvi, struct kmr_kv_box *kv);
904 extern int kmr_find_key(KMR_KVS *kvi, struct kmr_kv_box ki,
905  struct kmr_kv_box *vo);
906 extern int kmr_find_string(KMR_KVS *kvi, const char *k, const char **vq);
907 
908 extern int kmr_copy_info_to_kvs(MPI_Info src, KMR_KVS *kvo);
909 extern int kmr_copy_kvs_to_info(KMR_KVS *kvi, MPI_Info dst);
910 
911 extern int kmr_get_element_count(KMR_KVS *kvs, long *v);
912 extern int kmr_local_element_count(KMR_KVS *kvs, long *v);
913 
914 extern int kmr_add_identity_fn(const struct kmr_kv_box kv,
915  const KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
916  const long i);
917 extern int kmr_copy_to_array_fn(const struct kmr_kv_box kv,
918  const KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
919  const long i);
920 extern int kmr_save_kvs(KMR_KVS *kvi, void **dataq, size_t *szq,
921  struct kmr_option opt);
922 extern int kmr_restore_kvs(KMR_KVS *kvo, void *data, size_t sz,
923  struct kmr_option opt);
924 
925 extern int kmr_reverse(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
926 extern int kmr_pairing(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
927 extern int kmr_unpairing(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
928 
929 extern int kmr_sort(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
930 extern int kmr_sort_small(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
931 extern int kmr_sort_large(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
932 extern int kmr_sort_by_one(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
933 
934 extern int kmr_match(KMR_KVS *kvi0, KMR_KVS *kvi1, KMR_KVS *kvo,
935  struct kmr_option opt);
936 extern int kmr_ranking(KMR_KVS *kvi, KMR_KVS *kvo, long *count,
937  struct kmr_option opt);
938 extern int kmr_distribute(KMR_KVS *kvi, KMR_KVS *kvo, _Bool cyclic,
939  struct kmr_option opt);
940 extern int kmr_shuffle_leveling_pair_count(KMR_KVS *kvi, KMR_KVS *kvo);
941 extern int kmr_scan_locally(KMR_KVS *kvi, KMR_KVS *carryin,
942  KMR_KVS *kvo, KMR_KVS *carryout, kmr_redfn_t r);
943 extern int kmr_scan_on_values(KMR_KVS *kvi, KMR_KVS *kvo,
944  KMR_KVS *total, kmr_redfn_t r);
945 extern int kmr_choose_first_part(KMR_KVS *kvi, KMR_KVS *kvo,
946  long n, struct kmr_option opt);
947 extern int kmr_legal_minimum_field_size(KMR *mr, enum kmr_kv_field f);
948 extern int kmr_histogram_count_by_ranks(KMR_KVS *kvs, long *frq, double *var,
949  _Bool rankzeroonly);
950 
951 extern int kmr_read_files_reassemble(KMR *mr, char *file, int color,
952  off_t offset, off_t bytes,
953  void **buffer, off_t *readsize);
954 extern int kmr_read_file_by_segments(KMR *mr, char *file, int color,
955  void **buffer, off_t *readsize);
956 
957 extern int kmr_retrieve_kvs_entries(KMR_KVS *kvs, struct kmr_kvs_entry **ev,
958  long n);
959 extern int kmr_retrieve_kv_box_entries(KMR_KVS *kvs, struct kmr_kv_box *ev,
960  long n);
961 extern int kmr_retrieve_keyed_records(KMR_KVS *kvs,
962  struct kmr_keyed_record *ev,
963  long n, _Bool shuffling, _Bool ranking);
964 extern void kmr_dump_slot(union kmr_unit_sized e, int len,
965  enum kmr_kv_field data, char *buf, int buflen);
966 extern int kmr_dump_kv(struct kmr_kv_box kv, const KMR_KVS *kvs,
967  char *buf, int buflen);
968 extern int kmr_dump_kvs(KMR_KVS *kvs, int flag);
969 extern int kmr_dump_kvs_stats(KMR_KVS *, int level);
970 extern void kmr_dump_opaque(const char *p, int siz, char *buf, int buflen);
971 extern int kmr_dump_keyed_records(const struct kmr_keyed_record *ev,
972  KMR_KVS *kvi);
973 
974 extern void kmr_reset_ntuple(struct kmr_ntuple *u, int n, int marker);
975 extern int kmr_put_ntuple(KMR *mr, struct kmr_ntuple *u, const int sz,
976  const void *v, const int vlen);
977 extern int kmr_put_ntuple_long(KMR *mr, struct kmr_ntuple *u, const int sz,
978  long v);
979 extern int kmr_put_ntuple_entry(KMR *mr, struct kmr_ntuple *u, const int sz,
980  struct kmr_ntuple_entry e);
981 extern struct kmr_ntuple_entry kmr_nth_ntuple(struct kmr_ntuple *u, int nth);
982 extern int kmr_size_ntuple(struct kmr_ntuple *u);
983 extern int kmr_size_ntuple_by_lengths(int n, int len[]);
984 extern int kmr_add_ntuple(KMR_KVS *kvo, void *k, int klen,
985  struct kmr_ntuple *u);
986 
987 extern int kmr_separate_ntuples(KMR *mr,
988  const struct kmr_kv_box kv[], const long n,
989  struct kmr_ntuple **vv[2], long cnt[2],
990  int markers[2], _Bool disallow_other_entries);
991 extern int kmr_product_ntuples(KMR_KVS *kvo,
992  struct kmr_ntuple **vv[2], long cnt[2],
993  int newmarker,
994  int slots[][2], int nslots,
995  int keys[][2], int nkeys);
996 
997 extern KMR_KVS *kmr_create_pushoff_kvs(KMR *mr, enum kmr_kv_field kf,
998  enum kmr_kv_field vf,
999  struct kmr_option opt,
1000  const char*, const int, const char*);
1001 extern void kmr_print_statistics_on_pushoff(KMR *mr, char *titlestring);
1002 extern void kmr_init_pushoff_fast_notice_(MPI_Comm, _Bool verbose);
1003 extern void kmr_fin_pushoff_fast_notice_(void);
1004 extern void kmr_check_pushoff_fast_notice_(KMR *mr);
1005 
1006 extern int kmr_assign_file(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt);
1007 
1008 extern int kmr_init_swf(KMR *mr, MPI_Comm splitcomms[4], int master);
1009 extern int kmr_detach_swf_workers(KMR *mr);
1010 extern int kmr_map_swf(KMR_KVS *kvi, KMR_KVS *kvo, void *arg,
1011  struct kmr_spawn_option opt, kmr_mapfn_t mapfn);
1012 extern int kmr_stop_swf_workers(KMR *mr);
1013 extern int kmr_finish_swf(KMR *mr);
1014 extern int kmr_split_swf_lanes_a(KMR *mr, MPI_Comm splitcomms[4],
1015  int root, int *description[], _Bool dump);
1016 extern int kmr_split_swf_lanes(KMR *mr, MPI_Comm splitcomms[4],
1017  int root, char *description[], _Bool dump);
1018 extern void kmr_dump_swf_lanes(KMR *mr);
1019 extern void kmr_dump_swf_history(KMR *mr);
1020 extern void kmr_dump_swf_order_history(KMR *mr, int *history, size_t length);
1021 extern void kmr_free_swf_history(KMR *mr);
1022 extern void kmr_set_swf_verbosity(KMR *mr, int level);
1023 
1024 /* Suppresses warnings of unused constants (for icc).  Each statement in the body only names one constant and discards its value with a void cast; the function takes no arguments and returns nothing. */
1025 
1026 static inline void
1027 kmr_dummy_dummy_dummy_(void)
1028 {
1029  (void)kmr_kvs_entry_header;
1030  (void)kmr_kvs_block_header;
1031  (void)kmr_noopt;
1032  (void)kmr_optmask;
1033  (void)kmr_fnoopt;
1034  (void)kmr_foptmask;
1035  (void)kmr_snoopt;
1036  (void)kmr_soptmask;
1037 }
1038 
1039 #ifdef __cplusplus
1040 KMR_BR1
1041 #endif
1042 
1043 #undef KMR_BR0
1044 #undef KMR_BR1
1045 
1046 /*
1047 Copyright (C) 2012-2018 RIKEN R-CCS
1048 This library is distributed WITHOUT ANY WARRANTY. This library can be
1049 redistributed and/or modified under the terms of the BSD 2-Clause License.
1050 */
1051 
1052 #endif /*_KMR_H*/
int kmr_retrieve_keyed_records(KMR_KVS *kvs, struct kmr_keyed_record *ev, long n, _Bool shuffling, _Bool ranking)
Fills keyed records in an array for sorting.
Definition: kmrbase.c:2902
int kmr_map_serial_processes(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
Maps on processes started by MPI_Comm_spawn() to run serial processes.
Definition: kmrmapms.c:2067
Key-Value Stream (abstract).
Definition: kmr.h:632
int kmr_map_for_some(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
Maps until some key-value pairs are added.
Definition: kmrmoreops.c:1170
int kmr_map_file_names(KMR *mr, char **names, int n, struct kmr_file_option fopt, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
Maps on file names.
Definition: kmrfiles.c:1372
Options to Mapping, Shuffling, and Reduction.
Definition: kmr.h:658
int kmr_finish_swf(KMR *mr)
Clears the lanes of simple workflow.
Definition: kmrwfmap.c:750
int kmr_map_swf(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
Maps with a simple workflow.
Definition: kmrwfmap.c:2112
int kmr_file_enumerate(KMR *mr, char *names[], int n, KMR_KVS *kvo, struct kmr_file_option fopt)
Adds file names in a key-value stream KVO.
Definition: kmrfiles.c:1157
int kmr_add_kv(KMR_KVS *kvs, const struct kmr_kv_box kv)
Adds a key-value pair.
Definition: kmrbase.c:809
int kmr_unpairing(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Extracts a key-value pair from a pairing in the value part, discarding the original key...
Definition: kmrmoreops.c:234
KMR_KVS * kmr_create_pushoff_kvs(KMR *mr, enum kmr_kv_field kf, enum kmr_kv_field vf, struct kmr_option opt, const char *, const int, const char *)
Makes a new key-value stream with the specified field data-types.
Definition: kmraltkvs.c:85
void kmr_reset_ntuple(struct kmr_ntuple *u, int n, int marker)
Resets an n-tuple U with N entries and a MARKER.
Definition: kmrmoreops.c:1234
int kmr_init_swf(KMR *mr, MPI_Comm lanecomms[KMR_LANE_LEVELS], int master)
Initializes the lanes of simple workflow.
Definition: kmrwfmap.c:528
Spawning Info.
Definition: kmr.h:759
int kmr_size_ntuple_by_lengths(int n, int len[])
Returns the storage size of an n-tuple for N entries with LEN[i] size each.
Definition: kmrmoreops.c:1221
int kmr_find_key(KMR_KVS *kvi, struct kmr_kv_box ki, struct kmr_kv_box *vo)
Finds a key-value pair for a key.
Definition: kmrmoreops.c:43
int kmr_put_ntuple(KMR *mr, struct kmr_ntuple *u, const int sz, const void *v, const int vlen)
Adds an entry V with length VLEN in an n-tuple U whose size is limited to SZ.
Definition: kmrmoreops.c:1252
int kmr_map_once(KMR_KVS *kvo, void *arg, struct kmr_option opt, _Bool rank_zero_only, kmr_mapfn_t m)
Maps once.
Definition: kmrbase.c:1460
int kmr_map9(_Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m, const char *, const int, const char *)
Maps simply.
Definition: kmrbase.c:1347
int kmr_scan_locally(KMR_KVS *kvi, KMR_KVS *carryin, KMR_KVS *kvo, KMR_KVS *carryout, kmr_redfn_t r)
Scans every key-value with a reduce-function locally (independently on each rank).
Definition: kmrbase.c:2962
int kmr_save_kvs(KMR_KVS *kvi, void **dataq, size_t *szq, struct kmr_option opt)
Packs locally the contents of a key-value stream to a byte array.
Definition: kmrbase.c:1026
Key-Value Stream.
Definition: kmr.h:503
MPI_Comm * kmr_get_spawner_communicator(KMR *mr, long index)
Obtains (a reference to) a parent inter-communicator of a spawned process.
Definition: kmrmapms.c:1916
struct kmr_ntuple_entry kmr_nth_ntuple(struct kmr_ntuple *u, int nth)
Returns an NTH entry of an n-tuple.
Definition: kmrmoreops.c:1197
int kmr_shuffle(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Shuffles key-value pairs to the appropriate destination ranks.
Definition: kmrbase.c:2094
static const size_t kmr_kvs_entry_header
Size of an Entry Header.
Definition: kmr.h:425
int kmr_send_kvs_to_spawner(KMR *mr, KMR_KVS *kvs)
Sends the KVS from a spawned process to the map-function of the spawner.
Definition: kmrmapms.c:2127
int kmr_copy_to_array_fn(const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long i)
Copies the entry in the array.
Definition: kmrutil.c:949
int kmr_retrieve_kvs_entries(KMR_KVS *kvs, struct kmr_kvs_entry **ev, long n)
Fills local key-value entries in an array for inspection.
Definition: kmrbase.c:2860
int kmr_reduce9(_Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r, const char *, const int, const char *)
Reduces key-value pairs.
Definition: kmrbase.c:2607
Keyed-Record for Sorting.
Definition: kmr.h:415
void kmr_set_swf_verbosity(KMR *mr, int level)
Sets the verbosity of the spawn-library.
Definition: kmrwfmap.c:505
int kmr_distribute(KMR_KVS *kvi, KMR_KVS *kvo, _Bool cyclic, struct kmr_option opt)
Distributes key-values so that each rank has approximately the same number of pairs.
Definition: kmrmoreops.c:835
int kmr_add_kv_done(KMR_KVS *kvs)
Marks finished adding key-value pairs.
Definition: kmrbase.c:939
Definition: kmr.h:391
int kmr_reply_to_spawner(KMR *mr)
Sends a reply message in the spawned process, which tells it is ready to finish and may have some dat...
Definition: kmrmapms.c:1893
int kmr_add_ntuple(KMR_KVS *kvo, void *k, int klen, struct kmr_ntuple *u)
Adds an n-tuple U with a given key K and KLEN in a key-value stream KVO.
Definition: kmrmoreops.c:1295
int kmr_sort_by_one(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Sorts by rank0, a degenerate case for a small number of keys.
Definition: kmrmoreops.c:544
KMR Context.
Definition: kmr.h:247
int kmr_add_kv1(KMR_KVS *kvs, void *k, int klen, void *v, int vlen)
Adds a key-value pair as given directly by a pointer.
Definition: kmrbase.c:837
int kmr_concatenate_kvs(KMR_KVS *kvs[], int nkvs, KMR_KVS *kvo, struct kmr_option opt)
Concatenates a number of KVSes to one.
Definition: kmrbase.c:2754
int kmr_add_kv_space(KMR_KVS *kvs, const struct kmr_kv_box kv, void **keyp, void **valuep)
Adds a key-value pair, but only allocates a space and returns the pointers to the key and the value p...
Definition: kmrbase.c:901
int kmr_free_kvs(KMR_KVS *kvs)
Releases a key-value stream (type KMR_KVS).
Definition: kmrbase.c:679
int kmr_scan_on_values(KMR_KVS *kvi, KMR_KVS *kvo, KMR_KVS *total, kmr_redfn_t r)
Prefix-scans every key-value with a reduce-function (non-self-inclusively) and generates the final va...
Definition: kmrmoreops.c:943
int kmr_map_parallel_processes(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
Maps on processes started by MPI_Comm_spawn() to run independent MPI processes, which will not commun...
Definition: kmrmapms.c:2037
int kmr_find_string(KMR_KVS *kvi, const char *k, const char **vq)
Finds the key K in the key-value stream KVI.
Definition: kmrmoreops.c:73
void kmr_check_pushoff_fast_notice_(KMR *mr)
Checks if fast-notice works.
Definition: kmraltkvs.c:808
int kmr_assign_file(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Assigns files to ranks based on data locality.
Definition: kmriolb.c:257
int kmr_split_swf_lanes_a(KMR *mr, MPI_Comm splitcomms[KMR_LANE_LEVELS], int root, int *description[], _Bool dump)
Splits a communicator in a KMR context to ones to be used for kmr_init_swf().
Definition: kmrwfmap.c:930
int kmr_copy_kvs_to_info(KMR_KVS *kvi, MPI_Info dst)
Copies kvs entries into mpi-info.
Definition: kmrutil.c:1049
int kmr_map_getline(KMR *mr, FILE *f, long limit, _Bool largebuffering, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
Calls a map-function M for each line by getline() on an input F.
Definition: kmrfiles.c:1561
int kmr_read_file_by_segments(KMR *mr, char *file, int color, void **buffer, off_t *readsize)
Reads one file by segments and reassembles by all-gather.
Definition: kmrfiles.c:1021
kmr_kv_field
Datatypes of Keys or Values.
Definition: kmr.h:368
int kmr_reduce_for_some(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r)
Reduces until some key-value pairs are added.
Definition: kmrmoreops.c:1183
int kmr_take_one(KMR_KVS *kvi, struct kmr_kv_box *kv)
Extracts a single key-value pair locally in the key-value stream KVI.
Definition: kmrbase.c:1427
int kmr_pairing(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Replaces a value part with a key-value pairing.
Definition: kmrmoreops.c:212
Handy Copy of a Key-Value Field.
Definition: kmr.h:401
int kmr_sort_large(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Sorts a key-value stream by the regular or the random sampling-sort.
Definition: kmrmoreops.c:469
void kmr_init_pushoff_fast_notice_(MPI_Comm, _Bool verbose)
Initializes RDMA for fast-notice.
Definition: kmraltkvs.c:726
int kmr_dump_kv(struct kmr_kv_box kv, const KMR_KVS *kvs, char *buf, int buflen)
Dumps contents of a key-value.
Definition: kmrutil.c:1684
int kmr_dump_kvs(KMR_KVS *kvs, int flag)
Dumps contents of a KVS to stdout.
Definition: kmrutil.c:1711
Options to Mapping by Spawns.
Definition: kmr.h:708
int kmr_stop_swf_workers(KMR *mr)
Finishes the workers of workflow.
Definition: kmrwfmap.c:731
int kmr_fin(void)
Clears the environment.
Definition: kmrbase.c:124
Key-Value Stream with Shuffling at Addition of Key-Values.
Definition: kmr.h:587
int kmr_get_element_count(KMR_KVS *kvs, long *v)
Gets the total number of key-value pairs.
Definition: kmrmoreops.c:114
int kmr_move_kvs(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Moves the contents of the input KVI to the output KVO.
Definition: kmrbase.c:592
void kmr_dump_swf_history(KMR *mr)
Prints the history of kmr_map_swf(), which is the start ordering of the work-items.
Definition: kmrwfmap.c:2874
int kmr_detach_swf_workers(KMR *mr)
Disengages the workers from main processing and puts them in the service loop for spawning...
Definition: kmrwfmap.c:659
int kmr_sort(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Sorts a key-value stream globally.
Definition: kmrmoreops.c:575
int kmr_dump_kvs_stats(KMR_KVS *, int level)
Dumps contents of a key-value stream, where the values are pairs.
Definition: kmrutil.c:1761
int kmr_size_ntuple(struct kmr_ntuple *u)
Returns the storage size of an n-tuple.
Definition: kmrmoreops.c:1211
int kmr_separate_ntuples(KMR *mr, const struct kmr_kv_box kv[], const long n, struct kmr_ntuple **vv[2], long cnt[2], int markers[2], _Bool disallow_other_entries)
Separates the n-tuples stored in the value part of KV into the two sets by their marker values...
Definition: kmrmoreops.c:1318
State during kmr_map_ms().
Definition: kmr.h:453
int kmr_split_swf_lanes(KMR *mr, MPI_Comm splitcomms[KMR_LANE_LEVELS], int root, char *description[], _Bool dump)
Splits a communicator in a KMR context to ones to be used for kmr_init_swf().
Definition: kmrwfmap.c:1067
int kmr_put_ntuple_long(KMR *mr, struct kmr_ntuple *u, const int sz, long v)
Adds an integer value V in an n-tuple U whose size is limited to SZ.
Definition: kmrmoreops.c:1274
int kmr_match(KMR_KVS *kvi0, KMR_KVS *kvi1, KMR_KVS *kvo, struct kmr_option opt)
Makes key-value pairs as products of the two values in two key-value stream.
Definition: kmrmoreops.c:696
int kmr_map_skipping(long from, long stride, long limit, _Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
Maps by skipping the number of entries.
Definition: kmrbase.c:1192
int kmr_product_ntuples(KMR_KVS *kvo, struct kmr_ntuple **vv[2], long cnt[2], int newmarker, int slots[][2], int nslots, int keys[][2], int nkeys)
Makes a direct product of the two sets of n-tuples VV[0] and VV[1] with their counts in CNT[0] and CN...
Definition: kmrmoreops.c:1528
void kmr_dump_opaque(const char *p, int siz, char *buf, int buflen)
Puts the string of the key or value field into a buffer BUF as printable string.
Definition: kmrutil.c:1596
int kmr_map_ms_commands(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, struct kmr_spawn_option sopt, kmr_mapfn_t m)
Maps in the master-worker mode, specialized to run serial commands.
Definition: kmrmapms.c:2432
void kmr_dump_swf_lanes(KMR *mr)
Dumps lanes created by kmr_init_swf().
Definition: kmrwfmap.c:2054
int kmr_shuffle_leveling_pair_count(KMR_KVS *kvi, KMR_KVS *kvo)
Shuffles key-values so that each rank has approximately the same number of pairs. ...
Definition: kmrmoreops.c:1074
int kmr_read_files_reassemble(KMR *mr, char *file, int color, off_t offset, off_t bytes, void **buffer, off_t *readsize)
Reassembles files reading by ranks.
Definition: kmrfiles.c:653
int kmr_map_via_spawn(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
Maps on processes started by MPI_Comm_spawn().
Definition: kmrmapms.c:1992
int kmr_histogram_count_by_ranks(KMR_KVS *kvs, long *frq, double *var, _Bool rankzeroonly)
Fills an integer array FRQ[i] with the count of the elements of each rank.
Definition: kmrmoreops.c:1569
int kmr_ranking(KMR_KVS *kvi, KMR_KVS *kvo, long *count, struct kmr_option opt)
Assigns a ranking to key-value pairs, and returns the number of the total elements in COUNT...
Definition: kmrmoreops.c:764
int kmr_restore_kvs(KMR_KVS *kvo, void *data, size_t sz, struct kmr_option opt)
Unpacks locally the contents of a key-value stream from a byte array.
Definition: kmrbase.c:1092
int kmr_free_context(KMR *mr)
Releases a context created with kmr_create_context().
Definition: kmrbase.c:367
void kmr_free_swf_history(KMR *mr)
Clears the history recorded in kmr_map_swf().
Definition: kmrwfmap.c:2933
int kmr_add_identity_fn(const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long i)
Adds a given key-value pair unmodified.
Definition: kmrbase.c:995
int kmr_map_ms(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
Maps in master-worker mode.
Definition: kmrmapms.c:344
int kmr_map_rank_by_rank(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
Maps sequentially with rank by rank for debugging.
Definition: kmrbase.c:1397
int kmr_legal_minimum_field_size(KMR *mr, enum kmr_kv_field f)
Returns a minimum byte size of the field: 8 for INTEGER and FLOAT8, 0 for others. ...
Definition: kmrbase.c:2929
Options to Mapping on Files.
Definition: kmr.h:683
Unit-Sized Storage.
Definition: kmr.h:383
int kmr_replicate(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Replicates key-value pairs to be visible on all ranks, that is, it has the effect of bcast or all-gat...
Definition: kmrbase.c:2240
void kmr_dump_swf_order_history(KMR *mr, int *history, size_t length)
Returns a list of start ordering of the work-items.
Definition: kmrwfmap.c:2904
int kmr_sort_small(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Sorts a key-value stream, by partitioning to equal ranges.
Definition: kmrmoreops.c:388
int kmr_initialize_mpi(int *refargc, char ***refargv)
Checks the initialization state of MPI, and initializes MPI when not.
Definition: kmrbase.c:134
int kmr_add_string(KMR_KVS *kvs, const char *k, const char *v)
Adds a key-value pair of strings.
Definition: kmrbase.c:971
int kmr_put_ntuple_entry(KMR *mr, struct kmr_ntuple *u, const int sz, struct kmr_ntuple_entry e)
Adds an n-tuple entry E in an n-tuple U whose size is limited to SZ.
Definition: kmrmoreops.c:1284
Key-Value Stream (DUMMY); Mandatory Entries.
Definition: kmr.h:613
int kmr_receive_kvs_from_spawned_fn(const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long index)
Collects key-value pairs generated by spawned processes.
Definition: kmrmapms.c:2161
int kmr_sort_locally(KMR_KVS *kvi, KMR_KVS *kvo, _Bool shuffling, struct kmr_option opt)
Reorders key-value pairs in a single rank.
Definition: kmrbase.c:2051
N-Tuple.
Definition: kmr.h:778
int kmr_reduce_as_one(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r)
Calls a reduce-function once as if all key-value pairs had the same key.
Definition: kmrbase.c:2683
int(* kmr_redfn_t)(const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg)
Reduce-function Type.
Definition: kmr.h:747
int kmr_local_element_count(KMR_KVS *kvs, long *v)
Gets the number of key-value pairs locally on each rank.
Definition: kmrutil.c:349
int kmr_retrieve_kv_box_entries(KMR_KVS *kvs, struct kmr_kv_box *ev, long n)
Fills local key-value entries in an array of kmr_kv_box for inspection.
Definition: kmrbase.c:2883
KMR_KVS * kmr_create_kvs7(KMR *mr, enum kmr_kv_field k, enum kmr_kv_field v, struct kmr_option opt, const char *, const int, const char *)
Makes a new key-value stream with the specified field data-types.
Definition: kmrbase.c:568
int kmr_reverse(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Makes a new pair by swapping the key and the value in each pair.
Definition: kmrmoreops.c:159
N-Tuple Argument.
Definition: kmr.h:787
Information of Source Code Line.
Definition: kmr.h:107
int(* kmr_mapfn_t)(const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long index)
Map-function Type.
Definition: kmr.h:736
int kmr_map_on_rank_zero(KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
Maps on rank0 only.
Definition: kmrbase.c:1514
Record of Push-Off Key-Value Stream for a Rank.
Definition: kmr.h:570
int kmr_copy_info_to_kvs(MPI_Info src, KMR_KVS *kvo)
Copies mpi-info entries into kvs.
Definition: kmrutil.c:997
int kmr_choose_first_part(KMR_KVS *kvi, KMR_KVS *kvo, long n, struct kmr_option opt)
Chooses the first N entries from a key-value stream KVI.
Definition: kmrmoreops.c:1145
int kmr_map_processes(_Bool nonmpi, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
Maps on processes started by MPI_Comm_spawn() to run independent processes.
Definition: kmrmapms.c:2087
KMR * kmr_create_context(const MPI_Comm comm, const MPI_Info conf, const char *name)
Makes a new KMR context (a context has type KMR).
Definition: kmrbase.c:168