KMR
|
Simple Workflow by Static-Spawning. More...
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <unistd.h>
#include <limits.h>
#include <string.h>
#include <dlfcn.h>
#include <errno.h>
#include <assert.h>
#include "kmr.h"
#include "kmrimpl.h"
#include "kmrspawn.h"
Go to the source code of this file.
Classes | |
struct | kmr_lane_no |
Lane Number (at-all-ranks). More... | |
struct | kmr_lane_state |
Work-Item Queue of a Lane (at-the-master). More... | |
struct | kmr_lane_vector |
Vector of Lanes (at-the-master). More... | |
struct | kmr_pair |
struct | kmr_rank_vector |
Vector of Ranks (at-the-master). More... | |
struct | kmr_swf |
Workflow State (at-all-ranks). More... | |
struct | kmr_work_item |
Work Description (at-the-master). More... | |
struct | kmr_work_list |
Work-Item Queue Entry (at-the-master). More... | |
Macros | |
#define | KMR_LANE_LEVELS (4) |
Maximum Levels of Lanes. More... | |
#define | MAX(a, b) (((a)>(b))?(a):(b)) |
Functions | |
static int | kmr_activate_workers (struct kmr_swf *wf, _Bool shutdown) |
static struct kmr_lane_state * | kmr_allocate_lane (int level, struct kmr_lane_no id, int nprocs) |
Makes a lane structure (at-the-master). More... | |
static void | kmr_bond_all_lanes (struct kmr_swf *wf, struct kmr_lane_vector *v, struct kmr_pair laneranks[][KMR_LANE_LEVELS]) |
Collects lanes to make a superlane, which build up to a single top-lane (at-the-master). More... | |
static void | kmr_bond_sublanes (struct kmr_swf *wf, struct kmr_lane_state *sup, struct kmr_lane_state *lanes[], int nlanes) |
Builds a vector of sublanes for the superlane SUP (at-the-master). More... | |
static int | kmr_check_lane_id (struct kmr_swf *wf, struct kmr_lane_no id, _Bool admit_any) |
Checks well-formedness of a lane-number. More... | |
static int | kmr_check_partitioning (struct kmr_swf *wf, int supercolor, MPI_Comm subcomm) |
Checks if a sub-communicator is a partitioning of a super-communicator (at-all-ranks). More... | |
static void | kmr_check_work_queues_empty (struct kmr_swf *wf) |
static int | kmr_ckeck_sublanes_empty (struct kmr_swf *wf, struct kmr_lane_state *lane) |
static void | kmr_clear_lane_id (struct kmr_lane_no *id) |
Clears the lane-number to a null-lane. More... | |
static unsigned long | kmr_color_of_lane (struct kmr_lane_no id) |
Returns a lane-number as a single integer color. More... | |
static int | kmr_color_subcommunicator (struct kmr_swf *wf, MPI_Comm subcomm, MPI_Comm supercomm) |
Colors sub-communicators distinctly in a super-communicator, and returns the color which names a lane (at-all-ranks). More... |
static int | kmr_count_bottom_level_lanes (struct kmr_lane_state *lane) |
Counts the number of the bottom level lanes (at-the-master). More... | |
static int | kmr_dequeue_scattered_work (struct kmr_swf *wf, struct kmr_lane_state *lane, struct kmr_work_item *x) |
Removes all occurrences of a work-item (which may be scattered for an any-lane) from all the queues. More... |
static struct kmr_work_item * | kmr_dequeue_work (struct kmr_lane_state *lane) |
int | kmr_detach_swf_workers (KMR *mr) |
Disengages the workers from main processing and puts them in the service loop for spawning. More... | |
static void | kmr_dump_split_lanes (KMR *mr, struct kmr_lane_no id) |
static void | kmr_dump_sublanes (struct kmr_swf *wf, struct kmr_lane_state *lane) |
void | kmr_dump_swf_history (KMR *mr) |
Prints the history of kmr_map_swf(), which is the start ordering of the work-items. More... |
void | kmr_dump_swf_lanes (KMR *mr) |
Dumps lanes created by kmr_init_swf(). More... | |
void | kmr_dump_swf_order_history (KMR *mr, int *history, size_t count) |
Returns a list of start ordering of the work-items. More... | |
static int | kmr_enqueue_work (struct kmr_swf *wf, struct kmr_lane_state *lane, struct kmr_work_item *x, _Bool multipleany) |
Enqueues a work-item in some sublane of a LANE (at-the-master). More... | |
static void | kmr_err_when_swf_is_not_initialized (KMR *mr) |
static int | kmr_find_leader (struct kmr_swf *wf, struct kmr_lane_state *lane, int level, struct kmr_pair laneranks[][KMR_LANE_LEVELS]) |
Searches a leader (rank=0) in a LANE at a LEVEL (at-the-master). More... | |
static int | kmr_find_sublane_index (struct kmr_swf *wf, struct kmr_lane_state *lane) |
Finds a sublane index of a superlane. More... | |
static int | kmr_find_worker_index (struct kmr_swf *wf, struct kmr_lane_state *lane, int rank) |
Finds a worker index of a lane for a rank. More... | |
static int | kmr_finish_current_work (struct kmr_swf *wf, struct kmr_lane_state *lane) |
int | kmr_finish_swf (KMR *mr) |
Clears the lanes of simple workflow. More... | |
static void | kmr_free_lanes (struct kmr_swf *wf, struct kmr_lane_state *lane) |
Frees a lane and its sublanes, recursively (at-the-master). More... | |
void | kmr_free_swf_history (KMR *mr) |
Clears the history recorded in kmr_map_swf(). More... | |
static void | kmr_free_work_list (struct kmr_swf *wf, struct kmr_work_list *h, struct kmr_lane_state *lane, _Bool warn) |
static int | kmr_handle_worker_request (struct kmr_swf *wf, _Bool joining) |
(spawn-library-protocol) Handles requests from workers. More... | |
int | kmr_init_swf (KMR *mr, MPI_Comm lanecomms[KMR_LANE_LEVELS], int master) |
Initializes the lanes of simple workflow. More... | |
static int | kmr_join_to_workers (struct kmr_swf *wf, struct kmr_lane_state *lane) |
static _Bool | kmr_lane_eq (struct kmr_lane_no n0, struct kmr_lane_no n1, int level) |
Compares lane-numbers up to the LEVEL. More... | |
static char * | kmr_lane_string (struct kmr_lane_no n, _Bool print_all_levels) |
(NO-THREAD-SAFE) Returns a string representation of a lane-number. More... | |
static int | kmr_level_of_lane (struct kmr_lane_no n, _Bool admit_any) |
Returns the maximum level of a given lane-number (zero to KMR_LANE_LEVELS-1), or returns -1 for a null-lane. More... | |
static int | kmr_link_work (struct kmr_swf *wf, struct kmr_lane_state *lane, struct kmr_work_item *x) |
static int | kmr_load_spawn_library (struct kmr_swf *wf, _Bool test_with_fake_spawn) |
Loads the spawn-library "libkmrspawn.so". More... | |
static struct kmr_lane_vector * | kmr_make_bottom_lanes (struct kmr_swf *wf, struct kmr_lane_no *laneids, struct kmr_pair laneranks[][KMR_LANE_LEVELS]) |
Makes lanes at the bottom levels (at-the-master). More... | |
static struct kmr_lane_vector * | kmr_make_lane_vector (int n, struct kmr_lane_state *lanes[]) |
Packs lanes in a vector. More... | |
static void | kmr_make_lanes (struct kmr_swf *wf) |
Initializes the lanes at the master rank (at-all-ranks). More... |
static struct kmr_rank_vector * | kmr_make_rank_vector (int n) |
Allocates a rank vector, filling all entries with -1. More... | |
static struct kmr_work_item * | kmr_make_work_item (struct kmr_swf *wf, struct kmr_lane_no id, const char *args, size_t argssize, int seq, _Bool separatorspace) |
int | kmr_map_swf (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_spawn_option opt, kmr_mapfn_t mapfn) |
Maps with a simple workflow. More... | |
static struct kmr_lane_no | kmr_name_lane (KMR *mr, const char *s) |
Parses a string as a lane-number. More... | |
static void | kmr_preset_lane_state (struct kmr_swf *wf, _Bool queuing) |
static void | kmr_record_in_history (struct kmr_swf *wf, struct kmr_work_item *x) |
static int | kmr_remove_work (struct kmr_swf *wf, struct kmr_lane_state *lane, struct kmr_work_item *x) |
static void | kmr_resolve_lanes (struct kmr_swf *wf) |
Assigns a lane-number to a rank (wf->lane_id_on_proc) (at-all-ranks). More... | |
static int | kmr_schedule_lanes (struct kmr_swf *wf, struct kmr_lane_state *lane) |
Schedules a lane for a next work-item (at-the-master). More... |
void | kmr_set_swf_verbosity (KMR *mr, int level) |
Sets the verbosity of the spawn-library. More... | |
int | kmr_split_swf_lanes (KMR *mr, MPI_Comm splitcomms[KMR_LANE_LEVELS], int root, char *description[], _Bool dump) |
Splits a communicator in a KMR context to ones to be used for kmr_init_swf(). More... | |
int | kmr_split_swf_lanes_a (KMR *mr, MPI_Comm splitcomms[KMR_LANE_LEVELS], int root, int *description[], _Bool dump) |
Splits a communicator in a KMR context to ones to be used for kmr_init_swf(). More... | |
static int | kmr_start_lanes (struct kmr_swf *wf, struct kmr_lane_state *lane, struct kmr_work_item *x) |
Requests workers to start a work for a lane and its sublanes, and then connects to workers (at-the-master). More... | |
static int | kmr_start_worker (struct kmr_spawn_work *w, size_t msglen, int rank, MPI_Comm basecomm) |
int | kmr_stop_swf_workers (KMR *mr) |
Finishes the workers of workflow. More... | |
static int | kmr_yield_for_lane (struct kmr_swf *wf, struct kmr_lane_state *lane, int sublaneindex) |
Schedules a next work-item when a worker or a sublane finishes (at-the-master). More... | |
Variables | |
static short const | KMR_ANY_LANE = -2 |
static short const | KMR_NO_LANE = -1 |
Simple Workflow by Static-Spawning.
It needs a library for static-spawning "libkmrspawn.so" (it is only available on K). It runs MPI executables under the control of a simple master/worker scheduler. It groups ranks as lanes, where the lanes are hierarchically split into maximum four levels of sublanes. Each lane is associated with a subworld communicator. In the following diagram, lane (0) is split into two lanes (0.0) and (0.1), and they are split further. A work-item (or job/task) is enqueued in a lane specified by a list of lane-numbers like (0.1) or (0.1.1). A work-item entered in a lane runs using all sublanes below it. Work-items in each lane are scheduled in the FIFO order. The single and dedicated master rank keeps track of running/idling lanes.
The design of this workflow specifies explicitly the scheduling, and thus the data-flow (dependency) is implicit. The scheduler at each rank is almost stateless, where the state is stored in the spawning library. The include file "kmrspawn.h" is an interface to the spawning library. IMPLEMENTATION NOTE: The file "kmrspawn.c" is copied from the spawning library to use it as a dummy worker. IMPLEMENTATION NOTE: Creation of inter-communicators for subworlds is serialized, and a use of a single tag is sufficient.
Definition in file kmrwfmap.c.
#define KMR_LANE_LEVELS (4) |
Maximum Levels of Lanes.
Definition at line 62 of file kmrwfmap.c.
|
static |
Compares lane-numbers up to the LEVEL.
Note that comparison includes the LEVEL. It returns true when (LEVEL=-1).
Definition at line 220 of file kmrwfmap.c.
|
static |
Clears the lane-number to a null-lane.
Definition at line 234 of file kmrwfmap.c.
|
static |
Parses a string as a lane-number.
It signals an error for improper format strings, but, it does not check the range of each index. Examples are: "", "3", "3.3.1", "*", "3.3.*", "3.*.*" ("" is parsed but unusable). Illegal examples are: ".", "3.", "3.*.3".
Definition at line 248 of file kmrwfmap.c.
|
static |
(NO-THREAD-SAFE) Returns a string representation of a lane-number.
It returns "-" for a null-lane.
Definition at line 344 of file kmrwfmap.c.
|
static |
Returns the maximum level of a given lane-number (zero to KMR_LANE_LEVELS-1), or returns -1 for a null-lane.
Definition at line 383 of file kmrwfmap.c.
|
static |
Returns a lane-number as a single integer color.
A color is used to check the identity of a lane in assertions. A color is used in place of a lane in the spawn-library, because it does not know the lanes.
Definition at line 406 of file kmrwfmap.c.
|
static |
Finds a worker index of a lane for a rank.
Definition at line 415 of file kmrwfmap.c.
|
static |
Finds a sublane index of a superlane.
Definition at line 435 of file kmrwfmap.c.
|
static |
Packs lanes in a vector.
It returns an unfilled vector when a null LANES is passed.
Definition at line 457 of file kmrwfmap.c.
|
static |
Allocates a rank vector, filling all entries with -1.
Definition at line 476 of file kmrwfmap.c.
void kmr_set_swf_verbosity | ( | KMR * | mr, |
int | level | ||
) |
Sets the verbosity of the spawn-library.
LEVEL is 1 to 3, where 3 is the most verbose. It should be called after kmr_init_swf() and before detaching by kmr_detach_swf_workers() to affect worker ranks.
Definition at line 505 of file kmrwfmap.c.
|
static |
Loads the spawn-library "libkmrspawn.so".
It implements static-spawning, which is only available on K, FX10, and FX100.
Definition at line 827 of file kmrwfmap.c.
|
static |
Assigns a lane-number to a rank (wf->lane_id_on_proc) (at-all-ranks).
It calculates a lane-number from the set of split communicators. It assumes the master-rank (the last rank) is excluded from the lanes.
Definition at line 1368 of file kmrwfmap.c.
|
static |
Initializes the lanes at the master rank (at-all-ranks).
It collects lane-numbers of all ranks and makes the lane structures from the bottom and upwards.
Definition at line 1607 of file kmrwfmap.c.
|
static |
Frees a lane and its sublanes, recursively (at-the-master).
Definition at line 2006 of file kmrwfmap.c.
int kmr_init_swf | ( | KMR * | mr, |
MPI_Comm | lanecomms[KMR_LANE_LEVELS], | ||
int | master | ||
) |
Initializes the lanes of simple workflow.
Lanes of workflow are created corresponding to communicators in LANECOMMS[], one lane for each communicator. Work-items in a lane at a level are executed in the communicator LANECOMMS[level]. MASTER specifies the master rank, which should not be included in any communicator, because the master needs to be distinct from the workers.
Definition at line 528 of file kmrwfmap.c.
|
static |
(spawn-library-protocol) Handles requests from workers.
It returns MPI_ERR_PENDING when some workers have not finished, or MPI_SUCCESS otherwise. It blocks in receiving a new request.
Definition at line 2953 of file kmrwfmap.c.
int kmr_detach_swf_workers | ( | KMR * | mr | ) |
Disengages the workers from main processing and puts them in the service loop for spawning.
Only the master rank returns from this call and continues processing, but the worker ranks never return as if they call exit(). It replaces the communicator in the KMR context with a self-communicator after saving the old communicator for workflow. Replacing the communicator makes the context independent from the other ranks and safe to free it. It finalizes the context of workers.
Definition at line 659 of file kmrwfmap.c.
int kmr_stop_swf_workers | ( | KMR * | mr | ) |
Finishes the workers of workflow.
It stops the service loop of the workers and lets them exit. It should be called (immediately) before MPI_Finalize() at the master rank.
Definition at line 731 of file kmrwfmap.c.
int kmr_finish_swf | ( | KMR * | mr | ) |
Clears the lanes of simple workflow.
Definition at line 750 of file kmrwfmap.c.
int kmr_split_swf_lanes_a | ( | KMR * | mr, |
MPI_Comm | splitcomms[KMR_LANE_LEVELS], | ||
int | root, | ||
int * | description[], | ||
_Bool | dump | ||
) |
Splits a communicator in a KMR context to ones to be used for kmr_init_swf().
This is a utility. It is restricted to make lanes having the same depth of levels. DESCRIPTION is an array of lists of positive integers, each terminated by a zero, in the form D[0]={L0,L1,...,Lk-1,0}, D[1]={M0,M1,...,Mj-1,0}, D[2]={N0,N1,...,Ni-1,0}. Here, it is the depth=3 case. N0 to Ni-1 specify how to split ranks (the bottom level) to i groups. They must satisfy (N0+...+Ni-1)<=(nprocs-1). It needs at least one spare rank for the master. M0 to Mj-1 specify the 2nd bottom level, and they must satisfy (M0+...+Mj-1)=i. L0 to Lk-1 specify the top level, and they must satisfy (L0+...+Lk-1)=j. DESCRIPTION and NLAYERS need to be valid only on rank0 (they are broadcast inside this routine). The ranks that are not members of a lane have a null communicator at each level.
Definition at line 930 of file kmrwfmap.c.
int kmr_split_swf_lanes | ( | KMR * | mr, |
MPI_Comm | splitcomms[KMR_LANE_LEVELS], | ||
int | root, | ||
char * | description[], | ||
_Bool | dump | ||
) |
Splits a communicator in a KMR context to ones to be used for kmr_init_swf().
This is a utility. DESCRIPTION is a vector of strings terminated by a null-string. A line consists of a lane-number, a separator colon, and a number of ranks. Thus, each line looks like "3.3.3:4". It does not accept any whitespace. Note that the descriptions are to distinguish lanes, and the lane-numbers can change, because they are once translated to communicators.
Definition at line 1067 of file kmrwfmap.c.
|
static |
Checks well-formedness of a lane-number.
Slots after an any-lane must be an any-lane or a no-lane. Slots after a no-lane must be a no-lane.
Definition at line 1454 of file kmrwfmap.c.
|
static |
Colors sub-communicators distinctly in a super-communicator, and returns the color which names a lane (at-all-ranks).
It enumerates the rank0 processes of the sub-communicators. It returns -1 for the ranks with a null SUBCOMM.
Definition at line 1519 of file kmrwfmap.c.
|
static |
Checks if a sub-communicator is a partitioning of a super-communicator (at-all-ranks).
A SUPERCOLOR gives a distinct color to each super-communicator, which should be identical in the sub-communicator.
Definition at line 1413 of file kmrwfmap.c.
|
static |
Makes lanes at the bottom levels (at-the-master).
The depths may not be equal. The created bottom lanes are bonded to a superlane in kmr_bond_all_lanes().
Definition at line 1749 of file kmrwfmap.c.
|
static |
Collects lanes to make a superlane, which build up to a single top-lane (at-the-master).
It destructively modifies the passed vector V. (SLOW).
Definition at line 1830 of file kmrwfmap.c.
|
static |
Builds a vector of sublanes for the superlane SUP (at-the-master).
It destructively clears LANES[i] to null, when it is merged to a superlane.
Definition at line 1884 of file kmrwfmap.c.
|
static |
Searches a leader (rank=0) in a LANE at a LEVEL (at-the-master).
It returns a rank in the base communicator or -1 if not found. It returns -1 for the top lane, because the top lane is never used for work. (SLOW).
Definition at line 1944 of file kmrwfmap.c.
|
static |
Counts the number of the bottom level lanes (at-the-master).
Definition at line 1985 of file kmrwfmap.c.
|
static |
Makes a lane structure (at-the-master).
It is a bottom lane if NPROCS is non-zero, or a superlane if NPROCS is zero. The lane-number argument is ignored when creating a top-lane (level=-1).
Definition at line 1703 of file kmrwfmap.c.
|
static |
Removes all occurrences of a work-item (which may be scattered for an any-lane) from all the queues.
Definition at line 2680 of file kmrwfmap.c.
void kmr_dump_swf_lanes | ( | KMR * | mr | ) |
Dumps lanes created by kmr_init_swf().
Definition at line 2054 of file kmrwfmap.c.
|
static |
Enqueues a work-item in some sublane of a LANE (at-the-master).
Note it also puts the work-item in the sublanes below, which will block the lanes for yielding them for a superlane.
Definition at line 2352 of file kmrwfmap.c.
int kmr_map_swf | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_spawn_option | opt, | ||
kmr_mapfn_t | mapfn | ||
) |
Maps with a simple workflow.
The ranks are configured as lanes, which should be initialized by kmr_init_swf() in advance. The key part specifies the lane like "3.3.3", and the value part specifies the command-line arguments. The work-items in a lane run in the FIFO order. The lane specification can be an any-lane using a wildcard like "3.3.*". The higher level lane blocks the sublanes, thus, for example, an entry with the lane "3.3" blocks the following entries with the lanes "3.3.*".
Definition at line 2112 of file kmrwfmap.c.
|
static |
Schedules a lane for a next work-item (at-the-master).
There are no work-items currently running in the lane and its sublanes. It is called by kmr_yield_for_lane().
Definition at line 2557 of file kmrwfmap.c.
|
static |
Requests workers to start a work for a lane and its sublanes, and then connects to workers (at-the-master).
Definition at line 2720 of file kmrwfmap.c.
|
static |
Schedules a next work-item when a worker or a sublane finishes (at-the-master).
It returns MPI_SUCCESS when all of its workers and sublanes finish, or MPI_ERR_PENDING when some work-items are running. Scheduling of the lanes works in a bottom-up manner, and is implemented by two functions kmr_yield_for_lane() and kmr_schedule_lanes(). kmr_yield_for_lane() propagates the state upwards, and kmr_schedule_lanes() starts a work-item on the lane. kmr_yield_for_lane() is called when one of the workers or the sublanes finishes. kmr_schedule_lanes() is called when the lane is free (no running workers nor running sublanes) to start the lane. A call to kmr_yield_for_lane() first enters in a bottom-level lane and ascends its superlanes upwards. Note that the current work-item can be nothing.
Definition at line 2517 of file kmrwfmap.c.
void kmr_dump_swf_history | ( | KMR * | mr | ) |
Prints the history of kmr_map_swf(), which is the start ordering of the work-items.
The work-items are given sequence numbers from zero in the order in the KVS.
Definition at line 2874 of file kmrwfmap.c.
void kmr_dump_swf_order_history | ( | KMR * | mr, |
int * | history, | ||
size_t | count | ||
) |
Returns a list of start ordering of the work-items.
The work-items are given sequence numbers from zero in the order in the KVS, and the HISTORY vector is filled by them in the order of the starts of the work-items. The COUNT specifies the allocated length of the history vector.
Definition at line 2904 of file kmrwfmap.c.
void kmr_free_swf_history | ( | KMR * | mr | ) |
Clears the history recorded in kmr_map_swf().
The history is also automatically cleared on the next call to kmr_map_swf().
Definition at line 2933 of file kmrwfmap.c.