28 #define KMR_NO_STATIC_SPAWNING 30 #ifdef KMR_NO_STATIC_SPAWNING 31 #define KMR_MAIN_LIBRARY (1) 33 #define KMR_MAIN_LIBRARY (0) 46 #define kmr_free(P,S) free((P)) 47 #define kmr_error(MR,S) ((*kmr_ld_err)(DIE,(S))) 48 #define kmr_malloc(S) malloc((S)) 52 #define kmr_mpi_byte MPI_BYTE 53 #define kmr_mpi_comm_null MPI_COMM_NULL 55 #define kmr_mpi_byte (hooks->h.mpi_byte) 56 #define kmr_mpi_comm_null (hooks->h.mpi_comm_null) 60 #define kmr_spawn_true_exit exit 61 #define kmr_spawn_true_mpi_finalize MPI_Finalize 62 #define kmr_spawn_mpi_comm_size MPI_Comm_size 63 #define kmr_spawn_mpi_comm_rank MPI_Comm_rank 64 #define kmr_spawn_mpi_send MPI_Send 65 #define kmr_spawn_mpi_recv MPI_Recv 66 #define kmr_spawn_mpi_get_count MPI_Get_count 67 #define kmr_spawn_mpi_intercomm_create MPI_Intercomm_create 68 #define kmr_spawn_mpi_comm_dup MPI_Comm_dup 69 #define kmr_spawn_mpi_comm_free MPI_Comm_free 70 #define kmr_spawn_mpi_comm_set_name MPI_Comm_set_name 76 #define KMR_LIBAPI(FN) FN ## _standin 78 #define KMR_LIBAPI(FN) FN 94 if (kmr_fake_spawn_hooks != 0 && kmr_fake_spawn_hooks != hooks) {
97 kmr_fake_spawn_hooks = hooks;
105 int argc,
char **argv);
117 MPI_Comm basecomm,
int masterrank,
119 int argc,
char **argv),
120 int nsubworlds, MPI_Comm subworlds[],
121 unsigned long colors[],
size_t argssize)
127 cc = kmr_spawn_mpi_comm_size(basecomm, &nprocs);
128 assert(cc == MPI_SUCCESS);
129 cc = kmr_spawn_mpi_comm_rank(basecomm, &rank);
130 assert(cc == MPI_SUCCESS);
132 if (!(0 <= masterrank && masterrank < nprocs)) {
134 snprintf(ee,
sizeof(ee),
135 (
"Bad master rank to kmr_spawn_setup() (master=%d).\n"),
137 kmr_error(hooks->s.mr, ee);
140 if (rank == masterrank) {
142 snprintf(ee,
sizeof(ee),
143 (
"Bad master rank to kmr_spawn_setup();" 144 " master in workers.\n"));
145 kmr_error(hooks->s.mr, ee);
148 if (nsubworlds > KMR_SPAWN_SUBWORLDS) {
150 snprintf(ee,
sizeof(ee),
151 (
"Too many subworlds to kmr_spawn_setup()" 152 " (n=%d, limit=%d).\n"),
153 nsubworlds, KMR_SPAWN_SUBWORLDS);
154 kmr_error(hooks->s.mr, ee);
158 hooks->s.master_rank = masterrank;
159 hooks->s.base_rank = rank;
160 hooks->s.base_comm = basecomm;
161 for (
int i = 0; i < KMR_SPAWN_SUBWORLDS; i++) {
162 if (i < nsubworlds) {
163 hooks->s.subworlds[i].comm = subworlds[i];
164 hooks->s.subworlds[i].color = colors[i];
166 hooks->s.subworlds[i].comm = kmr_mpi_comm_null;
167 hooks->s.subworlds[i].color = 0;
172 hooks->s.exec_fn = execfn;
174 hooks->s.exec_fn = kmr_spawn_exec_command;
177 if (hooks->s.rpc_buffer != 0) {
178 kmr_free(hooks->s.rpc_buffer, hooks->s.rpc_size);
179 hooks->s.rpc_buffer = 0;
183 hooks->s.rpc_size = msz;
185 hooks->s.spawn_world = kmr_mpi_comm_null;
186 hooks->s.spawn_parent = kmr_mpi_comm_null;
187 hooks->s.running_work = 0;
188 hooks->s.mpi_initialized = 0;
189 hooks->s.abort_when_mpi_abort = 0;
195 KMR_LIBAPI(kmr_spawn_set_verbosity) (
struct kmr_spawn_hooks *hooks,
int level)
197 hooks->s.print_trace = (level >= 2);
201 kmr_ld_set_error_printer(level, 0);
219 KMR_LIBAPI(kmr_spawn_service) (
struct kmr_spawn_hooks *hooks,
int status)
221 _Bool tracing5 = (hooks->s.print_trace);
222 MPI_Comm basecomm = hooks->s.base_comm;
223 const int master = hooks->s.master_rank;
227 assert(hooks->s.base_rank != hooks->s.master_rank);
228 assert(hooks->s.rpc_buffer != 0 && hooks->s.rpc_size > 0);
233 cc = kmr_spawn_clean_process(hooks);
234 assert(cc == MPI_SUCCESS);
242 fprintf(stderr,
";;KMR [%05d] Entering service loop.\n",
250 mbuf->req = KMR_SPAWN_NEXT;
251 mbuf->protocol_version = KMR_SPAWN_MAGIC;
252 mbuf->initial_message = (hooks->s.service_count == 0);
253 mbuf->status = exitstatus;
255 cc = kmr_spawn_mpi_send(mbuf, msz, kmr_mpi_byte, master,
256 KMR_SPAWN_RPC_TAG, basecomm);
257 assert(cc == MPI_SUCCESS);
261 int msz = (int)hooks->s.rpc_size;
262 mbuf->req = KMR_SPAWN_NONE;
265 cc = kmr_spawn_mpi_recv(mbuf, msz, kmr_mpi_byte, master,
266 KMR_SPAWN_RPC_TAG, basecomm, &st);
267 assert(cc == MPI_SUCCESS);
268 cc = kmr_spawn_mpi_get_count(&st, kmr_mpi_byte, &len);
269 assert(cc == MPI_SUCCESS);
270 size_t msglen = (size_t)len;
271 int rank = st.MPI_SOURCE;
273 hooks->s.service_count++;
274 assert(hooks->s.service_count != 0);
277 case KMR_SPAWN_NONE: {
280 assert(w->protocol_version == KMR_SPAWN_MAGIC);
285 case KMR_SPAWN_WORK: {
287 if (msglen != (
size_t)w0->message_size) {
288 kmr_error(hooks->s.mr,
289 "Bad RPC message size");
292 assert(w0->protocol_version == KMR_SPAWN_MAGIC);
298 ";;KMR [%05d] Receive an activate message.\n",
308 cc = kmr_spawn_clean_process(hooks);
309 assert(cc == MPI_SUCCESS);
313 memcpy(w, w0, msglen);
314 cc = kmr_spawn_join_to_master(hooks, w, msglen);
315 assert(cc == MPI_SUCCESS);
316 exitstatus = kmr_spawn_start_work(hooks, w, msglen);
322 cc = kmr_spawn_clean_process(hooks);
323 assert(cc == MPI_SUCCESS);
330 snprintf(ee,
sizeof(ee),
331 "Bad RPC message request=0x%x length=%zd from rank=%d.\n",
332 mbuf->req, msglen, rank);
333 kmr_error(hooks->s.mr, ee);
342 if (hooks->s.rpc_buffer != 0) {
343 kmr_free(hooks->s.rpc_buffer, hooks->s.rpc_size);
344 hooks->s.rpc_buffer = 0;
345 hooks->s.rpc_size = 0;
348 kmr_spawn_true_mpi_finalize();
349 kmr_spawn_true_exit(0);
353 static int kmr_spawn_make_argv_printable(
char *s,
size_t sz,
char **argv);
361 _Bool tracing5 = (hooks->s.print_trace || w->print_trace != 0);
364 if (!(0 <= w->subworld && w->subworld < KMR_SPAWN_SUBWORLDS)
365 || hooks->s.subworlds[w->subworld].comm == kmr_mpi_comm_null) {
367 snprintf(ee,
sizeof(ee),
368 (
"Bad subworld index for spawning (index=%d).\n"),
370 kmr_error(hooks->s.mr, ee);
378 char *e = &(w->args[asz]);
381 while (p[0] != 0 && p < (e - 1)) {
383 while (p[0] != 0 && p < (e - 1)) {
391 assert(p == (e - 1) || p[0] == 0);
394 char *argv[argc + 1];
399 char *e = &(w->args[asz]);
402 while (p[0] != 0 && p < (e - 1)) {
406 while (p[0] != 0 && p < (e - 1)) {
414 assert(p == (e - 1) || p[0] == 0);
419 hooks->s.running_work = w;
420 hooks->s.mpi_initialized = 0;
424 kmr_spawn_make_argv_printable(aa, 45, argv);
425 printf(
";;KMR [%05d] EXEC: %s\n",
426 hooks->s.base_rank, aa);
430 assert(hooks->s.exec_fn != 0);
431 int exitstatus = (*hooks->s.exec_fn)(hooks, argc, argv);
434 hooks->s.running_work = 0;
435 hooks->s.mpi_initialized = 0;
441 kmr_spawn_exec_command(
struct kmr_spawn_hooks *hooks,
int argc,
char **argv)
446 kmr_ld_usoexec(argv, 0, hooks->d.initial_argv, hooks->d.options_flags,
447 hooks->d.options_heap_bottom);
453 kmr_spawn_make_argv_printable(
char *s,
size_t sz,
char **argv)
460 for (
int i = 0; argv[i] != 0; i++) {
461 cc = snprintf(&s[cnt], (sz - cnt),
"%s%s",
462 (i == 0 ?
"" :
","), argv[i]);
465 snprintf(&s[sz - 4], 4,
"...");
478 assert(w->subworld < KMR_SPAWN_SUBWORLDS);
479 assert(hooks->s.subworlds[w->subworld].comm != kmr_mpi_comm_null);
480 MPI_Comm basecomm = hooks->s.base_comm;
482 MPI_Comm subworld = hooks->s.subworlds[w->subworld].comm;
483 unsigned long color = hooks->s.subworlds[w->subworld].color;
484 _Bool tracing5 = hooks->s.print_trace;
489 cc = kmr_spawn_mpi_comm_size(subworld, &nprocs);
490 assert(cc == MPI_SUCCESS);
491 if (w->nprocs > nprocs) {
493 snprintf(ee,
sizeof(ee),
494 (
"Bad spawn call; number of procs mismatch" 495 " (requested=%d size=%d)"),
497 kmr_error(hooks->s.mr, ee);
500 if (w->color != color) {
502 snprintf(ee,
sizeof(ee),
503 (
"Bad spawn call; color mismatch (%lu,%lu).\n"),
505 kmr_error(hooks->s.mr, ee);
509 assert(hooks->s.spawn_world == kmr_mpi_comm_null);
510 assert(hooks->s.spawn_parent == kmr_mpi_comm_null);
512 cc = kmr_spawn_mpi_comm_dup(subworld, &hooks->s.spawn_world);
513 assert(cc == MPI_SUCCESS);
514 cc = kmr_spawn_mpi_comm_set_name(hooks->s.spawn_world,
515 hooks->h.world_name);
516 assert(cc == MPI_SUCCESS);
519 fprintf(stderr,
";;KMR [%05d] Connect to master.\n",
524 cc = kmr_spawn_mpi_intercomm_create(hooks->s.spawn_world, 0,
526 hooks->s.master_rank,
528 &hooks->s.spawn_parent);
529 assert(cc == MPI_SUCCESS);
541 if (hooks->s.running_work != 0) {
542 kmr_free(hooks->s.running_work,
543 (
size_t)hooks->s.running_work->message_size);
544 hooks->s.running_work = 0;
547 hooks->s.mpi_initialized = 0;
549 if (hooks->s.spawn_world != kmr_mpi_comm_null) {
550 cc = kmr_spawn_mpi_comm_free(&hooks->s.spawn_world);
551 assert(cc == MPI_SUCCESS);
553 if (hooks->s.spawn_parent != kmr_mpi_comm_null) {
554 cc = kmr_spawn_mpi_comm_free(&hooks->s.spawn_parent);
555 assert(cc == MPI_SUCCESS);
564 __attribute__ ((noreturn))
void 567 while (1) {sleep(3600);}
Utilities Private Part (do not include from applications).
#define kmr_malloc(Z)
Allocates memory, or aborts when failed.