KMR
Classes | Macros | Functions | Variables
kmrfiles.c File Reference

File Access Support. More...

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libgen.h>
#include <limits.h>
#include <errno.h>
#include <assert.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include "kmr.h"
#include "kmrimpl.h"
#include "kmrfefs.h"

Go to the source code of this file.

Classes

struct  kmr_file_reader
 Segment Reading Information of Each Rank. More...
 

Macros

#define ABS(X)   ((X) < 0 ? -(X) : (X))
 
#define CEILING(N, D)   (((N)+(D)-1)/(D))
 
#define CHUNK_LIMIT   (16 * 1024 *1024)
 Read size limit. More...
 
#define COMINGSOON   0
 
#define MAX(a, b)   (((a)>(b))?(a):(b))
 
#define MIN(a, b)   (((a)<(b))?(a):(b))
 
#define NEVERHERE   0
 

Functions

static void kmr_assert_file_readers_are_sorted (struct kmr_file_reader *sgv, long n)
 
static int kmr_assign_ranks_to_stripe (KMR *mr, char *file, struct kmr_file_reader *sgv, int colorsetsize, _Bool leader, struct kmr_fefs_stripe *stripe)
 
static int kmr_assign_ranks_trivially (KMR *mr, char *file, struct kmr_file_reader *sgv, int colorsetsize, _Bool leader, struct kmr_fefs_stripe *stripe)
 
static int kmr_copyout_file_readers (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvi, KMR_KVS *kvo, void *p)
 
int kmr_file_enumerate (KMR *mr, char *names[], int n, KMR_KVS *kvo, struct kmr_file_option fopt)
 Adds file names in a key-value stream KVO. More...
 
static int kmr_file_reader_compare (const void *p0, const void *p1)
 
static int kmr_get_stripe (KMR *mr, char *file, int colorsetsize, _Bool leader, struct kmr_fefs_stripe *stripe)
 
static int kmr_iogroup (kmr_k_position_t p)
 
int kmr_iogroup_distance (int a0, int a1)
 
int kmr_iogroup_of_node (KMR *mr)
 Returns an I/O-group (an integer key) of a compute node. More...
 
int kmr_iogroup_of_obd (int obdidx)
 Returns an I/O-group (an integer key) of a disk from an OBDIDX of Lustre file-system. More...
 
int kmr_map_file_names (KMR *mr, char **names, int n, struct kmr_file_option fopt, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 Maps on file names. More...
 
int kmr_map_getline (KMR *mr, FILE *f, long limit, _Bool largebuffering, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 Calls a map-function M for each line by getline() on an input F. More...
 
int kmr_map_getline_in_memory_ (KMR *mr, void *b, size_t sz, long limit, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 
static int kmr_map_getline_nothreading (KMR *mr, FILE *f, long limit, _Bool largebuffering, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 
static int kmr_map_getline_threading (KMR *mr, FILE *f, long limit, _Bool largebuffering, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 
static int kmr_read_and_gather (KMR *mr, _Bool reassembling, char *file, int fd, off_t baseoffset, char *buffer, off_t totalsize, char *tmpbuf, struct kmr_fefs_stripe *stripe, long maxloops, struct kmr_file_reader *sgv, int colorsetsize, int colorindex)
 
int kmr_read_file_by_segments (KMR *mr, char *file, int color, void **buffer, off_t *size)
 Reads one file by segments and reassembles by all-gather. More...
 
int kmr_read_files_reassemble (KMR *mr, char *file, int color, off_t offset, off_t bytes, void **buffer, off_t *size)
 Reassembles files reading by ranks. More...
 
static int kmr_share_segment_information (KMR *mr, char *file, int color, _Bool ingesting, _Bool digesting, off_t offset, off_t bytes, struct kmr_file_reader **sgvq, int *colorsetsizeq, int *colorindexq)
 
static int kmr_share_striping_information (KMR *mr, char *file, int color, struct kmr_file_reader *sgv, int colorsetsize, _Bool leader, _Bool ingesting, _Bool digesting, struct kmr_fefs_stripe *stripe)
 
static int kmr_take_maximum_loop_count (KMR *mr, off_t reads, struct kmr_fefs_stripe *stripe, long *maxloopsq)
 

Variables

static const struct kmr_fefs_stripe_info kmr_bad_stripe = {.size=0, .count=0, .offset=0}
 

Detailed Description

File Access Support.

This provides mappers working on files and directories, and in particular provides support for the file-system configuration on K. The access practice on the file-system on K has affinity to the z-axis of the TOFU network, to lessen the disturbance to the communication of other users. Thus, I/O-groups are formed by the nodes on the same z-axis. To respect this access practice, a file should be accessed by nodes at particular positions. The routines defined here ease this coupling. MEMO: This part uses MPI routines directly, because messages here are not eight-byte aligned.

Definition in file kmrfiles.c.

Macro Definition Documentation

◆ CHUNK_LIMIT

#define CHUNK_LIMIT   (16 * 1024 *1024)

Read size limit.

A large read at once slows down on K, due to the limited amount of kernel buffer (rumored to be 128MB).

Definition at line 48 of file kmrfiles.c.

Function Documentation

◆ kmr_iogroup_of_node()

int kmr_iogroup_of_node ( KMR *  mr)

Returns an I/O-group (an integer key) of a compute node.

Definition at line 106 of file kmrfiles.c.

◆ kmr_iogroup_of_obd()

int kmr_iogroup_of_obd ( int  obdidx)

Returns an I/O-group (an integer key) of a disk from an OBDIDX of Lustre file-system.

It uses magic expressions (x=obdidx/2048) and (y=(obdidx%2048)/64).

Definition at line 120 of file kmrfiles.c.

◆ kmr_read_files_reassemble()

int kmr_read_files_reassemble ( KMR *  mr,
char *  file,
int  color,
off_t  offset,
off_t  bytes,
void **  buffer,
off_t *  size 
)

Reassembles files reading by ranks.

It is intended to reassemble a file from files split into segments. FILE is a file name. A file name can be null when the rank does not participate in reading (COLOR=-1). COLOR groups ranks (COLOR must be >=-1). The files on the ranks with the same COLOR are concatenated, where the concatenation is ordered by the rank-order. Reading is performed for OFFSET and BYTES on each file. BYTES can be -1 to read an entire file. BUFFER and SIZE are set to the malloced buffer and its size on return. Ranks with non-null FILE retrieve a file (ingest), while ranks with non-zero BUFFER receive contents (digest). Ranks with COLOR=-1 do not participate in file reading. REMARK ON K: Each rank reads the specified file, assuming the files reside in specific I/O-groups to the ranks.

Definition at line 653 of file kmrfiles.c.

◆ kmr_read_file_by_segments()

int kmr_read_file_by_segments ( KMR *  mr,
char *  file,
int  color,
void **  buffer,
off_t *  size 
)

Reads one file by segments and reassembles by all-gather.

FILE is a file name. COLOR groups ranks (COLOR must be >=-1). The ranks with the same COLOR collaborate to read a file, and thus they must specify the same file (with an identical inode number). BUFFER and SIZE are set to the malloced buffer and its size on return. Ranks with non-zero FILE retrieve a file (ingest). Ranks with non-zero BUFFER receive contents (digest). Ranks with COLOR=-1 do not participate in file reading, and in that case the arguments should be FILE=0 and BUFFER=0.

Definition at line 1021 of file kmrfiles.c.

◆ kmr_file_enumerate()

int kmr_file_enumerate ( KMR *  mr,
char *  names[],
int  n,
KMR_KVS *  kvo,
struct kmr_file_option  fopt 
)

Adds file names in a key-value stream KVO.

It checks that the file name NAMES[i] exists, and adds it for a regular file, or enumerates it for a directory.

Definition at line 1157 of file kmrfiles.c.

◆ kmr_map_file_names()

int kmr_map_file_names ( KMR *  mr,
char **  names,
int  n,
struct kmr_file_option  fopt,
KMR_KVS *  kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m 
)

Maps on file names.

NAMES specifies N file names. The map-function gets a file name in the key field (the value field is integer zero). File-option EACH_RANK specifies that each rank enumerates file names independently; otherwise enumeration works on rank0 only. File-option SUBDIRECTORIES specifies to descend into subdirectories. It ignores files/directories whose names start with dots. File-option LIST_FILE specifies to read the contents of each file for file names. Such a file consists of one file name per line, and a line beginning with a "#" is ignored. Whitespace is trimmed at the beginning and the end. LIST_FILE implies SUBDIRECTORIES. It enumerates names of regular files only. File-option SHUFFLE_FILES runs shuffling of file names among ranks.

Definition at line 1372 of file kmrfiles.c.

◆ kmr_map_getline()

int kmr_map_getline ( KMR *  mr,
FILE *  f,
long  limit,
_Bool  largebuffering,
KMR_KVS *  kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m 
)

Calls a map-function M for each line by getline() on an input F.

A map-function gets a line number in key and a string in value (the index argument is the same as the key). Calls to getline() are limited to LIMIT lines (0 for unlimited). It is multi-threaded and the call order is arbitrary. ARG and OPT are passed verbatim to a map-function. Effective-options: NOTHREADING, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.

Definition at line 1561 of file kmrfiles.c.