KMR
kmrrun
wc.mapper.py
#!/usr/bin/env python3
# -*-coding: utf-8;-*-

# wc.mapper.py (2014-10-31)
#
# The combination of wc.mapper.py, wc.kvgen.sh and wc.reducer.py performs
# word counting of files in a specified directory.
#
# How to run this program.
#
# 1. Prepare input files
#    $ mkdir ./inp
#    $ cp ../file1 ./inp
#    $ cp ../file2 ./inp
#
#    There are two files, so two mappers will be run to process them.
#
# 2. Execute kmrrun
#    $ mpiexec -machinefile machines -np 2 ./kmrrun \
#      -m ./wc.mapper.py -k ./wc.kvgen.sh -r ./wc.reducer.py ./inp
#

import sys
import re

# Words are delimited by runs of whitespace and/or '/' characters.
_WORD_SEPARATOR = re.compile(r'[\s/]+')


def main(argv):
    """Emit a "<word> 1" line for every word in the given input file.

    The input file named by ``argv[1]`` is read line by line and each line
    is split on runs of whitespace and '/' characters.  Every non-empty
    token is written as ``"<word> 1\\n"`` to a file whose name is the input
    path with ``".out"`` appended.

    Args:
        argv: Command-line style argument list; ``argv[0]`` is the program
            name and ``argv[1]`` is the path of the input file.

    Returns:
        0 on success, or 1 when the argument count is wrong (a message is
        written to stderr in that case).

    Raises:
        OSError: If the input file cannot be opened for reading or the
            output file cannot be opened for writing.
    """
    if len(argv) != 2:
        sys.stderr.write("Specify an input file.\n")
        return 1

    in_path = argv[1]
    # The with-statement closes both files even if reading or writing
    # fails part-way through, or if the output file cannot be opened.
    with open(in_path) as rf, open(in_path + ".out", 'w') as wf:
        for line in rf:
            for w in _WORD_SEPARATOR.split(line):
                # Splitting yields empty strings at line edges (leading
                # separators, the trailing newline); they are not words.
                if w:
                    wf.write("%s 1\n" % w)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Generated by
1.8.14