KMR
wc.mapper.py
1 #!/usr/bin/env python3
2 # -*-coding: utf-8;-*-
3 
4 # wc.mapper.py (2014-10-31)
5 #
6 # The combination of wc.mapper.py, wc.kvgen.sh and wc.reducer.py performs
7 # word counting of files in a specified directory.
8 #
9 # How to run this program.
10 #
11 # 1. Prepare input files
12 # $ mkdir ./inp
13 # $ cp ../file1 ./inp
14 # $ cp ../file2 ./inp
15 #
16 # There are two input files, so two mappers will be run to process them.
17 #
18 # 2. Execute kmrrun
19 # $ mpiexec -machinefile machines -np 2 ./kmrrun \
20 # -m ./wc.mapper.py -k ./wc.kvgen.sh -r ./wc.reducer.py ./inp
21 #
22 
23 import sys
24 import re
25 
# Words are delimited by runs of whitespace and/or '/' characters.
WORD_SEPARATOR = re.compile(r'[\s/]+')


def map_words(path):
    """Emit a "<word> 1" record for every word in the file at *path*.

    Each input line is split on runs of whitespace and '/' characters.
    One line of the form "<word> 1" is written per word, in input order,
    to a file named ``path + ".out"``, which is created or truncated.

    Raises whatever ``open`` raises if either file cannot be opened.
    """
    # The with-statement closes both files even when reading or writing
    # fails part-way through.
    with open(path) as rf, open(path + ".out", 'w') as wf:
        for line in rf:
            for w in WORD_SEPARATOR.split(line):
                # Splitting yields empty strings where a line starts or
                # ends with a separator (e.g. the trailing newline).
                if w:
                    wf.write("%s 1\n" % w)


def main(argv):
    """Run the mapper on the single input file named in *argv*.

    *argv* must hold exactly two items: the program name and the input
    file path. Otherwise an error message is written to stderr and the
    process exits with status 1.
    """
    if len(argv) != 2:
        sys.stderr.write("Specify an input file.\n")
        sys.exit(1)
    map_words(argv[1])


if __name__ == "__main__":
    main(sys.argv)