KMR
wc.reducer.py
1 #!/usr/bin/env python3
2 # -*-coding: utf-8;-*-
3 
4 # wc.reducer.py (2014-10-31)
5 #
6 # The combination of wc.mapper.py, wc.kvgen.sh and wc.reducer.py performs
7 # word counting of files in a specified directory.
8 #
9 # How to run this program.
10 #
11 # 1. Prepare input files
12 # $ mkdir ./inp
13 # $ cp ../file1 ./inp
14 # $ cp ../file2 ./inp
15 #
# Because there are two input files, two mappers will be run to process them.
17 #
18 # 2. Execute kmrrun
19 # $ mpiexec -machinefile machines -np 2 ./kmrrun \
20 # -m ./wc.mapper.py -k ./wc.kvgen.sh -r ./wc.reducer.py ./inp
21 #
22 
23 import sys
24 import os
25 
def reduce_file(path):
    """Sum the counts of every "<key> <count>" record in *path*.

    Each non-blank line must hold exactly two whitespace-separated fields:
    a key and an integer count.  The key of the first record is used as the
    key of the result; keys on later lines are not compared against it.

    Returns a ``(key, total)`` tuple, or ``(None, 0)`` when the file holds
    no records.  Raises ValueError for a line that does not have exactly
    two fields or whose count is not an integer.
    """
    key = None
    total = 0
    # The context manager closes the file even if a malformed line raises.
    with open(path) as records:
        for line in records:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a stray trailing newline): nothing to add.
                continue
            word, num = fields
            if key is None:
                key = word
            total += int(num)
    return key, total


if __name__ == "__main__":
    argv = sys.argv
    if len(argv) != 2:
        sys.stderr.write("Specify an input file.\n")
        sys.exit(1)

    key, count = reduce_file(argv[1])

    # The input file is consumed by this script: delete it once it is read.
    os.remove(argv[1])
    # An empty input has no key, so there is no record to emit; printing
    # "None 0" would look like a count for the word "None".
    if key is not None:
        print("%s %d" % (key, count))