KMR
kmrfsplit.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 # -*-coding: utf-8;-*-
3 
4 ## Copyright (C) 2012-2018 RIKEN R-CCS
5 
6 ## \file kmrfsplit.py KMR-Shell File Splitter.
7 
8 import sys
9 import os
10 import re
11 from optparse import OptionParser
12 
## Write part of the inputfile to outputfile.
#  Copies the bytes in the range [startpos, endpos) of the input file
#  into the output file, which is created or truncated.
#  Both files are opened in binary mode so that startpos/endpos are
#  plain byte offsets (the same unit os.path.getsize reports) and no
#  decoding or newline translation alters the copied data.
#  If either file cannot be opened, an error message is printed and the
#  program exits with status 1.
#
#  @param ipath    inputfile path.
#  @param opath    outputfile path.
#  @param startpos start position of part (byte offset).
#  @param endpos   end position of part (byte offset, exclusive).

def writefile(ipath, opath, startpos, endpos):
    # read buffer size is 128Mbyte
    bufsize = 0x8000000

    try:
        fin = open(ipath, "rb")
    except IOError:
        print('Error: could not open "%s".' % ipath)
        sys.exit(1)

    # "with" closes the input even when opening the output fails.
    with fin:
        try:
            fout = open(opath, "wb")
        except IOError:
            print('Error: could not open "%s".' % opath)
            sys.exit(1)

        with fout:
            fin.seek(startpos, 0)
            remain = endpos - startpos
            while remain > 0:
                # buffered read/write, never reading past endpos.
                buf = fin.read(min(bufsize, remain))
                if not buf:
                    # End of file reached before endpos: nothing more
                    # to copy, and looping on would never terminate.
                    break
                fout.write(buf)
                remain -= len(buf)
45 
## Calculate cutting point of file.
#  The tentative cutting point is startpos + partsize.  The separator
#  is searched for in the data that follows that point (at most one
#  read buffer), and the position just after the first match is
#  returned, so the separator itself belongs to the part being cut.
#  If the tentative cutting point is at or beyond the end of the file,
#  the file size is returned, because there is nothing left to search.
#  If the separator is not found, print error message and exit.
#
#  The file is read in binary mode so that the returned value is a byte
#  offset, consistent with os.path.getsize.  A separator given as str
#  is therefore matched as its UTF-8 encoded bytes.
#
#  @param ipath    input file path.
#  @param sep      separator string. (regular expression)
#  @param startpos start position of the part (byte offset).
#  @param partsize target size of the part in bytes.
#  @return byte offset of the cutting point (end position of the part).

def getcuttingpoint(ipath, sep, startpos, partsize):
    # read buffer size is 128Mbyte
    bufsize = 0x8000000
    filesize = os.path.getsize(ipath)

    endpos = min(startpos + partsize, filesize)
    if endpos >= filesize:
        # remain size of file is not larger than partition size:
        # the part simply runs to the end of the file.
        return filesize

    # never try to read beyond the end of the file.
    bufsize = min(bufsize, filesize - endpos)

    try:
        f = open(ipath, "rb")
    except IOError:
        print('Error: could not open "%s".' % ipath)
        sys.exit(1)
    with f:
        f.seek(endpos, 0)
        # read size of buffer.
        buf = f.read(bufsize)

    # search separator string in the buffer (bytes pattern on bytes data,
    # so that the match end is a byte count).
    if isinstance(sep, bytes):
        pattern = sep
    else:
        pattern = sep.encode("utf-8")
    ret = re.search(pattern, buf)
    if ret is None:
        print("Separator not found in proper position.\n")
        sys.exit(1)
    return endpos + ret.end()
84 
## Split a file using separator string.
#  The input file is divided into nums parts of roughly equal size
#  (file size // nums), each cut just after an occurrence of the
#  separator.  Parts are written to odir and named opref followed by a
#  six-digit, zero-padded part number starting at 000000.  The last
#  part receives whatever remains of the input file.
#  A progress dot is printed for every part except the last one.
#  If nums is smaller than 1, an error message is printed and the
#  program exits with status 1.
#
#  @param nums   number of part to split.
#  @param sep    separator string. (regular expression)
#  @param odir   output directory of splitted files.
#  @param opref  output file prefix of splitted files.
#  @param infile input file path.

def splitfile(nums, sep, odir, opref, infile):
    if nums < 1:
        # nums == 0 would divide by zero below, and a negative value
        # would produce a single part with a negative number in its name.
        print('Error: number of parts must be 1 or greater.')
        sys.exit(1)

    filesize = os.path.getsize(infile)
    partsize = filesize // nums
    startpos = 0

    print("Splitting file: ")
    for i in range(nums - 1):
        endpos = getcuttingpoint(infile, sep, startpos, partsize)

        # compose output file name.
        # ex: partXXXXXX, where XXXXXX is number of part.
        opath = os.path.join(odir, opref + "%06d" % i)
        # output the cut part of input file.
        writefile(infile, opath, startpos, endpos)
        startpos = endpos
        sys.stdout.write('.')
        sys.stdout.flush()

    # output remaining part of input file.
    opath = os.path.join(odir, opref + "%06d" % (nums - 1))
    writefile(infile, opath, startpos, filesize)
    print("done.")
117 
## kmrfsplit main routine.
#  Parses the command line, checks the input file and the output
#  directory, then splits the input file with splitfile().
#  Usage errors are reported through the option parser; every other
#  error prints a message and exits with status 1.

def main():
    usage = "usage: %prog [options] inputfile"
    parser = OptionParser(usage)

    parser.add_option("-n",
                      "--num-separate",
                      dest="nums",
                      type="int",
                      help="number of file separation",
                      metavar="number",
                      default=1)

    parser.add_option("-s",
                      "--separator",
                      dest="sep",
                      type="string",
                      help="separator string",
                      metavar="'string'",
                      default='\n')

    parser.add_option("-d",
                      "--output-directory",
                      dest="odir",
                      type="string",
                      help="output directory",
                      metavar="'string'",
                      default="./")

    parser.add_option("-p",
                      "--output-file-prefix",
                      dest="opref",
                      type="string",
                      help="output filename prefix",
                      metavar="'string'",
                      default="part")

    parser.add_option("-f",
                      "--force",
                      dest="force",
                      action="store_true",
                      help="force option",
                      default=False)

    (options, args) = parser.parse_args()

    # parameter check.
    # parser.error() prints the usage message and exits by itself,
    # so no explicit exit is needed after it.
    if len(args) != 1:
        parser.error("missing parameter")
    if options.nums < 1:
        parser.error("number of file separation must be 1 or greater")

    inputfile = args[0]

    if not os.path.exists(inputfile):
        print('Error: inputfile %s is not exist.' % inputfile)
        sys.exit(1)

    if os.path.exists(options.odir):
        if not os.path.isdir(options.odir):
            print('Error: "%s" is not directory.' % options.odir)
            sys.exit(1)
    elif options.force:
        # -f asks for a missing output directory to be created.
        try:
            os.mkdir(options.odir)
        except OSError:
            print('Error: could not create "%s".' % options.odir)
            sys.exit(1)
    else:
        print('Error: directory "%s" is not exist. create it or use -f option.' % options.odir)
        sys.exit(1)

    splitfile(options.nums, options.sep, options.odir, options.opref, inputfile)


if __name__ == "__main__":
    main()
194 
195 # Copyright (C) 2012-2018 RIKEN R-CCS
196 # This library is distributed WITHOUT ANY WARRANTY. This library can be
197 # redistributed and/or modified under the terms of the BSD 2-Clause License.