Script scriptutil
[hide private]
[frames | no frames]

Source Code for Script scriptutil

  1  #!/usr/bin/env python 
  2  # encoding: utf-8 
  3   
  4  """ 
  5  Copyright (c) 2008, Muharem Hrnjadovic 
  6   
  7  All rights reserved. 
  8   
  9  Redistribution and use in source and binary forms, with or without  
 10  modification, are permitted provided that the following conditions 
 11  are met: 
 12   
 13      - Redistributions of source code must retain the above copyright notice, 
 14        this list of conditions and the following disclaimer. 
 15      - Redistributions in binary form must reproduce the above copyright 
 16        notice, this list of conditions and the following disclaimer in the 
 17        documentation and/or other materials provided with the distribution. 
 18      - Neither the name of Muharem Hrnjadovic nor the names of other 
 19        contributors may be used to endorse or promote products derived from 
 20        this software without specific prior written permission. 
 21   
 22  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 23  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 24  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 25  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 
 26  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 27  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 28  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 29  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 30  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 31  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 32  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 33   
 34  --------------------------------------------------------------------------- 
 35   
 36  Module providing functions commonly used in shell scripting: 
 37   
 38    - ffind()    : finds files in a directory tree 
 39    - ffindgrep(): finds files in a directory tree and matches their 
 40                   content to regular expressions 
 41    - freplace() : in-place search/replace of files in a directory tree 
 42                   with regular expressions 
 43    - printr()   : prints the results of the ffind()/ffindgrep() functions 
 44   
 45  Please see the documentation strings of the particular functions for 
 46  detailed information. 
 47  """ 
 48   
 49  __version__ = "1.0" 
 50   
 51  import fnmatch, itertools, os, sys, re, types 
 52   
class ScriptError(Exception):
    """Error type raised by the functions of this module when they fail."""
    pass
55
def ffind(path, shellglobs=None, namefs=None, relative=True):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence).

    Please note that the shell wildcards work in a cumulative fashion
    i.e. each of them is applied to the full set of file *names* found.
    A file whose name matches more than one wildcard is nevertheless
    reported only once.

    Conversely, all the functions in 'namefs'
        - only get to see the output of their respective predecessor
          function in the sequence (with the obvious exception of the
          first function)
        - are applied to the full file *path* (whereas the shell-style
          wildcards are only applied to the file *names*)

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type relative: bool
    @param relative: a boolean flag that determines whether absolute or
        relative paths should be returned; when true the paths are left
        as produced by os.walk(path), otherwise os.path.abspath() is
        applied to each of them
    @rtype: list
    @return: paths for files found
    @raise ScriptError: if 'path' is not readable or if anything goes
        wrong while walking the tree or applying the filters
    """
    if not os.access(path, os.R_OK):
        raise ScriptError("cannot access path: '%s'" % path)

    fileList = []   # result list
    try:
        for dirpath, _subdirs, names in os.walk(path):
            if shellglobs:
                # Union of all wildcard matches, in pattern order; 'seen'
                # keeps a name matched by several patterns from being
                # listed more than once.
                seen = set()
                matched = []
                for pattern in shellglobs:
                    for name in names:
                        if name in seen:
                            continue
                        if fnmatch.fnmatchcase(name, pattern):
                            seen.add(name)
                            matched.append(name)
                names = matched
            # os.path.join avoids a doubled separator when 'dirpath'
            # already ends with one (e.g. path given as "dir/").
            fileList.extend([os.path.join(dirpath, name) for name in names])
        if not relative:
            fileList = [os.path.abspath(p) for p in fileList]
        if namefs:
            # Build real lists (not lazy iterators) so that the result
            # can be tested for emptiness and traversed repeatedly.
            for namef in namefs:
                fileList = [p for p in fileList if namef(p)]
    except Exception as e:
        raise ScriptError(str(e))
    return fileList
105
def ffindgrep(path, regexl, shellglobs=None, namefs=None,
              relative=True, linenums=False):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence) and search inside these.

    Additionally, the file content will be filtered by the regular
    expressions in the 'regexl' sequence. Each entry in the latter is

        - either a tuple with arguments accepted by re.compile() (the
          re.M and re.S flags will have no effect though, since the
          expressions are applied to one line at a time)
        - or anything else accepted by re.compile() as its sole
          argument, typically a string with the regex definition

    The expressions are applied one after the other, i.e. only lines
    matched by *all* of them are kept.

    For all the files that pass the file name/content tests the function
    returns a dictionary where the

        - key is the file name and the
        - value is a string with lines filtered by 'regexl'

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type regexl: sequence
    @param regexl: the regular expressions used to filter the lines of
        the files found
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type relative: bool
    @param relative: a boolean flag that determines whether absolute or
        relative paths should be returned
    @type linenums: bool
    @param linenums: turns on line numbers for found files (like grep -n)
    @rtype: dict
    @return: file name (key) and lines filtered by 'regexl' (value)
    @raise ScriptError: if a regular expression cannot be compiled or a
        file cannot be read
    """
    fileList = ffind(path, shellglobs=shellglobs,
                     namefs=namefs, relative=relative)
    if not fileList:
        return dict()

    result = dict()

    try:
        # first compile the regular expressions
        searchers = []
        for redata in regexl:
            if isinstance(redata, tuple):
                searchers.append(re.compile(*redata).search)
            else:
                searchers.append(re.compile(redata).search)
        # now grep in the files found
        for fname in fileList:
            # read file content; 'with' closes the handle even on error
            with open(fname, 'r') as fhandle:
                fcontent = fhandle.read()
            # always carry (line number, text) pairs, numbered from 1;
            # the numbers are simply dropped below if not requested
            lines = list(enumerate(fcontent.splitlines(), 1))
            for search in searchers:
                # a real list (not a lazy filter) so that the emptiness
                # test works and 'search' is applied immediately
                lines = [pair for pair in lines if search(pair[1])]
                # there's no point in applying the remaining regular
                # expressions if we don't have any matching lines any more
                if not lines:
                    break
            # add this file to the result set only if lines matched
            if lines:
                if linenums:
                    result[fname] = '\n'.join(["%d:%s" % pair
                                               for pair in lines])
                else:
                    result[fname] = '\n'.join([text for _num, text in lines])
    except Exception as e:
        raise ScriptError(str(e))
    return result
182
def freplace(path, regexl, shellglobs=None, namefs=None, bext='.bak'):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence) and perform an in-place search/replace
    operation on these.

    The search/replace operation is performed on the content of all the
    files (whose names passed the tests) using the regular expressions
    in 'regexl'.

    Please note: 'regexl' is a sequence of 3-tuples, each having the
    following elements:

        - search string (Python regex syntax)
        - replace string (Python regex syntax)
        - regex flags or 'None' (re.compile syntax)

    Before a modified file is written, the original file is renamed to
    a backup file whose name is the original name plus the extension
    specified in 'bext' (an existing backup of that name is replaced).
    If 'bext' is empty no backup is kept and the file is overwritten
    in place.

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type regexl: sequence
    @param regexl: 3-tuples describing the search/replace operations
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type bext: string
    @param bext: extension appended to a file's name to form the name of
        its backup file
    @rtype: number
    @return: total number of files modified
    @raise ScriptError: if a regular expression cannot be compiled or a
        file cannot be read, renamed or written
    """
    fileList = ffind(path, shellglobs=shellglobs, namefs=namefs)

    # return if no files found
    if not fileList:
        return 0

    filesChanged = 0

    try:
        cffl = []
        for searchs, replaces, reflags in regexl:
            # prepare the required regex objects, check whether we need
            # to pass any regex compilation flags
            if reflags is not None:
                regex = re.compile(searchs, reflags)
            else:
                regex = re.compile(searchs)
            cffl.append((regex.subn, replaces))
        processed = set()
        for fname in fileList:
            # Guard against the same path being listed more than once: a
            # second pass would overwrite the backup with content that
            # has already been modified.
            if fname in processed:
                continue
            processed.add(fname)
            # read file content; 'with' closes the handle even on error
            with open(fname, 'r') as fhandle:
                text = fhandle.read()
            substitutions = 0
            # unpack the subn() function and the replace string
            for subnfunc, replaces in cffl:
                text, numOfChanges = subnfunc(replaces, text)
                substitutions += numOfChanges
            if not substitutions:
                continue
            bakFileName = None
            if bext:
                # first move away the original file; with an empty
                # 'bext' the backup name would equal the file name and
                # the original would be deleted, hence the check
                bakFileName = '%s%s' % (fname, bext)
                if os.path.exists(bakFileName):
                    os.unlink(bakFileName)
                os.rename(fname, bakFileName)
            # now write the new file content
            with open(fname, 'w') as fhandle:
                fhandle.write(text)
            if bakFileName is not None:
                # The file was created anew, so give it the permission
                # bits of the original (e.g. keep scripts executable).
                # Best effort only: a failure here must not turn a
                # successful replacement into an error.
                try:
                    os.chmod(fname, os.stat(bakFileName).st_mode & 0o7777)
                except OSError:
                    pass
            filesChanged += 1
    except Exception as e:
        raise ScriptError(str(e))

    # Returns the number of files that had some of their content changed
    return filesChanged
254
def printr(results):
    """
    Print the results of the ffind()/ffindgrep() functions.

    The output format is similar to the one used by the UNIX find utility.

    Entries are written to sys.stdout in sorted order. For a dictionary
    (as returned by ffindgrep()) each entry is written as the matched
    lines followed by the file name on a line of its own; for any other
    iterable (as returned by ffind()) one item is written per line.

    @type results: dict or sequence
    @param results: the value returned by ffindgrep() or ffind()
    """
    write = sys.stdout.write
    # isinstance() also recognises dict subclasses, which would otherwise
    # fall through to the sequence branch and have only their keys printed
    if isinstance(results, dict):
        for f in sorted(results):
            write("%s\n%s\n" % (results[f], f))
    else:
        for f in sorted(results):
            write("%s\n" % f)
if __name__ == '__main__':
    # Running the module as a script does nothing.
    pass