Script scriptutil
[hide private]
[frames | no frames]

Source Code for Script scriptutil

  1  #!/usr/bin/env python 
  2  # encoding: utf-8 
  3   
  4  """ 
  5  Copyright (c) 2008, Muharem Hrnjadovic 
  6   
  7  All rights reserved. 
  8   
  9  Redistribution and use in source and binary forms, with or without  
 10  modification, are permitted provided that the following conditions 
 11  are met: 
 12   
 13      - Redistributions of source code must retain the above copyright notice, 
 14        this list of conditions and the following disclaimer. 
 15      - Redistributions in binary form must reproduce the above copyright 
 16        notice, this list of conditions and the following disclaimer in the 
 17        documentation and/or other materials provided with the distribution. 
 18      - Neither the name of Muharem Hrnjadovic nor the names of other 
 19        contributors may be used to endorse or promote products derived from 
 20        this software without specific prior written permission. 
 21   
 22  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 23  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 24  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 25  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 
 26  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 27  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 28  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 29  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 30  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 31  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 32  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 33   
 34  --------------------------------------------------------------------------- 
 35   
 36  Module providing functions commonly used in shell scripting: 
 37   
 38    - ffind()    : finds files in a directory tree 
 39    - ffindgrep(): finds files in a directory tree and matches their 
 40                   content to regular expressions 
 41    - freplace() : in-place search/replace of files in a directory tree 
 42                   with regular expressions 
 43    - printr()   : prints the results of the ffind()/ffindgrep() functions 
 44   
 45  Please see the U{tutorial <http://muharem.wordpress.com/2007/05/20/python-find-files-using-unix-shell-style-wildcards/>} as well as the documentation strings of the particular functions for detailed information. 
 46  """ 
 47   
 48  __version__ = "1.1" 
 49   
 50  import fnmatch, itertools, os, sys, re, types 
 51   
class ScriptError(Exception):
    """Error type used by this module's helpers to report a failure."""
54
def ffind(path, shellglobs=None, namefs=None, relative=True):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence).

    Please note that the shell wildcards work in a cumulative fashion
    i.e. each of them is applied to the full set of file *names* found.
    A file whose name matches several wildcards is therefore listed once
    per matching wildcard.

    Conversely, all the functions in 'namefs'
        - only get to see the output of their respective predecessor
          function in the sequence (with the obvious exception of the
          first function)
        - are applied to the full file *path* (whereas the shell-style
          wildcards are only applied to the file *names*)

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type relative: bool
    @param relative: a boolean flag that determines whether absolute or
        relative paths should be returned; when false every path is
        passed through os.path.abspath(), otherwise paths are returned
        as built from 'path'
    @rtype: sequence
    @return: paths for files found (always a list)
    @raise ScriptError: if 'path' is not readable, or if walking the tree
        or calling one of the 'namefs' functions raises an exception
    """
    if not os.access(path, os.R_OK):
        raise ScriptError("cannot access path: '%s'" % path)

    found = []  # result list
    try:
        for dirpath, _subdirs, names in os.walk(path):
            if shellglobs:
                # pattern-major order: every wildcard sees all the names
                matched = [name
                           for pattern in shellglobs
                           for name in names
                           if fnmatch.fnmatchcase(name, pattern)]
            else:
                matched = names
            # os.path.join() avoids a doubled separator when 'dirpath'
            # already ends in one (e.g. path='/' or path='some/dir/')
            found.extend([os.path.join(dirpath, name) for name in matched])
        if not relative:
            found = [os.path.abspath(p) for p in found]
        # each name function only sees what its predecessor let through;
        # list comprehensions keep the result a real list on Python 3 too
        for keep in namefs or ():
            found = [p for p in found if keep(p)]
    except Exception as e:
        raise ScriptError(str(e))
    return found
104
def ffindgrep(path, regexl, shellglobs=None, namefs=None,
              relative=True, linenums=False):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence) and search inside these.

    Additionally, the file content will be filtered by the regular
    expressions in the 'regexl' sequence. Each entry in the latter is

        - either a tuple with arguments accepted by re.compile() (the
          re.M and re.S flags will have no effect though, because the
          expressions are applied to one line at a time)
        - or a single pattern (e.g. a string with the regex definition)
          that is handed to re.compile() as is

    The expressions are applied one after the other, so a line is only
    kept if it matches all of them.

    For all the files that pass the file name/content tests the function
    returns a dictionary where the

        - key is the file name and the
        - value is a string with lines filtered by 'regexl'

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type regexl: sequence
    @param regexl: the regular expressions used to filter the lines
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type relative: bool
    @param relative: a boolean flag that determines whether absolute or
        relative paths should be returned
    @type linenums: bool
    @param linenums: turns on line numbers for found files (like grep -n)
    @rtype: dict
    @return: file name (key) and lines filtered by 'regexl' (value)
    @raise ScriptError: if an expression cannot be compiled or a file
        cannot be read (and for any failure reported by ffind())
    """
    fileList = ffind(path, shellglobs=shellglobs,
                     namefs=namefs, relative=relative)
    if not fileList:
        return dict()

    result = dict()

    try:
        # first compile the regular expressions; entries that are not
        # tuples go straight to re.compile() so that an unusable entry
        # is reported instead of being silently skipped
        searches = []
        for redata in regexl:
            if isinstance(redata, tuple):
                searches.append(re.compile(*redata).search)
            else:
                searches.append(re.compile(redata).search)
        # now grep in the files found
        for fname in fileList:
            # read file content; 'with' closes the file even on errors
            with open(fname, 'r') as fhandle:
                fcontent = fhandle.read()
            # always carry (line number, text) pairs so that both output
            # modes share the same filtering code
            lines = list(enumerate(fcontent.splitlines(), 1))
            for search in searches:
                lines = [(num, text) for num, text in lines if search(text)]
                # there's no point in applying the remaining regular
                # expressions if we don't have any matching lines any more
                if not lines:
                    break
            if not lines:
                continue
            if linenums:
                result[fname] = '\n'.join(["%d:%s" % t for t in lines])
            else:
                result[fname] = '\n'.join([text for _num, text in lines])
    except Exception as e:
        raise ScriptError(str(e))
    return result
181
def freplace(path, regexl, shellglobs=None, namefs=None, bext='.bak'):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence) and perform an in-place search/replace
    operation on these.

    The search/replace operation is performed on the content of all the
    files (whose names passed the tests) using the regular expressions
    in 'regexl', applied in sequence order.

    Please note: 'regexl' is a sequence of 3-tuples, each having the
    following elements:

        - search string (Python regex syntax)
        - replace string (Python regex syntax)
        - regex flags or 'None' (re.compile syntax)

    The original versions of the modified files are kept in backup files
    named after the file plus the extension specified in 'bext'. An
    already existing backup file of that name is replaced.

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type regexl: sequence
    @param regexl: the (search, replace, flags) 3-tuples to apply
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type bext: string
    @param bext: extension of the backup files; if it is empty or None
        no backup is kept and the file is overwritten in place
    @rtype: number
    @return: total number of files modified
    @raise ScriptError: if an expression cannot be compiled or a file
        cannot be read, backed up or written (and for any failure
        reported by ffind())
    """
    fileList = ffind(path, shellglobs=shellglobs, namefs=namefs)

    # return if no files found
    if not fileList:
        return 0

    filesChanged = 0

    try:
        substituters = []
        for searchs, replaces, reflags in regexl:
            # prepare the required regex objects, check whether we need
            # to pass any regex compilation flags
            if reflags is not None:
                regex = re.compile(searchs, reflags)
            else:
                regex = re.compile(searchs)
            substituters.append((regex.subn, replaces))
        for fname in fileList:
            # read file content; 'with' closes the file even on errors
            with open(fname, 'r') as fhandle:
                text = fhandle.read()
            substitutions = 0
            # unpack the subn() function and the replace string
            for subnfunc, replaces in substituters:
                text, numOfChanges = subnfunc(replaces, text)
                substitutions += numOfChanges
            if not substitutions:
                continue
            if bext:
                # first move away the original file
                bakFileName = '%s%s' % (fname, bext)
                if os.path.exists(bakFileName):
                    os.unlink(bakFileName)
                os.rename(fname, bakFileName)
            # without an extension the backup name would be the file
            # itself (unlinking it would destroy the original), so in
            # that case the file is simply overwritten
            # now write the new file content
            with open(fname, 'w') as fhandle:
                fhandle.write(text)
            filesChanged += 1
    except Exception as e:
        raise ScriptError(str(e))

    # Returns the number of files that had some of their content changed
    return filesChanged
253
def printr(results):
    """
    Print the results of the ffind()/ffindgrep() functions.

    The output format is similar to the one used by the UNIX find utility.

    For a dictionary (any dict subclass included) the keys are visited
    in sorted order and, for each of them, the associated value is
    written first, followed by the key on a line of its own. For any
    other iterable the sorted items are written one per line.

    @type results: dict or sequence
    @param results: the value returned by ffindgrep() or ffind()
    """
    write = sys.stdout.write
    # isinstance() also recognises dict subclasses, which an exact type
    # comparison would wrongly treat as a plain sequence of names
    if isinstance(results, dict):
        for f in sorted(results):
            write("%s\n%s\n" % (results[f], f))
    else:
        for f in sorted(results):
            write("%s\n" % f)
if __name__ == '__main__':
    # Nothing is executed when the module is run as a script.
    pass