1
2
3
4 """
5 Copyright (c) 2008, Muharem Hrnjadovic
6
7 All rights reserved.
8
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions
11 are met:
12
13 - Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15 - Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions and the following disclaimer in the
17 documentation and/or other materials provided with the distribution.
18 - Neither the name of Muharem Hrnjadovic nor the names of other
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
26 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34 ---------------------------------------------------------------------------
35
36 Module providing functions commonly used in shell scripting:
37
38 - ffind() : finds files in a directory tree
39 - ffindgrep(): finds files in a directory tree and matches their
40 content to regular expressions
41 - freplace() : in-place search/replace of files in a directory tree
42 with regular expressions
43 - printr() : prints the results of the ffind()/ffindgrep() functions
44
45 Please see the U{tutorial <http://muharem.wordpress.com/2007/05/20/python-find-files-using-unix-shell-style-wildcards/>} as well as the documentation strings of the particular functions for detailed information.
46 """
47
48 __version__ = "1.1"
49
50 import fnmatch, itertools, os, sys, re, types
51
53 """The exception raised in case of failures."""
54
def ffind(path, shellglobs=None, namefs=None, relative=True):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence).

    Please note that the shell wildcards work in a cumulative fashion
    i.e. each of them is applied to the full set of file *names* found.
    A file whose name matches more than one wildcard is nevertheless
    reported only once.

    Conversely, all the functions in 'namefs'
        - only get to see the output of their respective predecessor
          function in the sequence (with the obvious exception of the
          first function)
        - are applied to the full file *path* (whereas the shell-style
          wildcards are only applied to the file *names*)

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type relative: bool
    @param relative: a boolean flag that determines whether absolute or
        relative paths should be returned
    @rtype: sequence
    @return: paths for files found (always a list)
    @raise ScriptError: if 'path' is not readable or if anything goes
        wrong while walking the tree or applying the filters
    """
    if not os.access(path, os.R_OK):
        raise ScriptError("cannot access path: '%s'" % path)

    fileList = []
    try:
        # Paths already collected; keeps a file that matches several
        # wildcards from showing up more than once in the result.
        seen = set()
        for dirpath, _subdirs, files in os.walk(path):
            if shellglobs:
                # Pattern-major order: every wildcard is applied to the
                # complete list of names found in this directory.
                matched = [fname
                           for pattern in shellglobs
                           for fname in files
                           if fnmatch.fnmatchcase(fname, pattern)]
            else:
                matched = files
            for fname in matched:
                # os.path.join() avoids a doubled separator when the
                # directory name already ends with one (e.g. 'dir/').
                fpath = os.path.join(dirpath, fname)
                if fpath not in seen:
                    seen.add(fpath)
                    fileList.append(fpath)
        if not relative:
            fileList = [os.path.abspath(fpath) for fpath in fileList]
        if namefs:
            # Each function only sees what its predecessor let through.
            # list() guarantees a real list on Python 3, where filter()
            # is lazy.
            for ff in namefs:
                fileList = list(filter(ff, fileList))
    except Exception as e:
        # Callers only have to deal with a single exception type.
        raise ScriptError(str(e))
    return fileList
104
def ffindgrep(path, regexl, shellglobs=None, namefs=None,
              relative=True, linenums=False):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence) and search inside these.

    Additionally, the file content will be filtered by the regular
    expressions in the 'regexl' sequence. Each entry in the latter
    is

        - either a tuple with arguments accepted by re.compile() (the
          re.M and re.S flags will have no effect though)
        - or anything else re.compile() accepts as a pattern, typically
          a string with the regex definition

    The regular expressions are applied one after the other, line by
    line: a line is kept only if every expression matches it.

    For all the files that pass the file name/content tests the function
    returns a dictionary where the

        - key is the file name and the
        - value is a string with lines filtered by 'regexl'

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type regexl: sequence
    @param regexl: regular expressions (strings or re.compile() argument
        tuples) every reported line must match
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type relative: bool
    @param relative: a boolean flag that determines whether absolute or
        relative paths should be returned
    @type linenums: bool
    @param linenums: turns on line numbers for found files (like grep -n)
    @rtype: dict
    @return: file name (key) and lines filtered by 'regexl' (value)
    @raise ScriptError: if a regular expression cannot be compiled or a
        file cannot be read
    """
    fileList = ffind(path, shellglobs=shellglobs,
                     namefs=namefs, relative=relative)
    if not fileList:
        return dict()

    result = dict()

    try:
        # Compile every expression once, up front. Tuples are unpacked
        # into re.compile(); everything else is handed over as the
        # pattern itself so that unicode strings and precompiled
        # patterns work too, and an unusable entry fails loudly instead
        # of being dropped silently.
        searchers = []
        for redata in regexl:
            if isinstance(redata, tuple):
                searchers.append(re.compile(*redata).search)
            else:
                searchers.append(re.compile(redata).search)

        for fname in fileList:
            # 'with' closes the handle even if read() fails.
            with open(fname, 'r') as fhandle:
                fcontent = fhandle.read()

            if linenums:
                # (line number, text) pairs, numbered from 1
                lines = list(enumerate(fcontent.splitlines(), 1))
            else:
                lines = fcontent.splitlines()

            for search in searchers:
                # Real lists (not lazy filter objects) so that the
                # emptiness checks below are meaningful on Python 3.
                if linenums:
                    lines = [t for t in lines if search(t[1])]
                else:
                    lines = [line for line in lines if search(line)]
                # There is no point in applying the remaining regular
                # expressions once no matching lines are left.
                if not lines:
                    break
            else:
                # The loop ran to completion: report the file if any
                # lines survived all the filters.
                if lines:
                    if linenums:
                        result[fname] = '\n'.join(
                            ["%d:%s" % t for t in lines])
                    else:
                        result[fname] = '\n'.join(lines)
    except Exception as e:
        # Callers only have to deal with a single exception type.
        raise ScriptError(str(e))
    return result
181
def freplace(path, regexl, shellglobs=None, namefs=None, bext='.bak'):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence) and perform an in-place search/replace
    operation on these.

    The search/replace operation is performed on the content of all
    the files (whose names passed the tests) using the regular
    expressions in 'regexl'.

    Please note: 'regexl' is a sequence of 3-tuples, each having the
    following elements:

        - search string (Python regex syntax)
        - replace string (Python regex syntax)
        - regex flags or 'None' (re.compile syntax)

    The original version of every modified file is saved in a backup
    file whose name is the file name plus the extension specified in
    'bext'. An already existing backup file is replaced.

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type regexl: sequence
    @param regexl: (search, replace, flags) 3-tuples, applied in order
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type bext: string
    @param bext: extension appended to a file's name to form the name
        of its backup file; must not be empty
    @rtype: number
    @return: total number of files modified
    @raise ScriptError: if a regular expression cannot be compiled, a
        file cannot be read, backed up or written, or the backup file
        name would be identical to the file name
    """
    fileList = ffind(path, shellglobs=shellglobs, namefs=namefs)

    # nothing to do if no files were found
    if not fileList:
        return 0

    filesChanged = 0

    try:
        # Compile the expressions once and keep the bound subn()
        # methods together with their replacement strings.
        cffl = []
        for searchs, replaces, reflags in regexl:
            if reflags is not None:
                regex = re.compile(searchs, reflags)
            else:
                regex = re.compile(searchs)
            cffl.append((regex.subn, replaces))

        # A path must be processed at most once: a second pass would
        # overwrite the backup with the already modified content.
        processed = set()
        for fname in fileList:
            if fname in processed:
                continue
            processed.add(fname)

            # 'with' closes the handle even if read() fails.
            with open(fname, 'r') as fhandle:
                text = fhandle.read()

            substitutions = 0
            # apply all the search/replace operations in sequence
            for subnfunc, replaces in cffl:
                text, numOfChanges = subnfunc(replaces, text)
                substitutions += numOfChanges

            if substitutions:
                bakFileName = '%s%s' % (fname, bext)
                if bakFileName == fname:
                    # With an empty extension the "stale backup" removed
                    # below would be the original file itself.
                    raise ValueError(
                        "backup extension must not be empty "
                        "(refusing to overwrite '%s')" % fname)
                # move the original out of the way, replacing any
                # backup left over from an earlier run
                if os.path.exists(bakFileName):
                    os.unlink(bakFileName)
                os.rename(fname, bakFileName)

                # write the modified content under the original name
                with open(fname, 'w') as fhandle:
                    fhandle.write(text)
                filesChanged += 1
    except Exception as e:
        # Callers only have to deal with a single exception type.
        raise ScriptError(str(e))

    return filesChanged
253
255 """
256 Print the results of the ffind()/ffindgrep() functions.
257
258 The output format is similar to the one used by the UNIX find utility.
259 """
260 if type(results) == types.DictType:
261 for f in sorted(results.keys()):
262 sys.stdout.write("%s\n%s\n" % (results[f],f))
263 else:
264 for f in sorted(results):
265 sys.stdout.write("%s\n" % f)
266
267 if __name__ == '__main__':
268 pass
269