1
2
3
"""
Copyright (c) 2008, Muharem Hrnjadovic

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

  - Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.
  - Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
  - Neither the name of Muharem Hrnjadovic nor the names of other
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

---------------------------------------------------------------------------

Module providing functions commonly used in shell scripting:

  - ffind()    : finds files in a directory tree
  - ffindgrep(): finds files in a directory tree and matches their
                 content to regular expressions
  - freplace() : in-place search/replace of files in a directory tree
                 with regular expressions
  - printr()   : prints the results of the ffind()/ffindgrep() functions

Please see the documentation strings of the particular functions for
detailed information.
"""
48
__version__ = "1.0"

import fnmatch
import itertools
import os
import re
import sys
import types
52
# NOTE(review): the class header was missing from this listing; the name is
# taken from the 'raise ScriptError(...)' call sites in this module and the
# base class is assumed to be Exception -- confirm against the original.
class ScriptError(Exception):
    """The exception raised in case of failures."""
55
def ffind(path, shellglobs=None, namefs=None, relative=True):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence).

    Please note that the shell wildcards work in a cumulative fashion
    i.e. each of them is applied to the full set of file *names* found.
    A file whose name matches more than one wildcard is listed only once.

    Conversely, all the functions in 'namefs'
        - only get to see the output of their respective predecessor
          function in the sequence (with the obvious exception of the
          first function)
        - are applied to the full file *path* (whereas the shell-style
          wildcards are only applied to the file *names*)

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type relative: bool
    @param relative: a boolean flag that determines whether absolute or
        relative paths should be returned
    @rtype: sequence
    @return: paths for files found (always a real list)
    @raise ScriptError: if 'path' is not readable or the search fails
    """
    if not os.access(path, os.R_OK):
        raise ScriptError("cannot access path: '%s'" % path)

    found = []
    try:
        # Materialize once so the wildcards can be re-applied per directory.
        patterns = list(shellglobs) if shellglobs else None
        for dirpath, _subdirs, names in os.walk(path):
            if patterns:
                # Test each name against all wildcards (rather than each
                # wildcard against all names) so that a name matching
                # several wildcards is not reported several times.
                names = [name for name in names
                         if any(fnmatch.fnmatchcase(name, pattern)
                                for pattern in patterns)]
            # os.path.join() avoids a doubled separator when 'dirpath'
            # already ends with one (e.g. path given as 'dir/').
            found.extend(os.path.join(dirpath, name) for name in names)
        if not relative:
            found = [os.path.abspath(fpath) for fpath in found]
        if namefs:
            # Each function only sees what its predecessor let through.
            for namef in namefs:
                found = [fpath for fpath in found if namef(fpath)]
    except Exception as e:
        raise ScriptError(str(e))
    return found
105
def ffindgrep(path, regexl, shellglobs=None, namefs=None,
              relative=True, linenums=False):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence) and search inside these.

    Additionally, the file content will be filtered by the regular
    expressions in the 'regexl' sequence. Each entry in the latter
    is either

        - a string (with the regex definition)
        - or a tuple/list with arguments accepted by re.compile() (the
          re.M and re.S flags will have no effect though, since the
          expressions are applied line by line)

    Any other entry is handed to re.compile() unchanged (so an already
    compiled pattern works); entries re.compile() rejects cause a
    ScriptError instead of being silently skipped.

    A line is kept only if it matches *all* the expressions. For all the
    files that pass the file name/content tests the function returns a
    dictionary where the

        - key is the file name and the
        - value is a string with lines filtered by 'regexl'

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type regexl: sequence
    @param regexl: the regular expressions the file lines must match
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type relative: bool
    @param relative: a boolean flag that determines whether absolute or
        relative paths should be returned
    @type linenums: bool
    @param linenums: turns on line numbers for found files (like grep -n)
    @rtype: dict
    @return: file name (key) and lines filtered by 'regexl' (value)
    @raise ScriptError: if a regex cannot be compiled or a file cannot
        be read
    """
    fileList = ffind(path, shellglobs=shellglobs,
                     namefs=namefs, relative=relative)
    if not fileList:
        return dict()

    result = dict()
    try:
        # Compile every expression once, up front.
        searches = []
        for redata in regexl:
            if isinstance(redata, (tuple, list)):
                searches.append(re.compile(*redata).search)
            else:
                searches.append(re.compile(redata).search)

        for fname in fileList:
            # 'with' closes the handle even if read() raises.
            with open(fname, 'r') as fhandle:
                fcontent = fhandle.read()

            # Always carry the line number along; it is only emitted
            # when 'linenums' is set.
            lines = list(enumerate(fcontent.splitlines(), 1))
            for search in searches:
                lines = [(num, line) for num, line in lines if search(line)]
                # Nothing left to filter: skip the remaining expressions.
                if not lines:
                    break

            if lines:
                if linenums:
                    result[fname] = '\n'.join(["%d:%s" % t for t in lines])
                else:
                    result[fname] = '\n'.join([line for _num, line in lines])
    except Exception as e:
        raise ScriptError(str(e))
    return result
182
def freplace(path, regexl, shellglobs=None, namefs=None, bext='.bak'):
    """
    Find files in the directory tree starting at 'path' (filtered by
    Unix shell-style wildcards ('shellglobs') and/or the functions in
    the 'namefs' sequence) and perform an in-place search/replace
    operation on these.

    The search/replace operation is performed on the content of all the
    files (whose names passed the tests) using the regular expressions
    in 'regexl'.

    Please note: 'regexl' is a sequence of 3-tuples, each having the
    following elements:

        - search string (Python regex syntax)
        - replace string (Python regex syntax)
        - regex flags or 'None' (re.compile syntax)

    Copies of the modified files are saved in backup files using the
    extension specified in 'bext'. If 'bext' is empty or None no backup
    is made and the file is simply overwritten.

    @type path: string
    @param path: starting path of the directory tree to be searched
    @type regexl: sequence
    @param regexl: 3-tuples of (search, replace, flags), see above
    @type shellglobs: sequence
    @param shellglobs: an optional sequence of Unix shell-style wildcards
        that are to be applied to the file *names* found
    @type namefs: sequence
    @param namefs: an optional sequence of functions to be applied to the
        file *paths* found
    @type bext: string
    @param bext: extension appended to a file's name to form the name of
        its backup copy
    @rtype: number
    @return: total number of files modified
    @raise ScriptError: if a regex cannot be compiled or a file cannot be
        read, backed up or written
    """
    fileList = ffind(path, shellglobs=shellglobs, namefs=namefs)
    if not fileList:
        return 0

    filesChanged = 0
    try:
        # Compile every search expression once, up front.
        replacers = []
        for searchs, replaces, reflags in regexl:
            if reflags is not None:
                regex = re.compile(searchs, reflags)
            else:
                regex = re.compile(searchs)
            replacers.append((regex.subn, replaces))

        for fname in fileList:
            with open(fname, 'r') as fhandle:
                text = fhandle.read()

            # Apply all replacements in order, each on the output of the
            # previous one, and count the total number of substitutions.
            substitutions = 0
            for subnfunc, replaces in replacers:
                text, numOfChanges = subnfunc(replaces, text)
                substitutions += numOfChanges
            if not substitutions:
                continue

            if bext:
                # Move the original out of the way before writing, so it
                # survives as the backup if the write fails.
                bakFileName = '%s%s' % (fname, bext)
                if os.path.exists(bakFileName):
                    os.unlink(bakFileName)
                os.rename(fname, bakFileName)
            # With an empty 'bext' the backup name would equal the file
            # name; unlinking it would destroy the original, so the file
            # is overwritten directly instead.
            with open(fname, 'w') as fhandle:
                fhandle.write(text)
            filesChanged += 1
    except Exception as e:
        raise ScriptError(str(e))
    return filesChanged
254
# NOTE(review): the 'def' line was missing from this listing; the name comes
# from the module docstring and the parameter from the body -- confirm.
def printr(results):
    """
    Print the results of the ffind()/ffindgrep() functions.

    The output format is similar to the one used by the UNIX find utility.

    @type results: dict or sequence
    @param results: either a dictionary (file name -> matched lines) or
        a sequence of file paths; entries are printed sorted by file
        name/path
    """
    if isinstance(results, dict):
        for f in sorted(results):
            # NOTE(review): the matched lines are written *before* the
            # file name; kept as found -- confirm this order is intended.
            sys.stdout.write("%s\n%s\n" % (results[f], f))
    else:
        for f in sorted(results):
            sys.stdout.write("%s\n" % f)
267
# Nothing is done when the module is executed as a script.
if __name__ == '__main__':
    pass
270