1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """This module exposes the L{LogNormalizer} class that can be used for
24 higher-level management of the normalization flow.
25 Using this module is in no way mandatory in order to benefit from
26 the normalization system; the C{LogNormalizer} class provides basic facilities
27 for further integration in a wider project (web services, ...).
28 """
29
30 import os
31 import uuid as _UUID_
32 import warnings
33 import StringIO
34
35 from normalizer import Normalizer
36 from lxml.etree import parse, DTD, fromstring as XMLfromstring
37
39 """Basic normalization flow manager.
40 Normalizers definitions are loaded from a path and checked against the DTD.
41 If the definitions are syntactically correct, the normalizers are
42 instantiated and populate the manager's cache.
43 Normalization priority is established as follows:
44
45 * Maximum priority assigned to normalizers where the "appliedTo" tag is set
46 to "raw". They MUST be mutually exclusive.
47 * Medium priority assigned to normalizers where the "appliedTo" tag is set
48 to "body".
49 * Lowest priority assigned to any remaining normalizers.
50
51 Some extra treatment is also done prior to and after the log normalization:
52
53 * Assignment of a unique ID, under the tag "uuid"
54 * Conversion of date tags to UTC, if the "_timezone" was set prior to
55 the normalization process."""
56
def __init__(self, normalizers_paths, active_normalizers=None):
    """
    Instantiate a flow manager. The default behavior is to activate every
    available normalizer.

    @param normalizers_paths: a list or tuple of absolute paths to the
        directories holding the normalizer XML definitions, or just a
        single path as str.
    @param active_normalizers: a dictionary of active normalizers in the
        form {uuid: True|False} (activation is looked up by normalizer
        uuid). When empty or None, every normalizer gets activated.
    @raise ValueError: if one of the given paths is not a directory.
    @raise StandardError: if normalizer.dtd, common_tagTypes.xml or
        common_callBacks.xml was not found in any of the given paths.
    """
    # isinstance() expects a tuple of types. The former "list or tuple"
    # evaluates to plain "list", so a tuple of paths used to be wrapped
    # into a one-element list and treated as a single (bogus) path.
    if not isinstance(normalizers_paths, (list, tuple)):
        normalizers_paths = [normalizers_paths]
    self.normalizers_paths = normalizers_paths
    # A None default avoids sharing one mutable dict between instances.
    if active_normalizers is None:
        active_normalizers = {}
    self.active_normalizers = active_normalizers
    self.dtd, self.ctt, self.ccb = None, None, None

    # Look for the DTD and the two common libraries in every directory.
    # When a file exists in several directories, the last one found wins.
    for norm_path in self.normalizers_paths:
        if not os.path.isdir(norm_path):
            raise ValueError("Invalid normalizer directory : %s" % norm_path)
        dtd = os.path.join(norm_path, 'normalizer.dtd')
        ctt = os.path.join(norm_path, 'common_tagTypes.xml')
        ccb = os.path.join(norm_path, 'common_callBacks.xml')
        if os.path.isfile(dtd):
            # The DTD is built from the open file; release the handle
            # afterwards instead of leaving it to the garbage collector.
            with open(dtd) as dtd_file:
                self.dtd = DTD(dtd_file)
        if os.path.isfile(ctt):
            self.ctt = ctt
        if os.path.isfile(ccb):
            self.ccb = ccb

    if not self.dtd or not self.ctt or not self.ccb:
        raise StandardError("Missing DTD or common library files")
    self._cache = []
    self.reload()
94
96 """Refresh this instance's normalizers pool: each definition file is parsed, checked against the DTD, instantiated and grouped by its appliedTo value."""
97 self.normalizers = { 'raw' : [], 'body' : [] }  # the pool is rebuilt from scratch on every call
98 for path in self.iter_normalizer():
99 norm = parse(open(path))  # NOTE(review): this file object is never closed explicitly
100 if not self.dtd.validate(norm):
101 warnings.warn('Skipping %s : invalid DTD' % path)  # definitions failing DTD validation are left out of the pool
102 print 'invalid normalizer ', path  # NOTE(review): same event is also written to stdout, on top of the warning
103 else:
104 normalizer = Normalizer(norm, self.ctt, self.ccb)
105 normalizer.uuid = self._compute_norm_uuid(normalizer)
106 self.normalizers.setdefault(normalizer.appliedTo, [])  # appliedTo values other than 'raw'/'body' get their own group
107 self.normalizers[normalizer.appliedTo].append(normalizer)
108 self.activate_normalizers()  # resets and refills self._cache
109
112
114 """ Walks every directory of self.normalizers_paths recursively and yields the normalizer definition files found.
115 Only names ending with '.xml' are kept; names starting with 'common_tagTypes' or 'common_callBacks' are skipped.
116 @return: a generator of file paths (each walked directory joined with the file name).
117 """
118 for path in self.normalizers_paths:
119 for root, dirs, files in os.walk(path):
120 for name in files:
121 if not name.startswith('common_tagTypes') and \
122 not name.startswith('common_callBacks') and \
123 name.endswith('.xml'):
124 yield os.path.join(root, name)
125
127 """ Returns the amount of available normalizers, i.e. the number of definition files yielded by
128 iter_normalizer (files found on disk, whether or not they were successfully loaded)."""
129 return len([n for n in self.iter_normalizer()])
130
132 """Adds or updates a normalizer definition on disk, then refreshes the pool through self.reload().
133 @param raw_xml_contents: XML description of the normalizer as flat XML. It
134 must comply with the DTD, otherwise ValueError is raised with the DTD error log.
135 @param name: if set, the XML description will be saved as name.xml ('.xml' is appended when missing).
136 If left blank, the name is read from the "name" attribute of the XML root element.
137 @param dir_path: the directory where to save the given normalizer. It is only honored when it
138 is one of self.normalizers_paths; otherwise the first configured path is used.
139 """
140 path = self.normalizers_paths[0]  # default target: first configured directory
141 if dir_path:
142 if dir_path in self.normalizers_paths:
143 path = dir_path
144 xmlconf = XMLfromstring(raw_xml_contents).getroottree()
145 if not self.dtd.validate(xmlconf):
146 raise ValueError, "This definition file does not follow the normalizers DTD :\n\n%s" % \
147 self.dtd.error_log.filter_from_errors()
148 if not name:
149 name = xmlconf.getroot().get('name')
150 if not name.endswith('.xml'):
151 name += '.xml'
152 xmlconf.write(open(os.path.join(path, name), 'w'),  # NOTE(review): this file object is never closed explicitly
153 encoding = 'utf8',
154 method = 'xml',
155 pretty_print = True)
156 self.reload()
157
159 """Returns the first normalizer of the pool whose uuid attribute equals the given uuid; raises ValueError when there is none."""
160 try:
161 norm = [ u for u in sum(self.normalizers.values(), []) if u.uuid == uuid][0]  # searches every appliedTo group
162 return norm
163 except:  # NOTE(review): bare except - any error raised above, not only a missing uuid, is reported as "not found"
164 raise ValueError, "Normalizer uuid : %s not found" % uuid
165
169
173
174
176 """Applies the activation state held in self.active_normalizers (as set through
177 set_active_normalizers) and rebuilds self._cache. If that dictionary is empty, every normalizer is activated.
178 The cache is filled with active normalizers only, in priority order: 'raw', then 'body', then every other group."""
179 if not self.active_normalizers:
180 self.active_normalizers = dict([ (n.uuid, True) for n in \
181 sum([ v for v in self.normalizers.values()], []) ])
182
183 self.set_active_normalizers(self.active_normalizers)  # normalizers absent from the dictionary end up set to False
184
185 self._cache = []
186
187 for norm in self.normalizers['raw']:  # highest priority group
188
189
190 if self.active_normalizers.get(norm.uuid, False):
191 self._cache.append(norm)
192
193 for norm in self.normalizers['body']:  # medium priority group
194 if self.active_normalizers.get(norm.uuid, False):
195 self._cache.append(norm)
196
197 for norm in sum([ self.normalizers[u] for u in self.normalizers
198 if u not in ['raw', 'body']], []):  # every remaining appliedTo group, lowest priority
199 if self.active_normalizers.get(norm.uuid, False):
200 self._cache.append(norm)
201
203 """Returns a dictionary of normalizers; keys are normalizers' uuids and
204 values are True|False according to each normalizer's activation state."""
205 return self.active_normalizers  # the internal dictionary itself is returned, not a copy
206
208 """Sets the active/inactive normalizers. Every normalizer of the pool defaults to
209 inactive (False); the entries of norms then override that default.
210
211 @param norms: a dictionary {uuid: True|False}, similar to the one returned by
212 get_active_normalizers."""
213 default = dict([ (n.uuid, False) for n in \
214 sum([ v for v in self.normalizers.values()], []) ])
215 default.update(norms)  # keys of norms that match no pooled normalizer are kept as given
216 self.active_normalizers = default  # NOTE(review): self._cache is not rebuilt here
217
219 """ This method is the entry point to normalize data (a log).
220
221 data is first given a unique id through uuidify, then passed through
222 every activated normalizer, and extra tagging occurs accordingly.
223
224 data also receives an extra uuid tag.
225
226 @param data: must be a dictionary with at least a key 'raw' or 'body'
227 with basestring values (preferably Unicode).
228
229 Here is an example:
230 >>> from logsparser import lognormalizer
231 >>> from pprint import pprint
232 >>> ln = lognormalizer.LogNormalizer('/usr/local/share/normalizers/')
233 >>> mylog = {'raw' : 'Jul 18 15:35:01 zoo /USR/SBIN/CRON[14338]: (root) CMD (/srv/git/redmine-changesets.sh)'}
234 >>> ln.lognormalize(mylog)
235 >>> pprint(mylog)
236 {'body': '(root) CMD (/srv/git/redmine-changesets.sh)',
237 'date': datetime.datetime(2011, 7, 18, 15, 35, 1),
238 'pid': '14338',
239 'program': '/USR/SBIN/CRON',
240 'raw': 'Jul 18 15:35:01 zoo /USR/SBIN/CRON[14338]: (root) CMD (/srv/git/redmine-changesets.sh)',
241 'source': 'zoo',
242 'uuid': 70851882840934161193887647073096992594L}
243 """
244 data = self.uuidify(data)
245 data = self.normalize(data)  # NOTE(review): result is only rebound locally and no return is visible here; the example relies on data being updated in place
246
247
248
250 """Adds a unique id (a random uuid4, as an integer) under the "uuid" key of the log; the log is modified in place and returned."""
251 log["uuid"] = _UUID_.uuid4().int
252 return log
253
255 """Plain normalization: passes the log through every normalizer of self._cache, in cache order, and returns the result."""
256 for norm in self._cache:
257 log = norm.normalize(log)  # each normalizer receives the output of the previous one
258 return log
259
261 """Used for testing only: the log goes through every normalizer of self._cache, but the
262 normalizers' tag prerequisites are deactivated (do_not_check_prereq = True)."""
263 for norm in self._cache:
264 log = norm.normalize(log, do_not_check_prereq = True)
265 return log
266