1
"""Contains miscellaneous helpers"""
6
from urlparse import urlparse
8
from htmlentitydefs import name2codepoint
11
from datetime import timedelta
14
def str_to_boolean(string):
    """Interpret *string* as a boolean flag.

    :param string: Text to interpret; compared case-insensitively.
    :returns: True for 'true', '1', 't', 'y' or 'yes', otherwise False.
    """
    return string.lower() in ('true', '1', 't', 'y', 'yes')
21
def convert_bytes(bytes):
    """Returns given bytes as prettified string.

    :param bytes: Size in bytes (anything accepted by ``float()``).
    :returns: The size with two decimals and a unit suffix:
        ``T``, ``G``, ``M``, ``K`` or ``b``.
    """
    # Convert once so the divisions below are never floored on Python 2.
    amount = float(bytes)
    # Largest unit first; the first threshold reached decides the suffix.
    units = (
        (1099511627776, 'T'),
        (1073741824, 'G'),
        (1048576, 'M'),
        (1024, 'K'),
    )
    for threshold, suffix in units:
        if amount >= threshold:
            return '%.2f%s' % (amount / threshold, suffix)
    return '%.2fb' % amount
42
class MergeException(Exception):
    """Error raised when two values cannot be merged.

    The offending description is kept on ``value`` and its ``repr`` is
    used as the string form of the exception.
    """

    def __init__(self, value):
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        return repr(self.value)
52
# NOTE(review): the `def` header that owns this body is not visible in this
# view, and neither is the fallback promised by the docstring ("If
# unsuccessful returns original text") -- confirm both against the full file.
"""Tries to strip all HTML tags from *text*. If unsuccessful returns original text."""
53
# Imported here, inside the body, rather than with the file-level imports.
from BeautifulSoup import BeautifulSoup
55
# Join every text node BeautifulSoup finds with single spaces.
text = ' '.join(BeautifulSoup(text).findAll(text=True))
56
# Collapse any run of whitespace into one space.
return ' '.join(text.split())
61
# This pattern matches a character entity reference (a decimal numeric
62
# reference, a hexadecimal numeric reference, or a named reference).
63
# Group 1 is the reference body without the leading '&'; the trailing ';'
# is optional.
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
def _htmldecode(text):
    """Decode HTML entities in the given text.

    :param text: Byte string or unicode string containing entity references.
    :returns: *text* with every reference matched by ``charrefpat`` replaced
        by the character it names; unknown references are left as they are.
    """
    # From screpe.py - licensed under apache 2.0 .. should not be a problem for a MIT afaik
    if type(text) is unicode:
        uchr = unichr
    else:
        # For byte strings keep ASCII code points as plain `str` characters
        # and only switch to unicode when the code point requires it.
        uchr = lambda value: unichr(value) if value > 127 else chr(value)

    def entitydecode(match, uchr=uchr):
        entity = match.group(1)
        try:
            if entity.startswith('#x'):
                return uchr(int(entity[2:], 16))
            elif entity.startswith('#'):
                return uchr(int(entity[1:]))
            elif entity in name2codepoint:
                return uchr(name2codepoint[entity])
        except (ValueError, OverflowError):
            # Numeric reference outside the range of valid code points.
            pass
        # Unknown or undecodable reference: keep the original text.
        return match.group(0)

    return charrefpat.sub(entitydecode, text)
87
def decode_html(value):
    """Decode HTML entities in *value*.

    :param string value: String to be html-decoded
    :returns: Html decoded string
    """
    return _htmldecode(value)
95
def encode_html(unicode_data, encoding='ascii'):
    """
    Encode unicode_data for use as XML or HTML, with characters outside
    of the encoding converted to XML numeric character references.

    :param unicode_data: Unicode text to encode.
    :param encoding: Target codec name, 'ascii' by default.
    :returns: The encoded byte string.
    """
    try:
        return unicode_data.encode(encoding, 'xmlcharrefreplace')
    except ValueError:
        # ValueError is raised if there are unencodable chars in the
        # data and the 'xmlcharrefreplace' error handler is not found.
        # Pre-2.3 Python doesn't support the 'xmlcharrefreplace' error
        # handler, so we'll emulate it.
        return _xmlcharref_encode(unicode_data, encoding)
110
def _xmlcharref_encode(unicode_data, encoding):
111
"""Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
113
# Phase through the unicode_data string one character at a time in
114
# order to catch unencodable characters:
115
for char in unicode_data:
117
chars.append(char.encode(encoding, 'strict'))
119
chars.append('&#%i;' % ord(char))
120
return ''.join(chars)
124
# Exact types the sanitize_dict / sanitize_list helpers keep; values of any
# other type are removed by them.
# NOTE(review): these `types.*Type` aliases are Python 2 names -- confirm the
# interpreter target before porting.
_valid = [types.DictType, types.IntType, types.NoneType,
125
types.StringType, types.UnicodeType, types.BooleanType,
126
types.ListType, types.LongType, types.FloatType]
129
# TODO: I think this was left as broken ...
130
def sanitize(value, logger=None):
    """Strip non-standard types from *value* in place (currently disabled).

    :param value: A dict or list to clean up.
    :param logger: Optional logger handed on to the per-type helpers.
    :raises Exception: Always, with the message 'broken'; the function is
        switched off by the guard on its first line.
    """
    # Deliberate kill-switch left by the original author; everything below
    # is unreachable until it is removed.
    raise Exception('broken')
    if isinstance(value, dict):
        sanitize_dict(value, logger)
    elif isinstance(value, list):
        sanitize_list(value, logger)
    else:
        raise Exception('Unsupported datatype')
140
# TODO: I think this was left as broken ...
141
def sanitize_dict(d, logger=None):
    """Makes dictionary d contain only yaml.safe_dump compatible elements.

    In other words, removes every value whose type is not listed in
    ``_valid``. Nested plain lists and dicts are cleaned recursively.
    The dictionary is modified in place.

    :param d: Dictionary to clean up.
    :param logger: Optional logger; each removed key is reported at debug level.
    """
    # Snapshot the keys because entries are deleted during the walk.
    for k in list(d.keys()):
        item = d[k]
        # Exact type checks on purpose: subclasses are not in `_valid` and
        # must fall through to the removal branch.
        if type(item) is list:
            sanitize_list(item, logger)
        elif type(item) is dict:
            sanitize_dict(item, logger)
        elif type(item) not in _valid:
            if logger:
                logger.debug('Removed non yaml compatible key %s %s' % (k, type(item)))
            del d[k]
155
# TODO: I think this was left as broken ...
156
def sanitize_list(content, logger=None):
    """Remove, in place, every item of *content* whose type is not in ``_valid``.

    :param content: List to clean up.
    :param logger: Optional logger; each removed item's type is reported at
        debug level.
    """
    kept = []
    for value in content:
        if type(value) in _valid:
            kept.append(value)
        elif logger:
            logger.debug('Removed non yaml compatible list item %s' % type(value))
    # Slice assignment keeps the caller's list object. Filtering by position
    # avoids `remove()`, which deletes the first *equal* element instead.
    content[:] = kept
164
def merge_dict_from_to(d1, d2):
    """Merges dictionary d1 into dictionary d2. d1 will remain in original form.

    Keys missing from d2 are deep-copied over. For keys present in both:
    dicts are merged recursively, lists are extended with a deep copy of
    d1's items, and scalars already in d2 are kept.

    :param d1: Source dictionary (not modified).
    :param d2: Destination dictionary (modified in place).
    :raises MergeException: When a key holds conflicting datatypes.
    :raises Exception: When both values share a type this function does not
        know how to merge.
    """
    import copy

    for k, v in d1.items():
        if k not in d2:
            d2[k] = copy.deepcopy(v)
            continue

        existing = d2[k]
        if type(v) == type(existing):
            if isinstance(v, dict):
                merge_dict_from_to(v, existing)
            elif isinstance(v, list):
                existing.extend(copy.deepcopy(v))
            elif isinstance(v, (basestring, bool, int, float)):
                # Scalar already set in d2: the destination value wins.
                pass
            else:
                raise Exception('Unknown type: %s value: %s in dictionary' % (type(v), repr(v)))
        elif isinstance(v, basestring) and isinstance(existing, basestring):
            # Strings are compatible by definition
            # (though we could get a decode error later, this is highly unlikely for config values)
            pass
        else:
            raise MergeException('Merging key %s failed, conflicting datatypes %r vs. %r.' % (
                k, type(v).__name__, type(existing).__name__))
190
# Redirect handler that wraps the stock urllib2 handling of 301 and 302.
# NOTE(review): what is done with `result` after each delegated call is not
# visible in this view -- confirm against the full file.
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
192
def http_error_301(self, req, fp, code, msg, headers):
193
# Delegate to the base class implementation for 301.
result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, headers)
197
def http_error_302(self, req, fp, code, msg, headers):
198
# Delegate to the base class implementation for 302.
result = urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
203
# NOTE(review): in this view the docstring quotes and several control-flow
# lines between the visible statements appear to be missing; the comments
# below describe only what the visible lines do.
def urlopener(url_or_request, log, **kwargs):
205
Utility function for pulling back a url, with a retry of 3 times, increasing the timeout, etc.
206
Re-raises any errors as URLError.
208
.. warning:: This is being replaced by requests library.
209
flexget.utils.requests should be used going forward.
211
:param str url_or_request: URL or Request object to get.
212
:param log: Logger to log debug info and errors to
213
:param kwargs: Keyword arguments to be passed to urlopen
214
:return: The file-like object returned by urlopen
216
# Helpers that track hosts already found to be unresponsive.
from flexget.utils.requests import is_unresponsive, set_unresponsive
218
if isinstance(url_or_request, urllib2.Request):
219
# For Request objects only the host part is used for the unresponsive check.
url = url_or_request.get_host()
222
if is_unresponsive(url):
223
# NOTE(review): when `url` came from get_host() it is a bare host, and
# urlparse(...).hostname is presumably None for that -- confirm.
msg = '%s is known to be unresponsive, not trying again.' % urlparse(url).hostname
225
raise urllib2.URLError(msg)
227
# Both settings come from kwargs, defaulting to 3 tries and 15 seconds.
retries = kwargs.get('retries', 3)
228
timeout = kwargs.get('timeout', 15.0)
230
# get the old timeout for sockets, so we can set it back to that when done. This is NOT threadsafe by the way.
231
# In order to avoid requiring python 2.6, we're not using the urlopen timeout parameter. That really should be used
232
# after checking for python 2.6.
233
oldtimeout = socket.getdefaulttimeout()
235
socket.setdefaulttimeout(timeout)
237
# The redirect handler goes first, then the handlers of the module-level
# opener, then any passed in through kwargs['handlers'].
handlers = [SmartRedirectHandler()]
239
# NOTE(review): urllib2._opener is a private attribute; a guard for it
# being unset may be among the lines not visible here -- confirm.
handlers.extend(urllib2._opener.handlers)
240
if kwargs.get('handlers'):
241
handlers.extend(kwargs['handlers'])
242
if len(handlers) > 1:
243
handler_names = [h.__class__.__name__ for h in handlers]
244
log.debug('Additional handlers have been specified for this urlopen: %s' % ', '.join(handler_names))
245
opener = urllib2.build_opener(*handlers).open
246
for i in range(retries): # retry getting the url up to 3 times.
250
# kwargs['data'] (None when absent) is passed as the request body.
retrieved = opener(url_or_request, kwargs.get('data'))
251
except urllib2.HTTPError, e:
253
# If it was not a server error, don't keep retrying.
254
log.warning('Could not retrieve url (HTTP %s error): %s' % (e.code, e.url))
256
log.debug('HTTP error (try %i/%i): %s' % (i + 1, retries, e.code))
257
except (urllib2.URLError, socket.timeout), e:
258
if hasattr(e, 'reason'):
259
reason = str(e.reason)
262
# A timeout marks the host as unresponsive for later calls.
if reason == 'timed out':
263
set_unresponsive(url)
264
log.debug('Failed to retrieve url (try %i/%i): %s' % (i + 1, retries, reason))
265
except httplib.IncompleteRead, e:
266
log.critical('Incomplete read - see python bug 6312')
269
# make the returned instance usable in a with statement by adding __enter__ and __exit__ methods
274
def exit(self, exc_type, exc_val, exc_tb):
277
# The methods are attached to the response's class, not the instance, so
# every object of that class gains them.
retrieved.__class__.__enter__ = enter
278
retrieved.__class__.__exit__ = exit
281
log.warning('Could not retrieve url: %s' % url_or_request)
282
raise urllib2.URLError('Could not retrieve url after %s tries.' % retries)
284
# Put the process-wide socket timeout back to what it was on entry.
socket.setdefaulttimeout(oldtimeout)
289
# NOTE(review): the enclosing `class` header and the docstring quotes around
# the next four text lines are not visible in this view.
A list that stores regexps.
291
You can add compiled or uncompiled regexps to the list.
292
It will always return the compiled version.
293
It will compile the text regexps on demand when first accessed.
296
# Set the default flags
297
flags = re.IGNORECASE | re.UNICODE
299
def __init__(self, *args, **kwargs):
300
"""Optional :flags: keyword argument with regexp flags to compile with"""
301
# A `flags` keyword replaces the class-level default on this instance.
if 'flags' in kwargs:
302
self.flags = kwargs['flags']
304
list.__init__(self, *args, **kwargs)
306
def __getitem__(self, k):
307
item = list.__getitem__(self, k)
308
# Text entries are compiled at the moment they are read.
if isinstance(item, basestring):
309
# NOTE(review): this compiles with the literal default flags rather than
# self.flags -- confirm whether a per-instance override should apply here.
item = re.compile(item, re.IGNORECASE | re.UNICODE)
314
for i in range(len(self)):
318
def make_valid_path(path, windows=None):
    """Removes invalid characters from windows pathnames

    :param path: Path to clean up.
    :param windows: True to force windows rules, False to disable them,
        None (default) to apply them only when *path* has a drive prefix.
    :returns: The path with its drive prefix kept and, when windows rules
        apply, with the characters ``:<>*?"|`` and trailing periods of path
        components removed.
    """
    drive, path = ntpath.splitdrive(path)
    if windows is None and drive:
        # If a drive is found, this is a windows path
        windows = True
    if windows:
        # Remove invalid characters
        for char in ':<>*?"|':
            path = path.replace(char, '')
        # Windows directories and files cannot end with period
        path = re.sub(r'(?<![\./\\])\.+(?=[/\\]|$)', '', path)
    return drive + path
334
# NOTE(review): the `def` header that owns this body is not visible in this
# view, and lines between the visible statements may be missing.
"""Print to console safely."""
335
if isinstance(text, str):
338
# Python 2 print statement: coerce to unicode, then emit UTF-8 encoded bytes.
print unicode(text).encode('utf8')
341
def parse_timedelta(value):
    """Turn text such as '2 hours' into a ``timedelta``.

    :param value: A ``timedelta`` (returned unchanged), an empty value, or a
        string of the form '<integer> <unit>' where the unit is a
        ``timedelta`` keyword in singular or plural form.
    :returns: The corresponding ``timedelta``; a zero delta for empty input.
    :raises ValueError: If the text is malformed or the unit is unknown.
    """
    if isinstance(value, timedelta):
        # Allow timedelta objects to pass through
        return value
    if not value:
        # If no time is given, default to 0
        return timedelta()
    amount, unit = value.lower().split(' ')
    # Make sure unit name is plural.
    if not unit.endswith('s'):
        unit += 's'
    params = {unit: int(amount)}
    try:
        return timedelta(**params)
    except TypeError:
        # timedelta rejects unknown keyword names with TypeError.
        raise ValueError('Invalid time format \'%s\'' % value)