flexget.utils.tools
Covered: 252 lines
Missed: 16 lines
Skipped 89 lines
Percent: 94 %
  1
"""Contains miscellaneous helpers"""
  3
import urllib2
  4
import httplib
  5
import socket
  6
from urlparse import urlparse
  7
import time
  8
from htmlentitydefs import name2codepoint
  9
import re
 10
import ntpath
 11
from datetime import timedelta
 14
def str_to_boolean(string):
    """Return True when *string* spells a truthy value, False otherwise.

    The comparison ignores letter case. The spellings treated as true are
    ``true``, ``1``, ``t``, ``y`` and ``yes``.
    """
    truthy_spellings = ('true', '1', 't', 'y', 'yes')
    return string.lower() in truthy_spellings
 21
def convert_bytes(bytes):
    """Return the given byte count as a prettified string.

    The count is converted to ``float`` and scaled by the largest binary unit
    it reaches (``K``, ``M``, ``G`` or ``T``); smaller counts get a ``b``
    suffix. Two decimals are always shown.
    """
    amount = float(bytes)
    # Ordered largest first so the first threshold reached decides the unit.
    units = (
        (1099511627776, 'T'),
        (1073741824, 'G'),
        (1048576, 'M'),
        (1024, 'K'),
    )
    for threshold, suffix in units:
        if amount >= threshold:
            return '%.2f%s' % (amount / threshold, suffix)
    return '%.2fb' % amount
 42
class MergeException(Exception):
    """Error raised when a merge fails; the reason is kept in ``value``."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        # Same text as repr(self.value).
        return '%r' % (self.value,)
 51
def strip_html(text):
    """Tries to strip all HTML tags from *text*. If unsuccessful returns original text.

    The text nodes found by BeautifulSoup are joined with single spaces and
    runs of whitespace are collapsed.

    :param text: Markup to strip
    :returns: The tag-free text, or *text* unchanged when processing fails
    """
    from BeautifulSoup import BeautifulSoup
    try:
        fragments = BeautifulSoup(text).findAll(text=True)
        return ' '.join(' '.join(fragments).split())
    except Exception:
        # Best effort: any parsing/joining error falls back to the untouched
        # input. KeyboardInterrupt and SystemExit are deliberately not caught.
        return text
 63
# Matches an HTML character reference: decimal (&#65;), hexadecimal (&#x41;)
# or named (&amp;). The trailing semicolon is optional. Group 1 is the
# reference without the leading '&'.
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
 66
def _htmldecode(text):
    """Decode HTML entities in the given text.

    Decimal (``&#65;``), hexadecimal (``&#x41;``) and named (``&amp;``)
    references matched by ``charrefpat`` are replaced by the character they
    denote. Unknown names, and numeric references whose code point cannot be
    turned into a character, are left in the text as written.

    :param text: ``str`` or ``unicode`` text to decode
    :returns: The decoded text
    """
    if type(text) is unicode:
        uchr = unichr
    else:
        def uchr(value):
            # Code points above 127 need a unicode character; the rest can
            # stay plain byte-string characters.
            if value > 127:
                return unichr(value)
            return chr(value)

    def entitydecode(match):
        entity = match.group(1)
        try:
            if entity.startswith('#x'):
                return uchr(int(entity[2:], 16))
            if entity.startswith('#'):
                return uchr(int(entity[1:]))
        except (ValueError, OverflowError):
            # e.g. &#99999999; -- outside the range unichr() accepts. Keep the
            # reference untouched instead of aborting the whole decode.
            return match.group(0)
        if entity in name2codepoint:
            return uchr(name2codepoint[entity])
        return match.group(0)

    return charrefpat.sub(entitydecode, text)
 87
def decode_html(value):
    """
    Replace the HTML entities in *value* with the characters they stand for.

    :param string value: String to be html-decoded
    :returns: Html decoded string
    """
    decoded = _htmldecode(value)
    return decoded
 95
def encode_html(unicode_data, encoding='ascii'):
    """
    Encode unicode_data for use as XML or HTML.

    Characters that the target encoding cannot represent are written as XML
    numeric character references (``&#NNN;``).
    """
    try:
        encoded = unicode_data.encode(encoding, 'xmlcharrefreplace')
    except ValueError:
        # The built-in error handler was not usable; encode character by
        # character with the local emulation instead.
        encoded = _xmlcharref_encode(unicode_data, encoding)
    return encoded
110
def _xmlcharref_encode(unicode_data, encoding):
    """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""

    def encode_char(char):
        # A character the codec rejects becomes a numeric character reference.
        try:
            return char.encode(encoding, 'strict')
        except UnicodeError:
            return '&#%i;' % ord(char)

    return ''.join([encode_char(char) for char in unicode_data])
123
import types
124
# Exact types that sanitize_dict / sanitize_list keep; values of any other
# type are removed as not yaml.safe_dump compatible.
_valid = [
    types.DictType,
    types.IntType,
    types.NoneType,
    types.StringType,
    types.UnicodeType,
    types.BooleanType,
    types.ListType,
    types.LongType,
    types.FloatType,
]
130
def sanitize(value, logger=None):
    """Dispatch *value* to sanitize_dict or sanitize_list -- currently disabled.

    The function raises ``Exception('broken')`` before doing anything else, so
    the dispatch written below the raise is never reached.
    """
    raise Exception('broken')
    for kind, handler in ((dict, sanitize_dict), (list, sanitize_list)):
        if isinstance(value, kind):
            handler(value, logger)
            return
    raise Exception('Unsupported datatype')
141
def sanitize_dict(d, logger=None):
    """Makes dictionary d contain only yaml.safe_dump compatible elements.

    In other words, removes every entry whose value has a type that is not
    listed in ``_valid``. Nested lists and dictionaries are sanitized
    recursively. The dictionary is modified in place.

    :param dict d: Dictionary to clean
    :param logger: Optional logger; receives a debug message per removed key
    """
    # Iterate over a snapshot of the keys because entries are popped on the way.
    for k in list(d.keys()):
        item = d[k]
        if isinstance(item, list):
            sanitize_list(item, logger)
        elif isinstance(item, dict):
            sanitize_dict(item, logger)
        elif type(item) not in _valid:
            if logger:
                logger.debug('Removed non yaml compatible key %s %s' % (k, type(item)))
            d.pop(k)


def sanitize_list(content, logger=None):
    """Removes, in place, every item of *content* whose type is not in ``_valid``.

    Nested lists and dictionaries are sanitized recursively, mirroring
    sanitize_dict.

    :param list content: List to clean
    :param logger: Optional logger; receives a debug message per removed item
    """
    kept = []
    for value in content:
        if isinstance(value, list):
            sanitize_list(value, logger)
        elif isinstance(value, dict):
            sanitize_dict(value, logger)
        elif type(value) not in _valid:
            if logger:
                logger.debug('Removed non yaml compatible list item %s' % type(value))
            continue
        kept.append(value)
    # Rebuild by position instead of list.remove(): remove() deletes the first
    # *equal* element, which may be a different, valid item.
    content[:] = kept
164
def merge_dict_from_to(d1, d2):
    """Merges dictionary d1 into dictionary d2. d1 will remain in original form.

    Keys missing from d2 receive a deep copy of the d1 value. For keys present
    in both: dictionaries are merged recursively, lists from d1 are appended
    (deep-copied) to the d2 list, and string/bool/int/float values in d2 are
    left as they are. Values of differing types raise MergeException unless
    both are strings.
    """
    import copy
    for key, source in d1.items():
        if key not in d2:
            d2[key] = copy.deepcopy(source)
            continue

        target = d2[key]
        if type(source) == type(target):
            if isinstance(source, dict):
                merge_dict_from_to(source, target)
            elif isinstance(source, list):
                target.extend(copy.deepcopy(source))
            elif not isinstance(source, (basestring, bool, int, float)):
                raise Exception('Unknown type: %s value: %s in dictionary' % (type(source), repr(source)))
            # Scalars of the same type: the value already in d2 is kept.
            continue

        # Differing types are only tolerated between two string flavours.
        if not (isinstance(source, basestring) and isinstance(target, basestring)):
            raise MergeException('Merging key %s failed, conflicting datatypes %r vs. %r.' % (
                key, type(source).__name__, type(target).__name__))
190
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that stores the redirect's HTTP code on the returned response."""

    def http_error_301(self, req, fp, code, msg, headers):
        # Let the stock handler process the redirect, then tag its result.
        response = urllib2.HTTPRedirectHandler.http_error_301(
            self, req, fp, code, msg, headers)
        response.status = code
        return response

    def http_error_302(self, req, fp, code, msg, headers):
        # Same treatment as http_error_301, for 302 responses.
        response = urllib2.HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        response.status = code
        return response
203
def urlopener(url_or_request, log, **kwargs):
    """
    Utility function for pulling back a url, retrying on server and connection errors.

    HTTP errors below 500 are re-raised immediately; when all tries fail a
    URLError is raised. The default socket timeout is changed for the
    duration of the call and restored afterwards.

    .. warning:: This is being replaced by requests library.
                 flexget.utils.requests should be used going forward.

    :param str url_or_request: URL or Request object to get.
    :param log: Logger to log debug info and errors to
    :param kwargs: Optional settings read by this function: ``retries``
        (number of tries, default 3), ``timeout`` (socket timeout in seconds,
        default 15.0), ``handlers`` (extra urllib2 handlers) and ``data``
        (passed to the opener as request data)
    :return: The file-like object returned by urlopen
    :raises urllib2.URLError: If the host is flagged unresponsive or the url
        could not be retrieved
    """
    from flexget.utils.requests import is_unresponsive, set_unresponsive

    if isinstance(url_or_request, urllib2.Request):
        # Use the full URL, as in the plain-string case: the code below runs
        # urlparse(url).hostname on it, which is None for a bare host name.
        url = url_or_request.get_full_url()
    else:
        url = url_or_request
    if is_unresponsive(url):
        msg = '%s is known to be unresponsive, not trying again.' % urlparse(url).hostname
        log.warning(msg)
        raise urllib2.URLError(msg)

    retries = kwargs.get('retries', 3)
    timeout = kwargs.get('timeout', 15.0)

    # The timeout is applied through the process-wide socket default, so the
    # previous value is remembered and restored in the finally clause.
    oldtimeout = socket.getdefaulttimeout()
    try:
        socket.setdefaulttimeout(timeout)

        handlers = [SmartRedirectHandler()]
        if urllib2._opener:
            # Keep the handlers of an opener that was installed globally.
            handlers.extend(urllib2._opener.handlers)
        if kwargs.get('handlers'):
            handlers.extend(kwargs['handlers'])
        if len(handlers) > 1:
            handler_names = [h.__class__.__name__ for h in handlers]
            log.debug('Additional handlers have been specified for this urlopen: %s' % ', '.join(handler_names))
        opener = urllib2.build_opener(*handlers).open
        for i in range(retries):
            if i > 0:
                # Pause before every retry.
                time.sleep(3)
            try:
                retrieved = opener(url_or_request, kwargs.get('data'))
            except urllib2.HTTPError as e:
                if e.code < 500:
                    # Client errors will not go away by retrying.
                    log.warning('Could not retrieve url (HTTP %s error): %s' % (e.code, e.url))
                    raise
                log.debug('HTTP error (try %i/%i): %s' % (i + 1, retries, e.code))
            except (urllib2.URLError, socket.timeout) as e:
                reason = str(e.reason) if hasattr(e, 'reason') else 'N/A'
                if reason == 'timed out':
                    set_unresponsive(url)
                log.debug('Failed to retrieve url (try %i/%i): %s' % (i + 1, retries, reason))
            except httplib.IncompleteRead:
                log.critical('Incomplete read - see python bug 6312')
                break
            else:
                # Make the response usable in a `with` statement by adding the
                # context manager methods to its class.
                def enter(self):
                    return self

                def exit(self, exc_type, exc_val, exc_tb):
                    self.close()

                retrieved.__class__.__enter__ = enter
                retrieved.__class__.__exit__ = exit
                return retrieved

        log.warning('Could not retrieve url: %s' % url_or_request)
        raise urllib2.URLError('Could not retrieve url after %s tries.' % retries)
    finally:
        socket.setdefaulttimeout(oldtimeout)
287
class ReList(list):
    """
    A list that stores regexps.

    You can add compiled or uncompiled regexps to the list.
    It will always return the compiled version.
    It will compile the text regexps on demand when first accessed,
    using the instance's ``flags``.
    """

    # Default flags, used unless a `flags` keyword is given to the constructor.
    flags = re.IGNORECASE | re.UNICODE

    # Types treated as "uncompiled regexp": basestring on Python 2, str on
    # interpreters where basestring does not exist.
    try:
        _text_types = basestring
    except NameError:
        _text_types = str

    def __init__(self, *args, **kwargs):
        """Optional :flags: keyword argument with regexp flags to compile with"""
        if 'flags' in kwargs:
            self.flags = kwargs.pop('flags')
        list.__init__(self, *args, **kwargs)

    def __getitem__(self, k):
        """Return item *k*, compiling it first (and caching the result) if it is text."""
        item = list.__getitem__(self, k)
        if isinstance(item, self._text_types):
            # Honour the configured flags rather than a hard-coded default.
            item = re.compile(item, self.flags)
            self[k] = item
        return item

    def __iter__(self):
        """Iterate over the compiled regexps."""
        # Index-based on purpose: every element has to pass through
        # __getitem__ so text patterns get compiled and stored back.
        for i in range(len(self)):
            yield self[i]
318
def make_valid_path(path, windows=None):
    """Removes invalid characters from windows pathnames.

    :param path: Path to clean
    :param windows: True to always clean, a false value other than None to
        never clean; None (default) cleans only when *path* starts with a
        drive specification
    :returns: The path with the characters ``:<>*?"|`` removed and the dots
        that end a path component stripped. The drive part is never altered,
        and components made only of dots (``.``, ``..``) are kept.
    """
    drive, path = ntpath.splitdrive(path)
    if windows is None and drive:
        # A drive letter means this is a windows path.
        windows = True
    if windows:
        # Remove characters that are not allowed in windows file names.
        for char in ':<>*?"|':
            path = path.replace(char, '')
        # Strip dots that end a component. The lookbehind demands a preceding
        # character that is neither a dot nor a separator, so '.' and '..'
        # survive everywhere -- including at the very start of the path.
        path = re.sub(r'(?<=[^\./\\])\.+(?=[/\\]|$)', '', path)
    return drive + path
333
def console(text):
    """Print to console safely."""
    if not isinstance(text, str):
        # Anything that is not already a byte string is converted to unicode
        # and written out as UTF-8 bytes.
        text = unicode(text).encode('utf8')
    print(text)
341
def parse_timedelta(value):
    """Parse a string like ``'5 days'`` or ``'1 minute'`` into a timedelta.

    :param value: A ``timedelta`` (returned unchanged), a false value (gives an
        empty ``timedelta``), or text of the form ``'<integer> <unit>'``. The
        unit is any ``timedelta`` keyword, singular or plural, in any letter
        case; surrounding and repeated whitespace is ignored.
    :returns: The corresponding ``timedelta``
    :raises ValueError: If the text does not have that form
    """
    if isinstance(value, timedelta):
        # Allow timedelta objects to pass through
        return value
    if not value:
        # If no time is given, default to 0
        return timedelta()
    # split() without an argument tolerates extra whitespace; anything other
    # than exactly two parts is rejected with the same message as a bad unit.
    parts = value.lower().split()
    if len(parts) != 2:
        raise ValueError('Invalid time format \'%s\'' % value)
    amount, unit = parts
    # Make sure unit name is plural, as the timedelta keywords are.
    if not unit.endswith('s'):
        unit += 's'
    try:
        return timedelta(**{unit: int(amount)})
    except (TypeError, ValueError):
        # TypeError: unknown unit keyword; ValueError: amount is not an integer.
        raise ValueError('Invalid time format \'%s\'' % value)