flexget.plugins.urlrewrite_newzleech
Covered: 17 lines
Missed: 52 lines
Skipped 27 lines
Percent: 24 %
 1
import urllib
 2
import urllib2
 3
import logging
 4
import re
 5
from flexget.entry import Entry
 6
from flexget.plugin import register_plugin, internet
 7
from flexget.utils.soup import get_soup
 8
from flexget.utils.tools import urlopener
10
# Module-level logger used by the newzleech plugin code in this file.
log = logging.getLogger("newzleech")
13
class UrlRewriteNewzleech(object):
    """
        UrlRewriter or search by using newzleech.com
        TODO: implement basic url rewriting
    """

    @staticmethod
    def _build_subject_regexp(query):
        """Compile the pattern that decides whether a subject matches ``query``.

        Spaces, dashes and dots in the query match any single character, so
        ``Some Show`` also matches ``Some.Show``.  Every other character is
        escaped and matched literally; without that, a query containing
        regular-expression metacharacters (``(``, ``[``, ``+`` ...) would
        either fail to compile or match unintended subjects.
        """
        literal_parts = re.split(r'[ .-]', query)
        body = '.'.join(re.escape(part) for part in literal_parts)
        return re.compile('.*' + body + '.*')

    @staticmethod
    def _parse_size(value):
        """Convert a size string such as ``700 MB`` into a number.

        The last three characters are treated as the unit suffix.  Values
        tagged ``GB`` are multiplied by 1024 and values tagged ``KB`` are
        divided by 1024; anything else is returned as parsed (presumably
        megabytes -- confirm against the site output).

        Returns 0 (after logging an error) when the numeric part cannot be
        parsed.
        """
        try:
            num = float(value[:-3])
        except (TypeError, ValueError):
            log.error('Failed to parse_size %s' % value)
            return 0
        if 'GB' in value:
            num *= 1024
        elif 'KB' in value:
            num /= 1024
        return num

    @staticmethod
    def _parse_row(item):
        """Extract ``(subject, complete, size, nzb_url)`` from one result table.

        Returns None when any of the expected cells (``subject``, ``complete``,
        ``size``, ``get``) is missing or empty, or when the download link has
        no ``href``, so that a single malformed row cannot abort the search.
        """
        cells = {}
        for css_class in ('subject', 'complete', 'size', 'get'):
            cell = item.find('td', attrs={'class': css_class})
            if cell is None:
                return None
            cells[css_class] = cell

        if not cells['complete'].contents or not cells['size'].contents:
            return None

        href = cells['get'].next.get('href')
        if not href:
            return None

        subject = ''.join(cells['subject'].next.findAll(text=True))
        complete = cells['complete'].contents[0]
        size = cells['size'].contents[0]
        return subject, complete, size, 'http://newzleech.com/' + href

    @internet(log)
    def search(self, query, comparator, config=None):
        """Search newzleech.com for ``query`` and return the matching entries.

        :param query: Text to search for; it is sent to the site latin1-encoded
            and also used to filter the returned subjects.
        :param comparator: Unused by this implementation; accepted to keep the
            search plugin signature.
        :param config: Unused by this implementation.
        :return: List of entries whose subject matches the query and whose
            completion is ``100``, sorted by ``content_size`` with the largest
            first, or None when nothing matched.
        """
        params = {'q': query.encode('latin1'),
                  'm': 'search', 'group': '', 'min': 'min',
                  'max': 'max', 'age': '', 'minage': '', 'adv': ''}
        url = u'http://newzleech.com/?%s' % str(urllib.urlencode(params))

        txheaders = {
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'Keep-Alive': '300',
            'Connection': 'keep-alive',
        }

        req = urllib2.Request(url, headers=txheaders)
        page = urlopener(req, log)
        soup = get_soup(page)

        # The pattern depends only on the query, so build it once rather than
        # once per result row.
        subject_regexp = self._build_subject_regexp(query)

        nzbs = []
        for item in soup.findAll('table', attrs={'class': 'contentt'}):
            row = self._parse_row(item)
            if row is None:
                log.debug('Skipping newzleech result without the expected cells')
                continue
            subject, complete, size, nzb_url = row

            if not subject_regexp.match(subject):
                continue
            log.debug('%s matches to regexp', subject)

            if complete != u'100':
                log.debug('Match is incomplete %s from newzleech, skipping ..', query)
                continue
            log.info('Found \'%s\'', query)

            # Parse the size once and reuse it for both keys.
            content_size = self._parse_size(size)
            nzb = Entry(title=subject, url=nzb_url, content_size=content_size)
            # Explicit assignments kept from the original implementation.
            nzb['url'] = nzb_url
            nzb['size'] = content_size
            nzbs.append(nzb)

        if not nzbs:
            log.debug('Unable to find %s', query)
            return

        # Largest results first.
        nzbs.sort(reverse=True, key=lambda x: x.get('content_size', 0))
        return nzbs
95
# Register the class with FlexGet's plugin system under the name 'newzleech',
# in the 'search' plugin group.
register_plugin(UrlRewriteNewzleech, 'newzleech', groups=['search'])