flexget.plugins.urlrewrite_piratebay
Covered: 38 lines
Missed: 60 lines
Skipped 21 lines
Percent: 38 %
  1
import re
  2
import urllib
  3
import logging
  4
from plugin_urlrewriting import UrlRewritingError
  5
from flexget.entry import Entry
  6
from flexget.plugin import register_plugin, internet, PluginWarning
  7
from flexget.utils.tools import urlopener
  8
from flexget.utils.soup import get_soup
  9
from flexget.utils.search import torrent_availability, StringComparator
 11
# Module-level logger named 'piratebay'; used for all messages emitted by this
# plugin and handed to the helpers below that take a logger argument.
log = logging.getLogger('piratebay')
 14
class UrlRewritePirateBay(object):
    """PirateBay urlrewriter and search provider.

    Turns thepiratebay.org page URLs (torrent detail pages or search result
    pages) into direct download URLs, and offers a title search that returns
    candidate entries ordered by availability.
    """

    # Extracts the numeric size and its unit letter from a result description.
    # The separator between number and unit is a non-breaking space (U+00A0).
    _SIZE_RE = re.compile(r'Size ([\.\d]+)\xa0([GMK])iB')

    # Multiplier applied to the parsed figure for each unit letter.
    # NOTE(review): the units are labelled GiB/MiB/KiB by the site but are
    # scaled by powers of 1000 here; kept exactly as the original arithmetic.
    _SIZE_FACTORS = {'G': 1000 ** 3, 'M': 1000 ** 2, 'K': 1000}

    def url_rewritable(self, feed, entry):
        """Tell whether ``entry['url']`` is a piratebay URL needing a rewrite.

        :param feed: Unused here.
        :param entry: Mapping with at least a ``'url'`` key.
        :return: False for URLs that already end in ``.torrent``; True for
            other URLs under thepiratebay.org or torrents.thepiratebay.org;
            False for everything else.
        """
        url = entry['url']
        if url.endswith('.torrent'):
            # Already a direct torrent link, nothing to rewrite.
            return False
        if url.startswith('http://thepiratebay.org/'):
            return True
        if url.startswith('http://torrents.thepiratebay.org/'):
            return True
        return False

    def url_rewrite(self, feed, entry):
        """Replace ``entry['url']`` with a download URL.

        Search-page URLs are resolved by searching for ``entry['title']`` and
        taking the first (best sorted) result's URL; any other URL is treated
        as a detail page and parsed for its download link.

        :param feed: Unused here.
        :param entry: Entry to modify in place.
        :raises UrlRewritingError: If the entry has no URL, the search yields
            a PluginWarning, or the download page cannot be parsed.
        """
        if 'url' not in entry:
            log.error("Didn't actually get a URL...")
            # Fail with the plugin's own error type instead of falling
            # through to a bare KeyError on the lookup below.
            raise UrlRewritingError('Entry has no url to rewrite')
        log.debug("Got the URL: %s", entry['url'])

        if entry['url'].startswith('http://thepiratebay.org/search/'):
            # The URL points at a search results page: use the best match.
            try:
                entry['url'] = self.search_title(entry['title'])[0]['url']
            except PluginWarning as e:
                raise UrlRewritingError(e)
        else:
            # Any other piratebay URL is parsed as a torrent detail page.
            entry['url'] = self.parse_download_page(entry['url'])

    @internet(log)
    def parse_download_page(self, url):
        """Fetch a detail page and return the href of its download link.

        The link is the first ``<a>`` inside the ``<div class="download">``
        element of the page.

        :param url: Address of the detail page.
        :return: The ``href`` attribute of the download anchor.
        :raises UrlRewritingError: If the download div or its anchor is
            missing, or any other error occurs while parsing the page.
        """
        page = urlopener(url, log)
        try:
            soup = get_soup(page)
            tag_div = soup.find('div', attrs={'class': 'download'})
            if not tag_div:
                raise UrlRewritingError('Unable to locate download link from url %s' % url)
            tag_a = tag_div.find('a')
            if not tag_a:
                # Previously surfaced as a wrapped AttributeError on None.
                raise UrlRewritingError('Unable to locate download link from url %s' % url)
            return tag_a.get('href')
        except UrlRewritingError:
            # Our own errors are already of the right type; do not re-wrap.
            raise
        except Exception as e:
            raise UrlRewritingError(e)

    def search(self, query, comparator, config=None):
        """Search piratebay for ``query`` and return the matching entries.

        :param query: Text to search for.
        :param comparator: Comparator used to judge how well results match.
        :param config: Accepted but not used by this implementation.
        :return: List of entries as produced by :meth:`search_title`.
        """
        entries = self.search_title(query, comparator)
        log.debug('search got %d results', len(entries))
        return entries

    @internet(log)
    def search_title(self, name, comparator=None, url=None):
        """Search piratebay for ``name`` and return matching entries.

        Each returned entry carries ``title``, ``url``, ``torrent_seeds``,
        ``torrent_leeches``, ``search_ratio``, ``search_sort`` and, when the
        size could be parsed, ``content_size``. When nothing matches and the
        search string contains a dash, the search is retried with everything
        from the last dash onwards removed.

        :param name: Title to search for.
        :param comparator: Comparator used to normalise the search string and
            score results. A new ``StringComparator`` is created per call
            when omitted, so no state is shared between calls.
        :param url: Optional search page URL to use instead of the internally
            built one. It is not forwarded to the dash-trimmed retry.
        :return: List of entries sorted by ``search_sort``, best first.
        :raises PluginWarning: If no result matches and there is no dash left
            to trim from the search string.
        """
        if comparator is None:
            comparator = StringComparator()

        comparator.set_seq1(name)
        name = comparator.search_string()
        if not url:
            # NOTE(review): the meaning of the '/0/7/0' suffix is not visible
            # here; presumably page/sort/category selectors - confirm.
            url = 'http://thepiratebay.org/search/' + urllib.quote(name.encode('utf-8')) + '/0/7/0'
            log.debug('Using %s as piratebay search url', url)
        page = urlopener(url, log)
        soup = get_soup(page)

        entries = []
        for link in soup.findAll('a', attrs={'class': 'detLink'}):
            comparator.set_seq2(link.contents[0])
            log.debug('name: %s', comparator.a)
            log.debug('found name: %s', comparator.b)
            log.debug('confidence: %s', comparator.ratio())
            if not comparator.matches():
                continue

            entry = Entry()
            entry['title'] = link.contents[0]
            entry['url'] = 'http://thepiratebay.org' + link.get('href')
            # Three levels up from the link is taken to be the result row;
            # its last two cells hold the seed and leech counts.
            tds = link.parent.parent.parent.findAll('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['search_ratio'] = comparator.ratio()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            content_size = self._parse_content_size(link)
            if content_size is not None:
                entry['content_size'] = content_size
            entries.append(entry)

        if not entries:
            # Nothing matched: retry with the part before the last dash, or
            # give up when there is no dash left.
            dashindex = name.rfind('-')
            if dashindex != -1:
                return self.search_title(name[:dashindex], comparator=comparator)
            else:
                raise PluginWarning('No close matches for %s' % name, log, log_once=True)

        entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
        return entries

    def _parse_content_size(self, link):
        """Return the size parsed from the description following ``link``.

        :param link: The result's title anchor.
        :return: Integer size (parsed figure * 1000**n / 1024**2), or None
            when there is no description node or it holds no size text.
        """
        desc = link.findNext(attrs={'class': 'detDesc'})
        if desc is None or not desc.contents:
            # A result without a description should not abort the search.
            return None
        match = self._SIZE_RE.search(desc.contents[0])
        if not match:
            return None
        # Any unit letter other than G or M is K (the pattern allows only
        # those three), matching the original if/elif/else chain.
        factor = self._SIZE_FACTORS[match.group(2)]
        return int(float(match.group(1)) * factor / 1024 ** 2)
118
# Make the class available to FlexGet under the plugin name 'piratebay',
# listed in both the 'urlrewriter' and the 'search' groups.
register_plugin(
    UrlRewritePirateBay,
    'piratebay',
    groups=['urlrewriter', 'search'])