flexget.plugins.input.rlslog
Covered: 34 lines
Missed: 65 lines
Skipped 31 lines
Percent: 34 %
  1
import logging
  2
import time
  3
from requests import RequestException
  4
from BeautifulSoup import NavigableString
  5
from flexget.entry import Entry
  6
from flexget.plugin import register_plugin, internet, get_plugin_by_name, PluginError
  7
from flexget.utils.log import log_once
  8
from flexget.utils.soup import get_soup
  9
from flexget.utils.cached_input import cached
 11
# Module-level logger, named 'rlslog', used by the plugin code in this file.
log = logging.getLogger('rlslog')
 14
class RlsLog(object):
    """
    Adds support for rlslog.net as a feed.

    The plugin configuration is a single url (see :meth:`validator`). The page
    behind that url is scraped for release entries and every release that has
    at least one download link recognized by the ``urlrewriting`` plugin is
    turned into an entry.
    """

    def validator(self):
        """Return the config validator: the plugin accepts a plain url."""
        from flexget import validator
        return validator.factory('url')

    def parse_rlslog(self, rlslog_url, feed):
        """
        Fetch and parse one rlslog page.

        :param rlslog_url: Url to parse from
        :param feed: Feed instance
        :return: List of release dictionaries. Each one has the keys
            ``title`` and ``url`` and, when an "imdb" link was found in the
            entry body, ``imdb_url``.
        """
        page = feed.requests.get(rlslog_url, timeout=25, config={'decode_unicode': False})
        soup = get_soup(page.content)

        # The plugin lookup does not depend on the entry or link being
        # processed, so do it once instead of once per link.
        urlrewriting = get_plugin_by_name('urlrewriting')

        releases = []
        for entry in soup.findAll('div', attrs={'class': 'entry'}):
            release = {}
            h3 = entry.find('h3', attrs={'class': 'entrytitle'})
            if h3 is None:
                log.debug('FAIL: No h3 entrytitle')
                continue

            # The title is the first text node of the anchor inside the
            # heading. Skip the entry instead of crashing the whole feed when
            # the anchor is missing, empty or starts with a nested tag.
            title_link = h3.a
            title_text = None
            if title_link is not None and title_link.contents:
                title_text = title_link.contents[0]
            if not isinstance(title_text, NavigableString):
                log.debug('FAIL: No title text in h3 entrytitle')
                continue
            release['title'] = title_text.strip()

            entrybody = entry.find('div', attrs={'class': 'entrybody'})
            if entrybody is None:
                log.debug('FAIL: No entrybody')
                continue

            log.trace('Processing title %s' % (release['title']))

            for link in entrybody.findAll('a'):
                if not link.contents:
                    log.trace('link content empty, skipping')
                    continue
                # NOTE: has_key is kept on purpose; `in` on a BeautifulSoup 3
                # tag tests its contents, not its attributes.
                if not link.has_key('href'):
                    log.trace('link %s missing href' % link)
                    continue

                link_href = link['href']
                link_name = link.contents[0]

                # Only plain text link names can be compared; a link whose
                # first child is another tag is never the imdb link.
                if isinstance(link_name, NavigableString):
                    if link_name.strip().lower() == 'imdb':
                        release['imdb_url'] = link_href
                else:
                    log.trace('link_name is not NavigableString')

                # Ask the urlrewriting plugin whether this link is a download
                # url it knows how to handle. When several links qualify the
                # last one wins.
                temp = {'title': release['title'], 'url': link_href}
                if urlrewriting['instance'].url_rewritable(feed, temp):
                    release['url'] = link_href
                    log.trace('--> accepting %s (known url pattern)' % link_href)
                else:
                    log.trace('<-- ignoring %s (unknown url pattern)' % link_href)

            if 'url' not in release:
                log_once('%s skipped due to missing or unsupported download link' % (release['title']), log)
            else:
                releases.append(release)

        return releases

    @cached('rlslog')
    @internet(log)
    def on_feed_input(self, feed, config):
        """
        Produce entries from the configured rlslog url.

        :param feed: Feed instance
        :param config: Plugin configuration, the url of the page to parse
        :return: List of entries, one per parsed release
        :raises PluginError: If the url ends with ``feed/``
        :raises RequestException: If fetching the page fails on both attempts
        """
        url = config
        if url.endswith('feed/'):
            raise PluginError('Invalid URL. Remove trailing feed/ from the url.')

        releases = []
        entries = []

        # Two attempts in total: a failed first attempt is logged and retried
        # after a short pause, a failed second attempt is propagated.
        for number in range(2):
            try:
                releases = self.parse_rlslog(url, feed)
                break
            except RequestException as e:
                if number == 1:
                    raise
                log.verbose('Error receiving content, retrying in 5s. Try [%s of 2]. Error: %s' % (number + 1, e))
                time.sleep(5)

        for release in releases:
            entry = Entry()
            for field in ('title', 'url', 'imdb_url'):
                value = release.get(field)
                # None values are not wanted!
                if value is not None:
                    entry[field] = value
            entries.append(entry)

        return entries
129
# Register the RlsLog class under the plugin name 'rlslog', declaring api_ver=2.
register_plugin(RlsLog, 'rlslog', api_ver=2)