flexget.plugins.input_rlslog
Covered: 26 lines
Missed: 102 lines
Skipped 33 lines
Percent: 20 %
  1
import urllib2
  2
import logging
  3
import re
  4
from flexget.feed import Entry
  5
from flexget.plugin import *
  6
from flexget.utils.log import log_once
  7
from flexget.utils.soup import get_soup
  8
from flexget.utils.cached_input import cached
  9
from BeautifulSoup import NavigableString
 10
from flexget.utils.tools import urlopener
 12
# Module-level logger named 'rlslog'; used for the plugin's debug output and handed to helper calls.
log = logging.getLogger('rlslog')
 15
class RlsLog:
 16
    """
 17
        Adds support for rlslog.net as a feed.
 19
        In case of movies the plugin supplies pre-parsed IMDB details
 20
        (helps when chaining with filter_imdb).
 21
    """
 23
    def validator(self):
        """Return the validator built by the 'url' factory of flexget.validator."""
        from flexget import validator as validator_module
        url_validator = validator_module.factory('url')
        return url_validator
 27
    def parse_imdb(self, s):
        """
            Extract the IMDB score and vote count from a free-form rating string.

            :param s: text such as '7.5/10 (12,345 votes)'
            :return: tuple (score, votes); score is a float or None when no
                score was found, votes is an int or None when no count was found
        """
        score = None
        votes = None
        # The decimal form is tried first, the bare 'N/10' form second. One or
        # two leading digits are accepted so a score of 10 is not cut down to
        # its last digit.
        re_score = [re.compile(r'(\d{1,2}\.\d)'), re.compile(r'(\d{1,2})/10')]
        # At least one digit is required, otherwise int('') would raise ValueError.
        re_votes = re.compile(r'\((\d+).votes\)', re.IGNORECASE)

        for pattern in re_score:
            found = pattern.search(s)
            if found is not None:
                score = float(found.group(1))
                break

        # Thousands separators are dropped before the vote count is matched.
        found = re_votes.search(s.replace(',', ''))
        if found is not None:
            votes = int(found.group(1))

        return (score, votes)
 43
    def parse_rlslog(self, rlslog_url, feed):
        """
            Parse configured url and return releases array.

            Every div with class 'entry' on the page is treated as one release.
            A release is kept only when at least one of its links is accepted
            by the urlrewriting plugin; when several are, the last one wins.

            :param rlslog_url: address of the page to fetch
            :param feed: feed being processed, handed to the urlrewriting plugin
            :return: list of dicts holding 'title' and 'url' and, when found,
                'imdb_url', 'imdb_score' and 'imdb_votes'
        """
        page = urlopener(rlslog_url, log)
        soup = get_soup(page)

        # The plugin lookup does not depend on the entry or the link, so it is done once.
        urlrewriting = get_plugin_by_name('urlrewriting')

        releases = []
        for entry in soup.findAll('div', attrs={'class': 'entry'}):
            release = {}
            h3 = entry.find('h3', attrs={'class': 'entrytitle'})
            if not h3:
                log.debug('FAIL: No h3 entrytitle')
                continue
            if h3.a is None or not h3.a.contents:
                # Without a title link there is nothing to name the release after.
                log.debug('FAIL: No title link in h3 entrytitle')
                continue
            release['title'] = h3.a.contents[0].strip()
            entrybody = entry.find('div', attrs={'class': 'entrybody'})
            if not entrybody:
                log.debug('FAIL: No entrybody')
                continue

            log.debugall('Processing title %s' % (release['title']))

            # Preferred source of IMDB details: an explicit "imdb rating:" label.
            rating = entrybody.find('strong', text=re.compile(r'imdb rating:', re.IGNORECASE))
            if rating is not None:
                score_raw = rating.next.string
                if score_raw is not None:
                    release['imdb_score'], release['imdb_votes'] = self.parse_imdb(score_raw)

            for link in entrybody.findAll('a'):
                if not link.contents:
                    log.debugall('link content empty, skipping')
                    continue
                if not link.has_key('href'):
                    log.debugall('link %s missing href' % link)
                    continue

                link_name = link.contents[0]
                link_href = link['href']

                if isinstance(link_name, NavigableString):
                    if link_name.strip().lower() == 'imdb':
                        release['imdb_url'] = link_href
                        # Fallback source of IMDB details: the text following the imdb link,
                        # used only when the rating label above produced nothing.
                        score_raw = link.next.next.string
                        if 'imdb_score' not in release and 'imdb_votes' not in release and score_raw is not None:
                            release['imdb_score'], release['imdb_votes'] = self.parse_imdb(score_raw)
                else:
                    log.debugall('link_name is not NavigableString')

                # Ask the urlrewriting plugin whether it can handle this link.
                candidate = {'title': release['title'], 'url': link_href}
                if urlrewriting['instance'].url_rewritable(feed, candidate):
                    release['url'] = link_href
                    log.debugall('--> accepting %s (known url pattern)' % link_href)
                else:
                    log.debugall('<-- ignoring %s (unknown url pattern)' % link_href)

            if 'url' not in release:
                log_once('%s skipped due to missing or unsupported (unresolvable) download link' % (release['title']), log)
            else:
                releases.append(release)

        return releases
117
    @cached('rlslog', 'url')
    @internet(log)
    def on_feed_input(self, feed):
        """
            Fetch releases from the configured rlslog url and add them to the feed.

            The page is parsed with parse_rlslog; a failed fetch is retried
            after a short pause until the try limit is reached. One Entry per
            release is appended to feed.entries.

            :param feed: feed being processed; supplies the url and receives the entries
            :raises PluginWarning: when the configured url ends with 'feed/'
            :raises urllib2.HTTPError, urllib2.URLError: when the last try fails too
        """
        import time

        url = feed.get_input_url('rlslog')
        if url.endswith('feed/'):
            raise PluginWarning('Invalid URL. Remove trailing feed/ from the url.')

        max_tries = 2
        retry_delay = 5

        releases = []
        for attempt in range(1, max_tries + 1):
            try:
                releases = self.parse_rlslog(url, feed)
                # Success: stop here instead of fetching the page a second time.
                break
            except urllib2.HTTPError as e:
                # HTTPError is handled before URLError because it is the more specific of the two.
                if attempt == max_tries:
                    raise
                feed.verbose_progress('Error recieving content, retrying in %ss. Try [%s of %s]. HTTP Error Code: %s' % (retry_delay, attempt, max_tries, e.code))
            except urllib2.URLError as e:
                if attempt == max_tries:
                    raise
                feed.verbose_progress('Error retrieving the URL, retrying in %ss. Try [%s of %s]. Error: %s' % (retry_delay, attempt, max_tries, e.reason))
            # Only reached after a failed try that will be repeated.
            time.sleep(retry_delay)

        for release in releases:
            entry = Entry()
            for field in ['title', 'url', 'imdb_url', 'imdb_score', 'imdb_votes']:
                value = release.get(field)
                # Missing fields and None values are not copied to the entry.
                if value is not None:
                    entry[field] = value
            feed.entries.append(entry)
160
# Register the RlsLog class under the plugin name 'rlslog' (register_plugin presumably comes from the flexget.plugin star import).
register_plugin(RlsLog, 'rlslog')