flexget.plugins.module_imdb_lookup
Covered: 93 lines
Missed: 145 lines
Skipped: 85 lines
Percent: 39 %
  1
import logging
  2
from optparse import SUPPRESS_HELP
  3
from flexget.plugin import register_plugin, register_parser_option, priority, internet, PluginError, PluginWarning
  4
from flexget.manager import Base, Session
  5
from flexget.utils.log import log_once
  6
from flexget.utils.imdb import ImdbSearch, ImdbParser, extract_id
  7
from sqlalchemy import Table, Column, Integer, Float, String, Unicode, Boolean, DateTime
  8
from sqlalchemy.schema import ForeignKey
  9
from sqlalchemy.orm import relation
 10
from datetime import datetime, timedelta
 13
# Association tables for the many-to-many links between a movie row and its
# genres, languages, actors and directors. Each one holds a pair of foreign
# keys: the movie id and the id of the linked row.

genres_table = Table(
    'imdb_movie_genres',
    Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('genre_id', Integer, ForeignKey('imdb_genres.id')))

languages_table = Table(
    'imdb_movie_languages',
    Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('language_id', Integer, ForeignKey('imdb_languages.id')))

actors_table = Table(
    'imdb_movie_actors',
    Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('actor_id', Integer, ForeignKey('imdb_actors.id')))

directors_table = Table(
    'imdb_movie_directors',
    Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('director_id', Integer, ForeignKey('imdb_directors.id')))
 30
class Movie(Base):
    """Row of the ``imdb_movies`` table: details stored for one imdb url."""

    __tablename__ = 'imdb_movies'

    id = Column(Integer, primary_key=True)
    title = Column(Unicode)
    # indexed, rows are looked up by url
    url = Column(String, index=True)

    # many-to-many links, each going through its own association table
    genres = relation('Genre', secondary=genres_table, backref='movies')
    languages = relation('Language', secondary=languages_table, backref='movies')
    actors = relation('Actor', secondary=actors_table, backref='movies')
    directors = relation('Director', secondary=directors_table, backref='movies')

    score = Column(Float)
    votes = Column(Integer)
    year = Column(Integer)
    plot_outline = Column(Unicode)
    mpaa_rating = Column(String, default='')
    photo = Column(String)

    # timestamp used to decide whether the stored details are still fresh
    updated = Column(DateTime)

    def __repr__(self):
        """Return a short debug representation with title, votes and year."""
        shown = (self.title, self.votes, self.year)
        return '<Movie(name=%s,votes=%s,year=%s)>' % shown
 59
class Language(Base):
    """Row of the ``imdb_languages`` table: one language name."""

    __tablename__ = 'imdb_languages'

    id = Column(Integer, primary_key=True)
    name = Column(Unicode)

    def __init__(self, name):
        """Create a language row carrying the given name."""
        self.name = name
 70
class Genre(Base):
    """Row of the ``imdb_genres`` table: one genre name."""

    __tablename__ = 'imdb_genres'

    id = Column(Integer, primary_key=True)
    name = Column(String)

    def __init__(self, name):
        """Create a genre row carrying the given name."""
        self.name = name
 81
class Actor(Base):
    """Row of the ``imdb_actors`` table: an actor's imdb id and name."""

    __tablename__ = 'imdb_actors'

    id = Column(Integer, primary_key=True)
    imdb_id = Column(String)
    name = Column(Unicode)

    def __init__(self, imdb_id, name=None):
        """Create an actor row; the name is optional."""
        self.imdb_id = imdb_id
        self.name = name
 94
class Director(Base):
    """Row of the ``imdb_directors`` table: a director's imdb id and name."""

    __tablename__ = 'imdb_directors'

    id = Column(Integer, primary_key=True)
    imdb_id = Column(String)
    name = Column(Unicode)

    def __init__(self, imdb_id, name=None):
        """Create a director row; the name is optional."""
        self.imdb_id = imdb_id
        self.name = name
107
class SearchResult(Base):
    """Row of the ``imdb_search`` table: outcome of a title search.

    Holds the searched title, the url it resolved to (if any) and a flag
    marking titles whose search failed.
    """

    __tablename__ = 'imdb_search'

    id = Column(Integer, primary_key=True)
    # indexed, rows are looked up by title
    title = Column(Unicode, index=True)
    url = Column(String)
    fails = Column(Boolean, default=False)

    def __init__(self, title, url=None):
        """Create a search record; leave url unset when nothing was found."""
        self.title = title
        self.url = url
120
# Module-level logger used by the plugin class and passed to its decorators/helpers.
log = logging.getLogger('imdb_lookup')
123
class ModuleImdbLookup(object):
124
    """
125
        Retrieves imdb information for entries.
127
        Example:
129
        imdb_lookup: yes
131
        Also provides imdb lookup functionality to all other imdb related plugins.
132
    """
134
    def validator(self):
135
        from flexget import validator
136
        return validator.factory('boolean')
138
    @priority(100)
139
    def on_feed_filter(self, feed):
140
        from flexget.utils.log import log_once
141
        for entry in feed.entries:
142
            try:
143
                self.lookup(feed, entry)
144
            except PluginError, e:
145
                log_once(e.value.capitalize(), logger=log)
146
            except PluginWarning, e:
147
                log_once(e.value.capitalize(), logger=log)
149
    @internet(log)
150
    def lookup(self, feed, entry, search_allowed=True):
151
        """Perform imdb lookup for entry. Raises PluginError with failure reason."""
153
        log.debug('lookup for %s' % entry['title'])
155
        take_a_break = False
156
        session = Session()
158
        try:
160
            for field in ['imdb_votes', 'imdb_score']:
161
                if field in entry:
162
                    value = entry[field]
163
                    if not isinstance(value, int) and not isinstance(value, float):
164
                        raise PluginError('Entry field %s should be a number!' % field)
167
            if 'imdb_url' in entry:
168
                imdb_id = extract_id(entry['imdb_url'])
169
                if imdb_id:
170
                    entry['imdb_url'] = 'http://www.imdb.com/title/%s' % imdb_id
171
                else:
172
                    log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
173
                    del(entry['imdb_url'])
176
            if not 'imdb_url' in entry:
177
                result = session.query(SearchResult).filter(SearchResult.title == entry['title']).first()
178
                if result:
179
                    if result.fails and not feed.manager.options.retry_lookup:
181
                        log.debug('%s will fail lookup' % entry['title'])
182
                        raise PluginError('Title lookup fails')
183
                    else:
184
                        if result.url:
185
                            log.log(5, 'Setting imdb url for %s from db' % entry['title'])
186
                            entry['imdb_url'] = result.url
189
            if not 'imdb_url' in entry and search_allowed:
190
                feed.verbose_progress('Searching from imdb %s' % entry['title'])
192
                take_a_break = True
193
                search = ImdbSearch()
194
                search_result = search.smart_match(entry['title'])
195
                if search_result:
196
                    entry['imdb_url'] = search_result['url']
198
                    result = SearchResult(entry['title'], entry['imdb_url'])
199
                    session.add(result)
200
                    feed.verbose_progress('Found %s' % (entry['imdb_url']), log)
201
                else:
202
                    log_once('Imdb lookup failed for %s' % entry['title'], log)
204
                    result = SearchResult(entry['title'])
205
                    result.fails = True
206
                    session.add(result)
207
                    raise PluginError('Title lookup failed')
209
            imdb = ImdbParser()
212
            cached = session.query(Movie).filter(Movie.url == entry['imdb_url']).first()
213
            if (not cached) or (cached.updated is None) or (cached.updated < datetime.now() - timedelta(days=2)):
215
                session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()
217
                feed.verbose_progress('Parsing imdb for %s' % entry['title'])
218
                try:
219
                    take_a_break = True
220
                    imdb.parse(entry['imdb_url'])
222
                    movie = Movie()
223
                    movie.photo = imdb.photo
224
                    movie.title = imdb.name
225
                    movie.score = imdb.score
226
                    movie.votes = imdb.votes
227
                    movie.year = imdb.year
228
                    movie.mpaa_rating = imdb.mpaa_rating
229
                    movie.plot_outline = imdb.plot_outline
230
                    movie.url = entry['imdb_url']
231
                    for name in imdb.genres:
232
                        genre = session.query(Genre).filter(Genre.name == name).first()
233
                        if not genre:
234
                            genre = Genre(name)
235
                        movie.genres.append(genre) # pylint:disable=E1101
236
                    for name in imdb.languages:
237
                        language = session.query(Language).filter(Language.name == name).first()
238
                        if not language:
239
                            language = Language(name)
240
                        movie.languages.append(language) # pylint:disable=E1101
241
                    for imdb_id, name in imdb.actors.iteritems():
242
                        actor = session.query(Actor).filter(Actor.imdb_id == imdb_id).first()
243
                        if not actor:
244
                            actor = Actor(imdb_id, name)
245
                        movie.actors.append(actor) # pylint:disable=E1101
246
                    for imdb_id, name in imdb.directors.iteritems():
247
                        director = session.query(Director).filter(Director.imdb_id == imdb_id).first()
248
                        if not director:
249
                            director = Director(imdb_id, name)
250
                        movie.directors.append(director) # pylint:disable=E1101
252
                    movie.updated = datetime.now()
253
                    output = session.add(movie)
255
                except UnicodeDecodeError:
256
                    log.error('Unable to determine encoding for %s. Installing chardet library may help.' % entry['imdb_url'])
258
                    movie = Movie()
259
                    movie.url = entry['imdb_url']
260
                    session.add(movie)
261
                    raise PluginWarning('UnicodeDecodeError')
262
                except ValueError, e:
264
                    if feed.manager.options.debug:
265
                        log.exception(e)
266
                    raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)
267
            else:
270
                imdb.url = cached.url
271
                imdb.photo = cached.photo
272
                imdb.name = cached.title
273
                imdb.year = cached.year
274
                imdb.votes = cached.votes
275
                imdb.score = cached.score
276
                imdb.mpaa_rating = cached.mpaa_rating
277
                imdb.plot_outline = cached.plot_outline
278
                imdb.genres = [genre.name for genre in cached.genres]
279
                imdb.languages = [lang.name for lang in cached.languages]
280
                for actor in cached.actors:
281
                    imdb.actors[actor.imdb_id] = actor.name
282
                for director in cached.directors:
283
                    imdb.directors[director.imdb_id] = director.name
285
            if imdb.mpaa_rating is None:
286
                imdb.mpaa_rating = ''
288
            log.log(5, 'imdb.score: %s' % imdb.score)
289
            log.log(5, 'imdb.votes: %s' % imdb.votes)
290
            log.log(5, 'imdb.year: %s' % imdb.year)
291
            log.log(5, 'imdb.genres: %s' % imdb.genres)
292
            log.log(5, 'imdb.languages: %s' % imdb.languages)
293
            log.log(5, 'imdb.actors: %s' % ', '.join(imdb.actors))
294
            log.log(5, 'imdb.directors: %s' % ', '.join(imdb.directors))
295
            log.log(5, 'imdb.mpaa_rating: %s' % ', '.join(imdb.mpaa_rating))
299
            entry['imdb_url'] = imdb.url
300
            entry['imdb_id'] = imdb.imdb_id
301
            entry['imdb_name'] = imdb.name
302
            entry['imdb_photo'] = imdb.photo
303
            entry['imdb_plot_outline'] = imdb.plot_outline
304
            entry['imdb_score'] = imdb.score
305
            entry['imdb_votes'] = imdb.votes
306
            entry['imdb_year'] = imdb.year
307
            entry['imdb_genres'] = imdb.genres
308
            entry['imdb_languages'] = imdb.languages
309
            entry['imdb_actors'] = imdb.actors
310
            entry['imdb_directors'] = imdb.directors
311
            entry['imdb_mpaa_rating'] = imdb.mpaa_rating
314
            if take_a_break and not feed.manager.options.debug and not feed.manager.unit_test:
315
                import time
316
                time.sleep(3)
317
        finally:
318
            log.log(5, 'committing session')
319
            session.commit()
321
# Register the plugin class under the name 'imdb_lookup'.
register_plugin(ModuleImdbLookup, 'imdb_lookup')
322
# Command line flag with suppressed help text; the plugin reads it as
# options.retry_lookup to search again for titles recorded as failing.
register_parser_option('--retry-lookup', action='store_true', dest='retry_lookup', default=0, help=SUPPRESS_HELP)