flexget.plugins.metainfo.imdb_lookup
Covered: 229 lines
Missed: 128 lines
Skipped 110 lines
Percent: 64 %
  1
import logging
  2
from datetime import datetime, timedelta
  3
from sqlalchemy import Table, Column, Integer, Float, String, Unicode, Boolean, DateTime
  4
from sqlalchemy.schema import ForeignKey, Index
  5
from sqlalchemy.orm import relation, joinedload_all
  6
from flexget import schema
  7
from flexget.entry import Entry
  8
from flexget.plugin import register_plugin, internet, PluginError
  9
from flexget.manager import Session
 10
from flexget.utils.log import log_once
 11
from flexget.utils.imdb import ImdbSearch, ImdbParser, extract_id, make_url
 12
from flexget.utils.sqlalchemy_utils import table_add_column
 13
from flexget.utils.database import with_session
 14
from flexget.utils.sqlalchemy_utils import table_columns, get_index_by_name
 16
# Current version of this plugin's database schema. The declarative base is
# created with the same number, so the constant is passed instead of
# repeating the literal.
SCHEMA_VER = 1
Base = schema.versioned_base('imdb_lookup', SCHEMA_VER)
 22
# Association tables for the many-to-many relations declared on Movie.
# Every table pairs a movie id with the id of the related row and carries a
# composite index over both columns.

# movie <-> genre
genres_table = Table(
    'imdb_movie_genres', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('genre_id', Integer, ForeignKey('imdb_genres.id')),
    Index('ix_imdb_movie_genres', 'movie_id', 'genre_id'),
)

# movie <-> language
languages_table = Table(
    'imdb_movie_languages', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('language_id', Integer, ForeignKey('imdb_languages.id')),
    Index('ix_imdb_movie_languages', 'movie_id', 'language_id'),
)

# movie <-> actor
actors_table = Table(
    'imdb_movie_actors', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('actor_id', Integer, ForeignKey('imdb_actors.id')),
    Index('ix_imdb_movie_actors', 'movie_id', 'actor_id'),
)

# movie <-> director
directors_table = Table(
    'imdb_movie_directors', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('director_id', Integer, ForeignKey('imdb_directors.id')),
    Index('ix_imdb_movie_directors', 'movie_id', 'director_id'),
)
 43
class Movie(Base):
    """Cached imdb details of one movie, stored in the ``imdb_movies`` table."""

    __tablename__ = 'imdb_movies'

    id = Column(Integer, primary_key=True)
    title = Column(Unicode)
    url = Column(String, index=True)

    # many-to-many relations, each backed by its own association table
    genres = relation('Genre', secondary=genres_table, backref='movies')
    languages = relation('Language', secondary=languages_table, backref='movies')
    actors = relation('Actor', secondary=actors_table, backref='movies')
    directors = relation('Director', secondary=directors_table, backref='movies')

    score = Column(Float)
    votes = Column(Integer)
    year = Column(Integer)
    plot_outline = Column(Unicode)
    mpaa_rating = Column(String, default='')
    photo = Column(String)

    # time the details were last stored; None means they were never fully stored
    updated = Column(DateTime)

    @property
    def imdb_id(self):
        """:return: imdb id extracted from the stored url"""
        return extract_id(self.url)

    @property
    def expired(self):
        """
        :return: True if movie details are considered to be expired, ie. need of update
        """
        if self.updated is None:
            log.debug('updated is None: %s' % self)
            return True
        # Base interval is 2 days, extended by 5 days per year of movie age.
        refresh_interval = 2
        if self.year:
            # Clamp the age at zero: a movie whose year lies in the future
            # would otherwise produce a negative interval and be considered
            # expired (and re-parsed) on every single lookup.
            age = max(datetime.now().year - self.year, 0)
            refresh_interval += age * 5
            log.debug('movie `%s` age %i expires in %i days' % (self.title, age, refresh_interval))
        return self.updated < datetime.now() - timedelta(days=refresh_interval)

    def __repr__(self):
        return '<Movie(name=%s,votes=%s,year=%s)>' % (self.title, self.votes, self.year)
 91
class Language(Base):
    """Language row of the ``imdb_languages`` table."""

    __tablename__ = 'imdb_languages'

    # surrogate primary key
    id = Column(Integer, primary_key=True)
    # language name
    name = Column(Unicode)

    def __init__(self, name):
        """
        :param name: Name of the language
        """
        self.name = name
102
class Genre(Base):
    """Genre row of the ``imdb_genres`` table."""

    __tablename__ = 'imdb_genres'

    # surrogate primary key
    id = Column(Integer, primary_key=True)
    # genre name
    name = Column(String)

    def __init__(self, name):
        """
        :param name: Name of the genre
        """
        self.name = name
113
class Actor(Base):
    """Actor row of the ``imdb_actors`` table."""

    __tablename__ = 'imdb_actors'

    # surrogate primary key
    id = Column(Integer, primary_key=True)
    # identifier of the person on imdb
    imdb_id = Column(String)
    name = Column(Unicode)

    def __init__(self, imdb_id, name=None):
        """
        :param imdb_id: Imdb identifier of the actor
        :param name: Name of the actor, optional
        """
        self.name = name
        self.imdb_id = imdb_id
126
class Director(Base):
    """Director row of the ``imdb_directors`` table."""

    __tablename__ = 'imdb_directors'

    # surrogate primary key
    id = Column(Integer, primary_key=True)
    # identifier of the person on imdb
    imdb_id = Column(String)
    name = Column(Unicode)

    def __init__(self, imdb_id, name=None):
        """
        :param imdb_id: Imdb identifier of the director
        :param name: Name of the director, optional
        """
        self.name = name
        self.imdb_id = imdb_id
139
class SearchResult(Base):
    """Remembered outcome of a title search, stored in the ``imdb_search`` table."""

    __tablename__ = 'imdb_search'

    id = Column(Integer, primary_key=True)
    title = Column(Unicode, index=True)
    url = Column(String)
    # True when the title is recorded as a failed search
    fails = Column(Boolean, default=False)

    def __init__(self, title, url=None):
        """
        :param title: Title that was searched for
        :param url: Imdb url the title resolved to, if any
        """
        self.url = url
        self.title = title

    def __repr__(self):
        details = (self.title, self.url, self.fails)
        return '<SearchResult(title=%s,url=%s,fails=%s)>' % details

    @property
    def imdb_id(self):
        """:return: imdb id extracted from the stored url"""
        return extract_id(self.url)
159
# Module-level logger used by the models, the schema upgrade and the plugin class.
log = logging.getLogger('imdb_lookup')
162
@schema.upgrade('imdb_lookup')
def upgrade(ver, session):
    """
    Bring an existing imdb_lookup database up to the current schema version.

    :param ver: Schema version currently stored, None for an unversioned database
    :param session: Session used to inspect and alter the tables
    :return: Schema version after the applied upgrade steps
    """
    if ver is None:
        # Unversioned database: add whichever of the later columns are missing.
        columns = table_columns('imdb_movies', session)
        new_columns = (('photo', String),
                       ('updated', DateTime),
                       ('mpaa_rating', String))
        for name, column_type in new_columns:
            if name not in columns:
                log.info('Adding %s column to imdb_movies table.' % name)
                table_add_column('imdb_movies', name, column_type, session)
        ver = 0
    if ver == 0:
        # Version 0 -> 1: create the composite indexes of the association tables.
        # The backslash is escaped so the literal stays valid on newer Pythons;
        # the logged text is unchanged.
        log.info('Adding imdb indexes delivering up to 20x speed increase \\o/ ...')
        indexed_tables = ((actors_table, 'ix_imdb_movie_actors'),
                          (genres_table, 'ix_imdb_movie_genres'),
                          (languages_table, 'ix_imdb_movie_languages'),
                          (directors_table, 'ix_imdb_movie_directors'))
        for table, index_name in indexed_tables:
            index = get_index_by_name(table, index_name)
            if index is None:
                # Keep going with the remaining indexes rather than aborting.
                log.critical('Index adding failure!')
                continue
            log.info('Creating index %s ...' % index.name)
            index.create(bind=session.connection())
        ver = 1
    return ver
193
class ImdbLookup(object):
    """
        Retrieves imdb information for entries.

        Example:

        imdb_lookup: yes

        Also provides imdb lookup functionality to all other imdb related plugins.
    """

    # Maps entry field names to either a Movie attribute name or a callable
    # that computes the value from a Movie instance.
    field_map = {
        'imdb_url': 'url',
        'imdb_id': lambda movie: extract_id(movie.url),
        'imdb_name': 'title',
        'imdb_photo': 'photo',
        'imdb_plot_outline': 'plot_outline',
        'imdb_score': 'score',
        'imdb_votes': 'votes',
        'imdb_year': 'year',
        'imdb_genres': lambda movie: [genre.name for genre in movie.genres],
        'imdb_languages': lambda movie: [lang.name for lang in movie.languages],
        'imdb_actors': lambda movie: dict((actor.imdb_id, actor.name) for actor in movie.actors),
        'imdb_directors': lambda movie: dict((director.imdb_id, director.name) for director in movie.directors),
        'imdb_mpaa_rating': 'mpaa_rating',
        # fields without the imdb_ prefix
        'movie_name': 'title',
        'movie_year': 'year'}

    def validator(self):
        """:return: Validator accepting a plain boolean configuration"""
        from flexget import validator
        return validator.factory('boolean')

    def on_feed_metainfo(self, feed, config):
        """Register the lazy imdb fields on every entry of the feed when enabled."""
        if not config:
            return
        for entry in feed.entries:
            entry.register_lazy_fields(self.field_map, self.lazy_loader)

    def lazy_loader(self, entry, field):
        """Does the lookup for this entry and populates the entry fields."""
        try:
            self.lookup(entry)
        except PluginError as e:
            log_once(e.value.capitalize(), logger=log)
            # The lookup failed, drop our lazy registration from the entry.
            entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
        return entry[field]

    @with_session
    def imdb_id_lookup(self, movie_title=None, raw_title=None, session=None):
        """
        Perform faster lookup providing just imdb_id.
        Falls back to using basic lookup if data cannot be found from cache.

        .. note::

           API will be changed, it's dumb to return None on errors AND
           raise PluginError on some else

        :param movie_title: Name of the movie
        :param raw_title: Raw entry title
        :return: imdb id or None
        :raises PluginError: Failure reason
        """
        if movie_title:
            log.debug('imdb_id_lookup: trying with title: %s' % movie_title)
            movie = session.query(Movie).filter(Movie.title == movie_title).first()
            if movie:
                log.debug('--> success! got %s returning %s' % (movie, movie.imdb_id))
                return movie.imdb_id
        if raw_title:
            log.debug('imdb_id_lookup: trying cache with: %s' % raw_title)
            result = session.query(SearchResult).filter(SearchResult.title == raw_title).first()
            if result:
                # A title recorded as failing is not looked up again here.
                if result.fails:
                    return None
                log.debug('--> success! got %s returning %s' % (result, result.imdb_id))
                return result.imdb_id
            # Nothing cached for the raw title: run the full lookup on a
            # throw-away entry and read the id from it.
            fake_entry = Entry(raw_title, '')
            self.lookup(fake_entry)
            return fake_entry['imdb_id']
        return None

    @internet(log)
    def lookup(self, entry, search_allowed=True):
        """
        Perform imdb lookup for entry.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """

        from flexget.manager import manager

        if entry.get('imdb_url', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
        elif entry.get('imdb_id', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
        elif entry.get('title', eval_lazy=False):
            log.debug('lookup for %s' % entry['title'])
        else:
            raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

        take_a_break = False
        session = Session()

        try:
            # entry sanity checks
            for field in ['imdb_votes', 'imdb_score']:
                if entry.get(field, eval_lazy=False):
                    value = entry[field]
                    if not isinstance(value, (int, float)):
                        raise PluginError('Entry field %s should be a number!' % field)

            # if only imdb_id is included, build the url from it
            if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
                entry['imdb_url'] = make_url(entry['imdb_id'])

            # normalise the imdb url, or discard it when no id can be extracted
            if entry.get('imdb_url', eval_lazy=False):
                imdb_id = extract_id(entry['imdb_url'])
                if imdb_id:
                    entry['imdb_url'] = make_url(imdb_id)
                else:
                    log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                    del entry['imdb_url']

            # no imdb_url: check for a remembered search result for the title
            if not entry.get('imdb_url', eval_lazy=False):
                if 'title' not in entry:
                    # Reached when an invalid imdb_url / imdb_id was discarded
                    # above and there is no title to fall back on. Fail with
                    # the documented exception instead of a bare KeyError.
                    raise PluginError('looking up IMDB for entry failed, '
                                      'imdb_url or imdb_id is invalid and no title passed.')
                result = session.query(SearchResult).filter(
                    SearchResult.title == entry['title']).first()
                if result:
                    if result.fails and not manager.options.retry:
                        # recorded as failing and no retry requested
                        log.debug('%s will fail lookup' % entry['title'])
                        raise PluginError('Title `%s` lookup fails' % entry['title'])
                    else:
                        if result.url:
                            log.trace('Setting imdb url for %s from db' % entry['title'])
                            entry['imdb_url'] = result.url

            # still no imdb url, try searching when that is allowed
            if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
                log.verbose('Searching from imdb `%s`' % entry['title'])

                take_a_break = True
                search = ImdbSearch()
                search_result = search.smart_match(entry['title'])
                if search_result:
                    entry['imdb_url'] = search_result['url']
                    # remember the url found for this title
                    result = SearchResult(entry['title'], entry['imdb_url'])
                    session.add(result)
                    log.verbose('Found %s' % (entry['imdb_url']))
                else:
                    log_once('Imdb lookup failed for %s' % entry['title'], log)
                    # remember the failure; the commit in ``finally`` stores it
                    # even though an exception is raised right after
                    result = SearchResult(entry['title'])
                    result.fails = True
                    session.add(result)
                    raise PluginError('Title `%s` lookup failed' % entry['title'])

            # check if this imdb page has already been stored
            movie = session.query(Movie).options(
                joinedload_all(Movie.genres, Movie.languages,
                               Movie.actors, Movie.directors)).filter(
                Movie.url == entry['imdb_url']).first()

            # Parsing is needed when nothing is stored or the stored details
            # have expired. ``expired`` is evaluated only once.
            movie_expired = movie is not None and movie.expired

            if movie is None or movie_expired:
                if movie is not None:
                    log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
                    # Remove the old movie, a fresh one is stored below.
                    session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()

                if 'title' in entry:
                    log.verbose('Parsing imdb for `%s`' % entry['title'])
                else:
                    # imdb_id is not guaranteed to be set on the entry (only
                    # imdb_url may have been passed), so derive the id from
                    # the normalised url instead of reading entry['imdb_id'].
                    log.verbose('Parsing imdb for `%s`' % extract_id(entry['imdb_url']))
                try:
                    take_a_break = True
                    movie = self._parse_new_movie(entry['imdb_url'], session)
                except UnicodeDecodeError:
                    log.error('Unable to determine encoding for %s. Installing chardet library may help.' % entry['imdb_url'])
                    # store a stub movie carrying only the url
                    movie = Movie()
                    movie.url = entry['imdb_url']
                    session.add(movie)
                    raise PluginError('UnicodeDecodeError')
                except ValueError as e:
                    # the full traceback is only logged in debug mode
                    if manager.options.debug:
                        log.exception(e)
                    raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

            for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors', 'directors', 'mpaa_rating']:
                log.trace('movie.%s: %s' % (att, getattr(movie, att)))

            # copy the movie details into the entry
            entry.update_using_map(self.field_map, movie)

            # Pause after a search or a page parse was performed, presumably
            # to avoid hammering imdb; skipped in debug mode and unit tests.
            if (take_a_break and
                    not manager.options.debug and
                    not manager.unit_test):
                import time
                time.sleep(3)
        finally:
            # Commit on failure as well, so pending rows added before a
            # PluginError was raised are stored too.
            log.trace('committing session')
            session.commit()

    def _parse_new_movie(self, imdb_url, session):
        """
        Get Movie object by parsing imdb page and save movie into the database.

        :param imdb_url: Imdb url
        :param session: Session to be used
        :return: Newly added Movie
        """
        imdb_parser = ImdbParser()
        imdb_parser.parse(imdb_url)

        # copy the plain attributes from the parser
        movie = Movie()
        movie.photo = imdb_parser.photo
        movie.title = imdb_parser.name
        movie.score = imdb_parser.score
        movie.votes = imdb_parser.votes
        movie.year = imdb_parser.year
        movie.mpaa_rating = imdb_parser.mpaa_rating
        movie.plot_outline = imdb_parser.plot_outline
        movie.url = imdb_url

        # For every related item reuse the stored row when there is one,
        # otherwise create a new row.
        for name in imdb_parser.genres:
            genre = session.query(Genre).filter(Genre.name == name).first()
            if not genre:
                genre = Genre(name)
            movie.genres.append(genre)  # pylint:disable=E1101
        for name in imdb_parser.languages:
            language = session.query(Language).filter(Language.name == name).first()
            if not language:
                language = Language(name)
            movie.languages.append(language)  # pylint:disable=E1101
        # items() instead of iteritems(): same pairs, works on Python 2 and 3
        for imdb_id, name in imdb_parser.actors.items():
            actor = session.query(Actor).filter(Actor.imdb_id == imdb_id).first()
            if not actor:
                actor = Actor(imdb_id, name)
            movie.actors.append(actor)  # pylint:disable=E1101
        for imdb_id, name in imdb_parser.directors.items():
            director = session.query(Director).filter(Director.imdb_id == imdb_id).first()
            if not director:
                director = Director(imdb_id, name)
            movie.directors.append(director)  # pylint:disable=E1101

        # mark the details as fresh and store the movie
        movie.updated = datetime.now()
        session.add(movie)
        return movie
466
# Register the plugin class under the name 'imdb_lookup', declaring plugin API version 2.
register_plugin(ImdbLookup, 'imdb_lookup', api_ver=2)