2
from datetime import datetime, timedelta
3
from sqlalchemy import Table, Column, Integer, Float, String, Unicode, Boolean, DateTime
4
from sqlalchemy.schema import ForeignKey, Index
5
from sqlalchemy.orm import relation, joinedload_all
6
from flexget import schema
7
from flexget.entry import Entry
8
from flexget.plugin import register_plugin, internet, PluginError
9
from flexget.manager import Session
10
from flexget.utils.log import log_once
11
from flexget.utils.imdb import ImdbSearch, ImdbParser, extract_id, make_url
12
from flexget.utils.sqlalchemy_utils import table_add_column
13
from flexget.utils.database import with_session
14
from flexget.utils.sqlalchemy_utils import table_columns, get_index_by_name
18
# Base class for every model in this module and owner of the metadata the
# association tables below attach to; registered with flexget's schema
# helper under the name 'imdb_lookup' at version 1.
Base = schema.versioned_base('imdb_lookup', 1)
22
# Association table backing the many-to-many link between movies and genres
# (used as the `secondary` table of the `genres` relation).
# The composite index speeds up joins that go through movie_id / genre_id.
genres_table = Table(
    'imdb_movie_genres', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('genre_id', Integer, ForeignKey('imdb_genres.id')),
    Index('ix_imdb_movie_genres', 'movie_id', 'genre_id'))
27
# Association table backing the many-to-many link between movies and
# languages (used as the `secondary` table of the `languages` relation).
# The composite index speeds up joins that go through movie_id / language_id.
languages_table = Table(
    'imdb_movie_languages', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('language_id', Integer, ForeignKey('imdb_languages.id')),
    Index('ix_imdb_movie_languages', 'movie_id', 'language_id'))
32
# Association table backing the many-to-many link between movies and actors
# (used as the `secondary` table of the `actors` relation).
# The composite index speeds up joins that go through movie_id / actor_id.
actors_table = Table(
    'imdb_movie_actors', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('actor_id', Integer, ForeignKey('imdb_actors.id')),
    Index('ix_imdb_movie_actors', 'movie_id', 'actor_id'))
37
# Association table backing the many-to-many link between movies and
# directors (used as the `secondary` table of the `directors` relation).
# The composite index speeds up joins that go through movie_id / director_id.
directors_table = Table(
    'imdb_movie_directors', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('director_id', Integer, ForeignKey('imdb_directors.id')),
    Index('ix_imdb_movie_directors', 'movie_id', 'director_id'))
45
__tablename__ = 'imdb_movies'
47
id = Column(Integer, primary_key=True)
48
title = Column(Unicode)
49
url = Column(String, index=True)
51
# many-to-many relations
52
genres = relation('Genre', secondary=genres_table, backref='movies')
53
languages = relation('Language', secondary=languages_table, backref='movies')
54
actors = relation('Actor', secondary=actors_table, backref='movies')
55
directors = relation('Director', secondary=directors_table, backref='movies')
58
votes = Column(Integer)
59
year = Column(Integer)
60
plot_outline = Column(Unicode)
61
mpaa_rating = Column(String, default='')
62
photo = Column(String)
64
# updated time, so we can grab new rating counts after 48 hours
65
# set a default, so existing data gets updated with a rating
66
updated = Column(DateTime)
70
return extract_id(self.url)
75
:return: True if the movie details are considered expired, i.e. in need of an update
77
if self.updated is None:
78
log.debug('updated is None: %s' % self)
82
age = (datetime.now().year - self.year)
83
refresh_interval += age * 5
84
log.debug('movie `%s` age %i expires in %i days' % (self.title, age, refresh_interval))
85
return self.updated < datetime.now() - timedelta(days=refresh_interval)
88
return '<Movie(name=%s,votes=%s,year=%s)>' % (self.title, self.votes, self.year)
93
__tablename__ = 'imdb_languages'
95
id = Column(Integer, primary_key=True)
96
name = Column(Unicode)
98
def __init__(self, name):
104
__tablename__ = 'imdb_genres'
106
id = Column(Integer, primary_key=True)
107
name = Column(String)
109
def __init__(self, name):
115
__tablename__ = 'imdb_actors'
117
id = Column(Integer, primary_key=True)
118
imdb_id = Column(String)
119
name = Column(Unicode)
121
def __init__(self, imdb_id, name=None):
122
self.imdb_id = imdb_id
128
__tablename__ = 'imdb_directors'
130
id = Column(Integer, primary_key=True)
131
imdb_id = Column(String)
132
name = Column(Unicode)
134
def __init__(self, imdb_id, name=None):
135
self.imdb_id = imdb_id
139
class SearchResult(Base):
141
__tablename__ = 'imdb_search'
143
id = Column(Integer, primary_key=True)
144
title = Column(Unicode, index=True)
146
fails = Column(Boolean, default=False)
150
return extract_id(self.url)
152
def __init__(self, title, url=None):
157
return '<SearchResult(title=%s,url=%s,fails=%s)>' % (self.title, self.url, self.fails)
159
# Module-level logger used by the models, the schema upgrade and the plugin.
log = logging.getLogger('imdb_lookup')
162
@schema.upgrade('imdb_lookup')
163
def upgrade(ver, session):
165
columns = table_columns('imdb_movies', session)
166
if not 'photo' in columns:
167
log.info('Adding photo column to imdb_movies table.')
168
table_add_column('imdb_movies', 'photo', String, session)
169
if not 'updated' in columns:
170
log.info('Adding updated column to imdb_movies table.')
171
table_add_column('imdb_movies', 'updated', DateTime, session)
172
if not 'mpaa_rating' in columns:
173
log.info('Adding mpaa_rating column to imdb_movies table.')
174
table_add_column('imdb_movies', 'mpaa_rating', String, session)
177
# create indexes retrospectively (~r2563)
178
log.info('Adding imdb indexes delivering up to 20x speed increase \o/ ...')
179
indexes = [get_index_by_name(actors_table, 'ix_imdb_movie_actors'),
180
get_index_by_name(genres_table, 'ix_imdb_movie_genres'),
181
get_index_by_name(languages_table, 'ix_imdb_movie_languages'),
182
get_index_by_name(directors_table, 'ix_imdb_movie_directors')]
183
for index in indexes:
185
log.critical('Index adding failure!')
187
log.info('Creating index %s ...' % index.name)
188
index.create(bind=session.connection())
193
class ImdbLookup(object):
195
Retrieves imdb information for entries.
201
Also provides imdb lookup functionality to all other imdb related plugins.
206
'imdb_id': lambda movie: extract_id(movie.url),
207
'imdb_name': 'title',
208
'imdb_photo': 'photo',
209
'imdb_plot_outline': 'plot_outline',
210
'imdb_score': 'score',
211
'imdb_votes': 'votes',
213
'imdb_genres': lambda movie: [genre.name for genre in movie.genres],
214
'imdb_languages': lambda movie: [lang.name for lang in movie.languages],
215
'imdb_actors': lambda movie: dict((actor.imdb_id, actor.name) for actor in movie.actors),
216
'imdb_directors': lambda movie: dict((director.imdb_id, director.name) for director in movie.directors),
217
'imdb_mpaa_rating': 'mpaa_rating',
218
# Generic fields filled by all movie lookup plugins:
219
'movie_name': 'title',
220
'movie_year': 'year'}
223
from flexget import validator
224
return validator.factory('boolean')
226
def on_feed_metainfo(self, feed, config):
229
for entry in feed.entries:
230
entry.register_lazy_fields(self.field_map, self.lazy_loader)
232
def lazy_loader(self, entry, field):
233
"""Does the lookup for this entry and populates the entry fields."""
236
except PluginError, e:
237
log_once(e.value.capitalize(), logger=log)
238
# Set all of our fields to None if the lookup failed
239
entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
243
def imdb_id_lookup(self, movie_title=None, raw_title=None, session=None):
245
Perform faster lookup providing just imdb_id.
246
Falls back to using basic lookup if data cannot be found from cache.
250
API will be changed, it's dumb to return None on errors AND
251
raise PluginError on others
253
:param movie_title: Name of the movie
254
:param raw_title: Raw entry title
255
:return: imdb id or None
256
:raises PluginError: Failure reason
259
log.debug('imdb_id_lookup: trying with title: %s' % movie_title)
260
movie = session.query(Movie).filter(Movie.title == movie_title).first()
262
log.debug('--> success! got %s returning %s' % (movie, movie.imdb_id))
265
log.debug('imdb_id_lookup: trying cache with: %s' % raw_title)
266
result = session.query(SearchResult).filter(SearchResult.title == raw_title).first()
268
# this title is hopeless, give up ..
271
log.debug('--> success! got %s returning %s' % (result, result.imdb_id))
272
return result.imdb_id
274
# last hope with hacky lookup
275
fake_entry = Entry(raw_title, '')
276
self.lookup(fake_entry)
277
return fake_entry['imdb_id']
280
def lookup(self, entry, search_allowed=True):
282
Perform imdb lookup for entry.
284
:param entry: Entry instance
285
:param search_allowed: Allow fallback to search
286
:raises PluginError: Failure reason
289
from flexget.manager import manager
291
if entry.get('imdb_url', eval_lazy=False):
292
log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
293
elif entry.get('imdb_id', eval_lazy=False):
294
log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
295
elif entry.get('title', eval_lazy=False):
296
log.debug('lookup for %s' % entry['title'])
298
raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')
304
# entry sanity checks
305
for field in ['imdb_votes', 'imdb_score']:
306
if entry.get(field, eval_lazy=False):
308
if not isinstance(value, (int, float)):
309
raise PluginError('Entry field %s should be a number!' % field)
311
# if imdb_id is included, build the url.
312
if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
313
entry['imdb_url'] = make_url(entry['imdb_id'])
315
# make sure imdb url is valid
316
if entry.get('imdb_url', eval_lazy=False):
317
imdb_id = extract_id(entry['imdb_url'])
319
entry['imdb_url'] = make_url(imdb_id)
321
log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
322
del(entry['imdb_url'])
324
# no imdb_url, check if there is cached result for it or if the
325
# search is known to fail
326
if not entry.get('imdb_url', eval_lazy=False):
327
result = session.query(SearchResult).\
328
filter(SearchResult.title == entry['title']).first()
330
if result.fails and not manager.options.retry:
331
# this movie cannot be found, not worth trying again ...
332
log.debug('%s will fail lookup' % entry['title'])
333
raise PluginError('Title `%s` lookup fails' % entry['title'])
336
log.trace('Setting imdb url for %s from db' % entry['title'])
337
entry['imdb_url'] = result.url
339
# no imdb url, but information required, try searching
340
if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
341
log.verbose('Searching from imdb `%s`' % entry['title'])
344
search = ImdbSearch()
345
search_result = search.smart_match(entry['title'])
347
entry['imdb_url'] = search_result['url']
348
# store url for this movie, so we don't have to search on
350
result = SearchResult(entry['title'], entry['imdb_url'])
352
log.verbose('Found %s' % (entry['imdb_url']))
354
log_once('Imdb lookup failed for %s' % entry['title'], log)
355
# store FAIL for this title
356
result = SearchResult(entry['title'])
359
raise PluginError('Title `%s` lookup failed' % entry['title'])
361
# check if this imdb page has been parsed & cached
362
movie = session.query(Movie).\
363
options(joinedload_all(Movie.genres, Movie.languages,
364
Movie.actors, Movie.directors)).\
365
filter(Movie.url == entry['imdb_url']).first()
367
# determine whether or not movie details needs to be parsed
375
if movie is not None:
377
log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
378
# Remove the old movie, we'll store another one later.
379
session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()
381
# search and store to cache
383
log.verbose('Parsing imdb for `%s`' % entry['title'])
385
log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
388
movie = self._parse_new_movie(entry['imdb_url'], session)
389
except UnicodeDecodeError:
390
log.error('Unable to determine encoding for %s. Installing chardet library may help.' % entry['imdb_url'])
391
# store cache so this will not be tried again
393
movie.url = entry['imdb_url']
395
raise PluginError('UnicodeDecodeError')
396
except ValueError, e:
397
# TODO: this catch might be a little too broad; what was it for anyway? ;P
398
if manager.options.debug:
400
raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)
402
for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors', 'directors', 'mpaa_rating']:
403
log.trace('movie.%s: %s' % (att, getattr(movie, att)))
406
entry.update_using_map(self.field_map, movie)
408
# give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
410
not manager.options.debug and
411
not manager.unit_test):
415
log.trace('committing session')
418
def _parse_new_movie(self, imdb_url, session):
420
Get Movie object by parsing imdb page and save movie into the database.
421
:param imdb_url: Imdb url
422
:param session: Session to be used
423
:return: Newly added Movie
425
imdb_parser = ImdbParser()
426
imdb_parser.parse(imdb_url)
429
movie.photo = imdb_parser.photo
430
movie.title = imdb_parser.name
431
movie.score = imdb_parser.score
432
movie.votes = imdb_parser.votes
433
movie.year = imdb_parser.year
434
movie.mpaa_rating = imdb_parser.mpaa_rating
435
movie.plot_outline = imdb_parser.plot_outline
437
for name in imdb_parser.genres:
438
genre = session.query(Genre).\
439
filter(Genre.name == name).first()
442
movie.genres.append(genre) # pylint:disable=E1101
443
for name in imdb_parser.languages:
444
language = session.query(Language).\
445
filter(Language.name == name).first()
447
language = Language(name)
448
movie.languages.append(language) # pylint:disable=E1101
449
for imdb_id, name in imdb_parser.actors.iteritems():
450
actor = session.query(Actor).\
451
filter(Actor.imdb_id == imdb_id).first()
453
actor = Actor(imdb_id, name)
454
movie.actors.append(actor) # pylint:disable=E1101
455
for imdb_id, name in imdb_parser.directors.iteritems():
456
director = session.query(Director).\
457
filter(Director.imdb_id == imdb_id).first()
459
director = Director(imdb_id, name)
460
movie.directors.append(director) # pylint:disable=E1101
461
# so that we can track how long since we've updated the info later
462
movie.updated = datetime.now()
466
# Register ImdbLookup with flexget under the plugin name 'imdb_lookup',
# declaring plugin API version 2.
register_plugin(ImdbLookup, 'imdb_lookup', api_ver=2)