2
from optparse import SUPPRESS_HELP
3
from flexget.plugin import register_plugin, register_parser_option, priority, internet, PluginError, PluginWarning
4
from flexget.manager import Base, Session
5
from flexget.utils.log import log_once
6
from flexget.utils.imdb import ImdbSearch, ImdbParser, extract_id
7
from sqlalchemy import Table, Column, Integer, Float, String, Unicode, Boolean, DateTime
8
from sqlalchemy.schema import ForeignKey
9
from sqlalchemy.orm import relation
10
from datetime import datetime, timedelta
13
# Association tables backing the many-to-many relations declared on the movie
# model (they are passed there as ``secondary=``). Each row pairs one
# imdb_movies id with one id from the related table.
genres_table = Table(
    'imdb_movie_genres', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('genre_id', Integer, ForeignKey('imdb_genres.id')))

languages_table = Table(
    'imdb_movie_languages', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('language_id', Integer, ForeignKey('imdb_languages.id')))

actors_table = Table(
    'imdb_movie_actors', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('actor_id', Integer, ForeignKey('imdb_actors.id')))

directors_table = Table(
    'imdb_movie_directors', Base.metadata,
    Column('movie_id', Integer, ForeignKey('imdb_movies.id')),
    Column('director_id', Integer, ForeignKey('imdb_directors.id')))
32
class Movie(Base):
    """Cached IMDb details for one title, stored in the ``imdb_movies`` table.

    Rows are written by the lookup code after an IMDb page has been parsed and
    are read back on later runs instead of parsing the page again.
    """
    # NOTE(review): the ``class`` statement, the ``score`` column and the
    # ``__repr__`` header were absent from this copy of the file. They were
    # restored from how the rest of the module uses ``Movie`` (queries on
    # ``Movie.url``, ``movie.score = ...``, ``cached.score``). The Float type
    # of ``score`` is an assumption -- confirm against version control.

    __tablename__ = 'imdb_movies'

    id = Column(Integer, primary_key=True)
    title = Column(Unicode)
    url = Column(String, index=True)

    # many-to-many relations
    genres = relation('Genre', secondary=genres_table, backref='movies')
    languages = relation('Language', secondary=languages_table, backref='movies')
    actors = relation('Actor', secondary=actors_table, backref='movies')
    directors = relation('Director', secondary=directors_table, backref='movies')

    score = Column(Float)
    votes = Column(Integer)
    year = Column(Integer)
    plot_outline = Column(Unicode)
    mpaa_rating = Column(String, default='')
    photo = Column(String)

    # Time of the last refresh. The column has no default, so rows that predate
    # it hold NULL; the lookup code treats NULL, and anything older than two
    # days, as stale and parses the page again.
    updated = Column(DateTime)

    def __repr__(self):
        return '<Movie(name=%s,votes=%s,year=%s)>' % (self.title, self.votes, self.year)
61
class Language(Base):
    """A language name that movies are linked to (``imdb_languages`` table)."""
    # NOTE(review): the ``class`` statement and the constructor body were absent
    # from this copy of the file; they were restored from the
    # ``relation('Language', ...)`` declaration and the ``Language(name)`` call
    # in the lookup code. Confirm against version control.

    __tablename__ = 'imdb_languages'

    id = Column(Integer, primary_key=True)
    name = Column(Unicode)

    def __init__(self, name):
        self.name = name
72
class Genre(Base):
    """A genre name that movies are linked to (``imdb_genres`` table)."""
    # NOTE(review): the ``class`` statement, the ``name`` column and the
    # constructor body were absent from this copy of the file. They were
    # restored from the ``relation('Genre', ...)`` declaration and from the
    # lookup code, which filters on ``Genre.name`` and reads ``genre.name``.
    # The Unicode column type mirrors the language table -- confirm against
    # version control.

    __tablename__ = 'imdb_genres'

    id = Column(Integer, primary_key=True)
    name = Column(Unicode)

    def __init__(self, name):
        self.name = name
83
class Actor(Base):
    """An actor that movies are linked to (``imdb_actors`` table).

    The lookup code finds existing rows by ``imdb_id`` and creates a new row
    with ``Actor(imdb_id, name)`` when none exists.
    """
    # NOTE(review): the ``class`` statement and the ``self.name`` assignment
    # were absent from this copy of the file; they were restored from the
    # ``Actor(imdb_id, name)`` call and the later read of ``actor.name``.
    # Confirm against version control.

    __tablename__ = 'imdb_actors'

    id = Column(Integer, primary_key=True)
    imdb_id = Column(String)
    name = Column(Unicode)

    def __init__(self, imdb_id, name=None):
        self.imdb_id = imdb_id
        self.name = name
96
class Director(Base):
    """A director that movies are linked to (``imdb_directors`` table).

    The lookup code finds existing rows by ``imdb_id`` and creates a new row
    with ``Director(imdb_id, name)`` when none exists.
    """
    # NOTE(review): the ``class`` statement and the ``self.name`` assignment
    # were absent from this copy of the file; they were restored from the
    # ``Director(imdb_id, name)`` call and the later read of ``director.name``.
    # Confirm against version control.

    __tablename__ = 'imdb_directors'

    id = Column(Integer, primary_key=True)
    imdb_id = Column(String)
    name = Column(Unicode)

    def __init__(self, imdb_id, name=None):
        self.imdb_id = imdb_id
        self.name = name
107
class SearchResult(Base):
    """Remembered outcome of an IMDb title search (``imdb_search`` table).

    A row maps an entry title to the IMDb url that was found for it. A row
    created without a url records a search that failed; the lookup code checks
    ``fails`` to avoid searching for the same title again.
    """
    # NOTE(review): the ``url`` column and the constructor body were absent
    # from this copy of the file. They were restored from the lookup code,
    # which reads ``result.url`` and calls ``SearchResult(title, url)``. The
    # String column type is an assumption -- confirm against version control.

    __tablename__ = 'imdb_search'

    id = Column(Integer, primary_key=True)
    title = Column(Unicode, index=True)
    url = Column(String)
    fails = Column(Boolean, default=False)

    def __init__(self, title, url=None):
        self.title = title
        self.url = url
120
# Module-level logger named 'imdb_lookup'; the plugin code in this file logs through it.
log = logging.getLogger('imdb_lookup')
123
# Plugin that fills feed entries with IMDb details; it is registered under the name
# 'imdb_lookup' by the register_plugin call later in this file.
# NOTE(review): this copy of the class is flattened: indentation is gone, bare integers sit
# between the statements, and several statements appear to be missing (flagged below).
# Confirm against version control before changing any logic here.
class ModuleImdbLookup(object):
125
# NOTE(review): the two prose lines below read like a class docstring, but no enclosing
# triple quotes are visible.
Retrieves imdb information for entries.
131
Also provides imdb lookup functionality to all other imdb related plugins.
135
# NOTE(review): the next two statements look like the body of a method whose `def` line is
# not visible; they return validator.factory('boolean').
from flexget import validator
136
return validator.factory('boolean')
139
# Calls lookup() for every entry of the feed. PluginError and PluginWarning are handled by
# logging the capitalized error value once through log_once.
# NOTE(review): the `except` clauses have no visible `try:`; presumably one preceded the
# self.lookup() call.
def on_feed_filter(self, feed):
140
from flexget.utils.log import log_once
141
for entry in feed.entries:
143
self.lookup(feed, entry)
144
except PluginError, e:
145
log_once(e.value.capitalize(), logger=log)
146
except PluginWarning, e:
147
log_once(e.value.capitalize(), logger=log)
150
# Resolves the entry's IMDb url (from the entry itself, a stored SearchResult, or a fresh
# ImdbSearch), loads the movie details from the Movie cache or by parsing the page, and
# copies them into the entry's imdb_* fields.
# NOTE(review): `session`, `value`, `imdb`, `movie` and `take_a_break` are used below but
# never assigned in the visible text; the statements that bind them appear to be missing,
# as do several `if` / `else` / `try` lines.
def lookup(self, feed, entry, search_allowed=True):
151
"""Perform imdb lookup for entry. Raises PluginError with failure reason."""
153
log.debug('lookup for %s' % entry['title'])
159
# entry sanity checks
160
for field in ['imdb_votes', 'imdb_score']:
163
if not isinstance(value, int) and not isinstance(value, float):
164
raise PluginError('Entry field %s should be a number!' % field)
166
# make sure imdb url is valid
167
if 'imdb_url' in entry:
168
imdb_id = extract_id(entry['imdb_url'])
170
# Rewrite the url into one canonical form built from the extracted id.
entry['imdb_url'] = 'http://www.imdb.com/title/%s' % imdb_id
172
log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
173
del(entry['imdb_url'])
175
# no imdb_url, check if there is cached result for it or if the search is known to fail
176
if not 'imdb_url' in entry:
177
result = session.query(SearchResult).filter(SearchResult.title == entry['title']).first()
179
# A recorded failure is final unless the --retry-lookup option is set.
if result.fails and not feed.manager.options.retry_lookup:
180
# this movie cannot be found, not worth trying again ...
181
log.debug('%s will fail lookup' % entry['title'])
182
raise PluginError('Title lookup fails')
185
log.log(5, 'Setting imdb url for %s from db' % entry['title'])
186
entry['imdb_url'] = result.url
188
# no imdb url, but information required, try searching
189
if not 'imdb_url' in entry and search_allowed:
190
feed.verbose_progress('Searching from imdb %s' % entry['title'])
193
search = ImdbSearch()
194
search_result = search.smart_match(entry['title'])
196
entry['imdb_url'] = search_result['url']
197
# store url for this movie, so we don't have to search on every run
198
result = SearchResult(entry['title'], entry['imdb_url'])
200
feed.verbose_progress('Found %s' % (entry['imdb_url']), log)
202
log_once('Imdb lookup failed for %s' % entry['title'], log)
203
# store FAIL for this title
204
result = SearchResult(entry['title'])
207
raise PluginError('Title lookup failed')
211
# check if this imdb page has been parsed & cached
212
cached = session.query(Movie).filter(Movie.url == entry['imdb_url']).first()
213
# The cached row is used only when it exists and was updated within the last two days.
if (not cached) or (cached.updated is None) or (cached.updated < datetime.now() - timedelta(days=2)):
214
# Remove the old movie, we'll store another one later.
215
session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()
216
# search and store to cache
217
feed.verbose_progress('Parsing imdb for %s' % entry['title'])
220
imdb.parse(entry['imdb_url'])
223
movie.photo = imdb.photo
224
movie.title = imdb.name
225
movie.score = imdb.score
226
movie.votes = imdb.votes
227
movie.year = imdb.year
228
movie.mpaa_rating = imdb.mpaa_rating
229
movie.plot_outline = imdb.plot_outline
230
movie.url = entry['imdb_url']
231
# For genres, languages, actors and directors: look for an existing row first, then
# attach it to the movie.
# NOTE(review): the genre loop shows no visible creation of a new Genre when the query
# finds nothing; the other three loops construct one.
for name in imdb.genres:
232
genre = session.query(Genre).filter(Genre.name == name).first()
235
movie.genres.append(genre) # pylint:disable=E1101
236
for name in imdb.languages:
237
language = session.query(Language).filter(Language.name == name).first()
239
language = Language(name)
240
movie.languages.append(language) # pylint:disable=E1101
241
for imdb_id, name in imdb.actors.iteritems():
242
actor = session.query(Actor).filter(Actor.imdb_id == imdb_id).first()
244
actor = Actor(imdb_id, name)
245
movie.actors.append(actor) # pylint:disable=E1101
246
for imdb_id, name in imdb.directors.iteritems():
247
director = session.query(Director).filter(Director.imdb_id == imdb_id).first()
249
director = Director(imdb_id, name)
250
movie.directors.append(director) # pylint:disable=E1101
251
# so that we can track how long since we've updated the info later
252
movie.updated = datetime.now()
253
# NOTE(review): `output` is not read anywhere in the visible code.
output = session.add(movie)
255
# NOTE(review): no matching `try:` is visible for the two except clauses below.
except UnicodeDecodeError:
256
log.error('Unable to determine encoding for %s. Installing chardet library may help.' % entry['imdb_url'])
257
# store cache so this will not be tried again
259
movie.url = entry['imdb_url']
261
raise PluginWarning('UnicodeDecodeError')
262
except ValueError, e:
263
# TODO: might be a little too broad catch, what was this for anyway? ;P
264
if feed.manager.options.debug:
266
raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)
268
# NOTE(review): presumably the `else` branch of the staleness check above (the `else:`
# line is not visible): copy the cached row onto the parser object.
# Set values from cache
269
# TODO: I don't like this shoveling ...
270
imdb.url = cached.url
271
imdb.photo = cached.photo
272
imdb.name = cached.title
273
imdb.year = cached.year
274
imdb.votes = cached.votes
275
imdb.score = cached.score
276
imdb.mpaa_rating = cached.mpaa_rating
277
imdb.plot_outline = cached.plot_outline
278
imdb.genres = [genre.name for genre in cached.genres]
279
imdb.languages = [lang.name for lang in cached.languages]
280
for actor in cached.actors:
281
imdb.actors[actor.imdb_id] = actor.name
282
for director in cached.directors:
283
imdb.directors[director.imdb_id] = director.name
285
# Normalise a missing rating to an empty string before it is logged and stored on the entry.
if imdb.mpaa_rating is None:
286
imdb.mpaa_rating = ''
288
log.log(5, 'imdb.score: %s' % imdb.score)
289
log.log(5, 'imdb.votes: %s' % imdb.votes)
290
log.log(5, 'imdb.year: %s' % imdb.year)
291
log.log(5, 'imdb.genres: %s' % imdb.genres)
292
log.log(5, 'imdb.languages: %s' % imdb.languages)
293
# actors and directors are mappings keyed by imdb id, so the two joins below list the ids.
log.log(5, 'imdb.actors: %s' % ', '.join(imdb.actors))
294
log.log(5, 'imdb.directors: %s' % ', '.join(imdb.directors))
295
# NOTE(review): imdb.mpaa_rating appears to be a string here (it is set to '' above when
# None), so ', '.join() would put ', ' between its characters; logging the value directly
# was probably intended.
log.log(5, 'imdb.mpaa_rating: %s' % ', '.join(imdb.mpaa_rating))
298
# TODO: I really don't like this shoveling!
299
entry['imdb_url'] = imdb.url
300
entry['imdb_id'] = imdb.imdb_id
301
entry['imdb_name'] = imdb.name
302
entry['imdb_photo'] = imdb.photo
303
entry['imdb_plot_outline'] = imdb.plot_outline
304
entry['imdb_score'] = imdb.score
305
entry['imdb_votes'] = imdb.votes
306
entry['imdb_year'] = imdb.year
307
entry['imdb_genres'] = imdb.genres
308
entry['imdb_languages'] = imdb.languages
309
entry['imdb_actors'] = imdb.actors
310
entry['imdb_directors'] = imdb.directors
311
entry['imdb_mpaa_rating'] = imdb.mpaa_rating
313
# give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
314
# NOTE(review): the body of this `if` (the pause itself) is not visible.
if take_a_break and not feed.manager.options.debug and not feed.manager.unit_test:
318
# NOTE(review): only the log line is visible; the commit call it announces is not.
log.log(5, 'committing session')
321
# Expose the plugin to FlexGet under the name 'imdb_lookup'.
register_plugin(ModuleImdbLookup, 'imdb_lookup')

# Hidden command-line switch (SUPPRESS_HELP keeps it out of the help output). When it is
# given, lookup() searches again for titles whose earlier IMDb search was stored as failed.
# The default is a real boolean to match what action='store_true' stores when the flag is set.
register_parser_option(
    '--retry-lookup',
    action='store_true',
    dest='retry_lookup',
    default=False,
    help=SUPPRESS_HELP)