2
from flexget.plugin import register_plugin, priority, get_plugin_by_name, PluginWarning, PluginError
3
from flexget.manager import Base, Session
4
from flexget.utils.tools import urlopener
6
from sqlalchemy import Column, Integer, Unicode, UnicodeText, DateTime
7
from BeautifulSoup import BeautifulStoneSoup
10
log = logging.getLogger('thetvdb')
15
# Cache table for series data fetched from TheTVDB. lookup() queries it through
# the TheTvDB model, filtering on series_name.
# NOTE(review): presumably the body of the TheTvDB model class; the class
# statement itself is not among the visible lines.
__tablename__ = 'thetvdb'
17
# Integer primary key.
id = Column(Integer, primary_key=True)
18
# Series name; lookup() matches it against unicode(entry['series_name']).
series_name = Column(Unicode)
19
# Series data stored as unicode text; lookup() re-parses it with BeautifulStoneSoup on a cache hit.
series_xml = Column(UnicodeText)
20
# Creation timestamp; lookup() treats rows older than one hour as expired.
added = Column(DateTime)
22
# Store the series name and its XML text, stamping the row with the current
# naive local time (datetime.datetime.now()).
def __init__(self, series_name, series_xml):
23
self.series_name = series_name
24
self.series_xml = series_xml
25
self.added = datetime.datetime.now()
28
# Debug representation of the form '<Thetvdb(name=xml)>'; note it embeds the whole stored XML.
# NOTE(review): presumably the body of __repr__ (its def line is not visible) - confirm.
return '<Thetvdb(%s=%s)>' % (self.series_name, self.series_xml)
31
# Plugin class; the module's final statement registers it under the name 'thetvdb_lookup'.
class ModuleThetvdbLookup(object):
33
Retrieves TheTVDB information for entries. Uses series_name,
34
series_season, series_episode from series plugin.
36
NOTE: This MUST be executed after series! Thus, priority of
37
any script that uses this needs to be filter priority < 128
38
(that's the priority of series)
44
Primarily used for passing thetvdb information to other plugins.
45
Among these is the IMDB url for the series.
47
This information is provided (via entry):
51
series_status (Continuing or Ended)
52
series_runtime (show runtime in minutes)
61
series_airs_day_of_week
63
series_language (en, fr, etc.)
64
imdb_url (if available)
65
zap2it_id (if available)
66
episode info: (if episode is found)
78
# Returns a validator built by validator.factory('boolean'); the import is done
# locally at call time rather than at module level.
# NOTE(review): presumably the body of the plugin's validator() method (its def
# line is not visible) - confirm.
from flexget import validator
79
return validator.factory('boolean')
82
# Registers the 'thetvdb_id' key (declared as type 'number') with the 'set'
# plugin. lookup() later reads entry['thetvdb_id'] when it is present.
# `feed` is accepted but not used by the visible statements.
# NOTE(review): presumably called by FlexGet at process start, per the method name - confirm.
def on_process_start(self, feed):
84
Register the usable set: keywords.
86
# Fetch the 'set' plugin by name, then register the key on its instance.
set_plugin = get_plugin_by_name('set')
87
set_plugin.instance.register_key('thetvdb_id', 'number')
90
# Runs self.lookup(feed, entry) for every entry in feed.entries. PluginError and
# PluginWarning raised by a lookup are not propagated: their message is
# capitalized and passed to log_once with this module's logger.
# NOTE: `except X, e:` is Python 2-only syntax.
def on_feed_filter(self, feed):
91
# Imported at call time rather than at module level.
from flexget.utils.log import log_once
92
for entry in feed.entries:
94
# NOTE(review): the `try:` that owns the handlers below is not among the visible lines.
self.lookup(feed, entry)
95
# Both handlers do exactly the same thing with e.value.
except PluginError, e:
96
log_once(e.value.capitalize(), logger=log)
97
except PluginWarning, e:
98
log_once(e.value.capitalize(), logger=log)
100
# Converts a dash-separated date string into a datetime.date and returns it.
# The parts are split on "-", converted to int and passed positionally to
# datetime.date, so the expected order is year-month-day.
# NOTE: the result is a datetime.date, not a datetime.datetime, despite the
# "datetime object" wording in the docstring.
def _convert_date(self, date_to_convert):
102
Take in a date in this format:
104
and spit out a datetime object
106
# Falsy input (None or an empty string) is tested before any parsing is attempted.
if not date_to_convert:
108
# NOTE(review): the lines between these statements are not visible, so whether
# None is a default or a branch result cannot be determined here - confirm.
converted_date = None
110
converted_date = datetime.date(*map(int, date_to_convert.split("-")))
114
return converted_date
116
# Enriches one entry with TheTVDB series and episode fields.
# Reads entry['title'], 'series_name', 'series_season', 'series_episode' and,
# when present, 'thetvdb_id'. Series data is cached in the TheTvDB table and
# treated as expired once it is more than one hour old.
# Writes series_* keys, imdb_url, zap2it_id and, when a matching episode is
# found, ep_* keys onto the entry.
# NOTE: unicode(), urllib.quote and urllib.urlopen are Python 2 APIs.
# TODO: this does not utilize exceptions on errors, raise PluginWarning instead of logging error and returning
117
def lookup(self, feed, entry):
119
Get theTVDB information for the included series_name,
120
series_season, series_episode.
122
# Search for series (need to get latest first_airing_date for default)
123
# http://thetvdb.com/api/GetSeries.php?seriesname=Castle
124
# Castle (2009)'s URL:
125
# http://thetvdb.com/data/series/83462/
126
# Castle (2009) all episode information:
127
# http://thetvdb.com/data/series/83462/all/
128
# Images are base url:
129
# http://thetvdb.com/banners/
131
log.debug("looking up %s" % entry['title'])
133
# Check to make sure that I have all info I need before I start.
134
if not 'series_name' in entry:
135
# TODO: try to apply regexes to this to figure out series name, season and ep number from title.
136
log.debug("series_name not given for %s. Entry not parsed through series plugin" % entry["title"])
138
if not 'series_season' in entry:
139
log.warning("failed getting series_season for %s, but given series_name. Series plugin bug?" % entry['title'])
141
if not 'series_episode' in entry:
142
log.warning("failed getting series_episode for %s, but given series_name. Series plugin bug?" % entry['title'])
145
log.debug("Retrieved internal series info for %(series_name)s - S%(series_season)sE%(series_episode)s" % entry)
149
# if I can't pull the series info from the DB:
150
# NOTE(review): `session` is not assigned in any visible line; presumably
# created from the imported Session just above - confirm.
cachedata = session.query(TheTvDB).filter(TheTvDB.series_name == unicode(entry['series_name'])).first()
152
log.debug('No data cached for %s' % entry['series_name'])
154
# otherwise, if it's more than an hour old...
155
elif cachedata.added < datetime.datetime.now() - datetime.timedelta(hours=1):
157
log.debug('Cache expired for %s' % entry['series_name'])
158
log.debug('Added %s expires %s' % (cachedata.added, datetime.datetime.now() - datetime.timedelta(hours=1)))
159
# remove old expired data
160
session.delete(cachedata)
166
# A thetvdb_id already on the entry is taken as the series id.
# The %d in the debug message requires it to be numeric.
if 'thetvdb_id' in entry:
167
series_id = entry['thetvdb_id']
168
log.debug("Read thetvdb_id \'%(thetvdb_id)d\' from entry for %(title)s" % entry)
170
feed.verbose_progress('Requesting %s information from TheTvDB.com' % entry['series_name'])
171
# get my series data.
172
url = "http://thetvdb.com/api/GetSeries.php?seriesname=%s" % urllib.quote(entry['series_name'])
173
log.debug("url for thetvdb search for %s: %s" % (entry['series_name'], url))
175
page = urlopener(url, log)
177
log.error("Unable to grab series info for %s: %s" % (entry['series_name'], e))
179
# The search response is parsed as XML and its <data> element kept.
xmldata = BeautifulStoneSoup(page).data
181
log.error("Didn't get a return from tvdb on the series search for %s" % entry['series_name'])
183
# Yeah, I'm lazy. Grabbing the one with the latest airing date,
184
# instead of trying to see what's the closest match.
185
# If there's an exact match, return that immediately. Could
186
# run into issues with queries with multiple exact matches.
187
# Sentinel date older than any real first-aired date, so the first dated series wins the comparison.
newest_series_first_aired = datetime.date(1800, 1, 1)
188
# Only direct <series> children of the root are considered (recursive=False).
for i in xmldata.findAll('series', recursive=False):
190
this_series_air_date = self._convert_date(i.firstaired.string)
191
if this_series_air_date > newest_series_first_aired:
192
newest_series_first_aired = this_series_air_date
193
series_id = i.seriesid.string
195
this_series_air_date = ""
197
# Exact, case-sensitive name match against the entry's series_name.
if i.seriesname.string == entry['series_name']:
198
series_id = i.seriesid.string
200
# Don't really need to store this, but just for consistency's sake so we always have it available
201
newest_series_first_aired = this_series_air_date
204
if series_id is None:
205
log.error("Didn't get a return from tvdb on the series search for %s" % entry['series_name'])
208
# Grab the url, and parse it out into BSS. Store its root element as data.
209
# TODO: need to implement error handling around grabbing url.
210
# NOTE: this request uses urllib.urlopen directly, whereas the search request above goes through urlopener(url, log).
data = BeautifulStoneSoup(urllib.urlopen("http://thetvdb.com/data/series/%s/all/" % str(series_id))).data
211
# Cache the fetched data as unicode text under the entry's series name.
session.add(TheTvDB(unicode(entry['series_name']), unicode(data)))
213
log.debug('Loaded seriesdata from cache for %s' % entry['series_name'])
214
# Cached path: re-parse the XML text stored on the cache row.
data = BeautifulStoneSoup(cachedata.series_xml).data
218
# Copy series-level fields onto the entry. Most are guarded by testing the tag itself.
# NOTE(review): genre, imdb_id and zap2it_id test `.string` on the tag instead,
# and actors calls .strip() on `.string` after testing only the tag - confirm
# a missing tag or empty element cannot reach those lines.
if data.series.seriesname:
219
entry['series_name_tvdb'] = data.series.seriesname.string
220
if data.series.rating:
221
entry['series_rating'] = data.series.rating.string
222
if data.series.status:
223
entry['series_status'] = data.series.status.string
224
if data.series.runtime:
225
entry['series_runtime'] = data.series.runtime.string
226
if data.series.firstaired:
227
entry['series_first_air_date'] = self._convert_date(data.series.firstaired.string)
228
if data.series.airs_time:
229
entry['series_air_time'] = data.series.airs_time.string
230
if data.series.contentrating:
231
entry['series_content_rating'] = data.series.contentrating.string
232
if data.series.genre.string:
233
# Pipe-delimited value: outer pipes are stripped, then the rest is split into a list.
entry['series_genres'] = data.series.genre.string.strip("|").split("|")
234
if data.series.network:
235
entry['series_network'] = data.series.network.string
236
# Image fields hold paths that are appended to the banners base URL.
if data.series.banner:
237
entry['series_banner_url'] = "http://www.thetvdb.com/banners/%s" % data.series.banner.string
238
if data.series.fanart:
239
entry['series_fanart_url'] = "http://www.thetvdb.com/banners/%s" % data.series.fanart.string
240
if data.series.poster:
241
entry['series_poster_url'] = "http://www.thetvdb.com/banners/%s" % data.series.poster.string
242
if data.series.airs_dayofweek:
243
entry['series_airs_day_of_week'] = data.series.airs_dayofweek.string
244
if data.series.actors:
245
entry['series_actors'] = data.series.actors.string.strip("|").split("|")
246
if data.series.language:
247
entry['series_language'] = data.series.language.string
248
if data.series.imdb_id.string:
249
entry["imdb_url"] = "http://www.imdb.com/title/%s" % data.series.imdb_id.string
250
if data.series.zap2it_id.string:
251
entry['zap2it_id'] = data.series.zap2it_id.string
253
log.debug("searching for correct episode %(series_name)s - S%(series_season)sE%(series_episode)s from the data" % entry)
255
# Scan the direct <episode> children for the entry's season/episode pair, compared as integers.
for i in data.findAll("episode", recursive=False):
256
# print "%s %s %s %s" % (i.combined_season.string, i.episodenumber.string, entry['series_season'], entry['series_episode'])
257
if int(i.combined_season.string) == int(entry['series_season']):
258
if int(i.episodenumber.string) == int(entry['series_episode']):
259
entry['ep_name'] = i.episodename.string
260
log.debug("found episode: %(series_name)s - S%(series_season)sE%(series_episode)s - %(ep_name)s" % entry)
261
entry['ep_director'] = i.director.string
262
entry['ep_writer'] = i.writer.string
263
entry['ep_air_date'] = self._convert_date(i.firstaired.string)
264
entry['ep_rating'] = i.rating.string
265
entry['ep_image_url'] = "http://www.thetvdb.com/banners/%s" % i.filename.string
266
entry['ep_overview'] = i.overview.string
267
# Guest stars are pipe-delimited; presumably the empty list below is the
# fallback when the field is empty (the branch line is not visible).
if i.gueststars.string:
268
entry['ep_guest_stars'] = i.gueststars.string.strip("|").split("|")
270
entry['ep_guest_stars'] = []
272
# If I didn't get a valid episode out of all that, log an info message.
273
if not 'ep_name' in entry:
274
log.info("Didn't find an episode on thetvdb for %(series_name)s - S%(series_season)sE%(series_episode)s" % entry)
276
# Register the plugin class with FlexGet under the name 'thetvdb_lookup'.
register_plugin(ModuleThetvdbLookup, 'thetvdb_lookup')