5
from datetime import datetime, timedelta
7
from BeautifulSoup import BeautifulStoneSoup
8
from sqlalchemy import Column, Integer, Float, String, Unicode, Boolean, DateTime, func
9
from sqlalchemy.schema import ForeignKey
10
from sqlalchemy.orm import relation
11
from requests import RequestException
12
from flexget import schema
13
from flexget.utils.requests import Session as ReqSession
14
from flexget.utils.database import with_session, pipe_list_synonym, text_date_synonym
15
from flexget.utils.sqlalchemy_utils import table_add_column
16
from flexget.manager import Session
17
from flexget.utils.simple_persistence import SimplePersistence
21
# Module-level logger, named after this plugin.
log = logging.getLogger('api_tvdb')
22
# Base class that the model classes in this module inherit from.
# NOTE(review): SCHEMA_VER is not defined in the visible part of the file - confirm it exists.
Base = schema.versioned_base('api_tvdb', SCHEMA_VER)
23
# Shared HTTP session used for every thetvdb request in this module (created with timeout=20).
requests = ReqSession(timeout=20)
25
# This is a FlexGet API key
26
api_key = '4D297D8CFDE0E105'
28
# Base url of the thetvdb api; used for the mirror list, the series search and the update query.
server = 'http://www.thetvdb.com/api/'
30
# Persistent store for this plugin; mark_expired() keeps 'last_local' and 'last_server' in it.
persist = SimplePersistence('api_tvdb')
33
# Upgrade hook registered through schema.upgrade for the 'api_tvdb' schema.
# NOTE(review): this definition looks truncated in this view (no indentation, no check of
# `ver`, no return statement) - confirm against the complete file before changing it.
@schema.upgrade('api_tvdb')
34
def upgrade(ver, session):
36
# Remove the 'last_updated' entry from the plugin's persistent store when it is present.
if 'last_updated' in persist:
37
del persist['last_updated']
40
# Add a Unicode 'gueststars' column to the 'tvdb_episodes' table.
table_add_column('tvdb_episodes', 'gueststars', Unicode, session)
46
def get_mirror(type='xml'):
    """Returns a random mirror for a given type 'xml', 'zip', or 'banner'

    The mirror list is downloaded from thetvdb the first time a type is
    requested and kept in the module-level ``_mirrors`` dict afterwards.

    :param type: Kind of mirror wanted: 'xml', 'zip' or 'banner'.
    :returns: Mirror url ending in '/banners/' for banner mirrors and in
        '/api/' for the other types. The main site is returned when the
        mirror list holds nothing for ``type``.
    :raises LookupError: If the mirror list cannot be downloaded.
    """
    suffix = '/banners/' if type == 'banner' else '/api/'
    # NOTE(review): `_mirrors` is expected to be a module-level dict; its
    # definition is not in the visible part of the file.
    if not _mirrors.get(type):
        # Get the list of mirrors from tvdb
        try:
            data = BeautifulStoneSoup(requests.get(server + api_key + '/mirrors.xml').content)
        except RequestException:
            raise LookupError('Could not retrieve mirror list from thetvdb')
        for mirror in data.findAll('mirror'):
            type_mask = int(mirror.typemask.string)
            mirrorpath = mirror.mirrorpath.string
            # Only file the mirror under the kinds whose bit is set in its typemask.
            # NOTE(review): the bit test was not present in the visible source;
            # confirm the 1/2/4 bit meaning against the thetvdb mirror docs.
            for bit, kind in [(1, 'xml'), (2, 'banner'), (4, 'zip')]:
                if type_mask & bit:
                    _mirrors.setdefault(kind, set()).add(mirrorpath)
    candidates = _mirrors.get(type)
    if candidates:
        # random.sample() rejects sets on current Python versions, so pick from a list.
        return random.choice(list(candidates)) + suffix
    # If nothing was populated from the server's mirror list, return the main site as fallback
    return 'http://thetvdb.com' + suffix
68
class TVDBContainer(object):
    """Base class for TVDb objects"""

    def __init__(self, init_bss=None):
        """Create the object, optionally filling it from parsed thetvdb xml.

        :param init_bss: Parsed xml tag to read column values from, or None
            to create an empty object.
        """
        # Models are also created without any data, so only populate when
        # something was actually passed in.
        if init_bss is not None:
            self.update_from_bss(init_bss)

    def update_from_bss(self, update_bss):
        """Populates any simple (string or number) attributes from a parsed xml tag.

        For every column of the model's table, the child tag with the same
        name is looked up in ``update_bss``; when it has text, that text is
        converted according to the column type and stored on the object.

        :param update_bss: Parsed xml tag offering ``find(name)``.
        """
        for col in self.__table__.columns:
            tag = update_bss.find(col.name)
            if tag and tag.string:
                if isinstance(col.type, Integer):
                    value = int(tag.string)
                elif isinstance(col.type, Float):
                    value = float(tag.string)
                else:
                    # Every other column type takes the tag text unchanged.
                    value = tag.string
                setattr(self, col.name, value)
90
# Series record cached from thetvdb, stored in the 'tvdb_series' table.
# NOTE(review): this class looks truncated in this view - indentation is gone and several
# lines (method headers, try/if/else lines) are not visible. Confirm against the complete
# file before changing any logic.
class TVDBSeries(TVDBContainer, Base):
92
__tablename__ = "tvdb_series"
94
# The tvdb series id is the primary key; autoincrement is disabled.
id = Column(Integer, primary_key=True, autoincrement=False)
95
lastupdated = Column(Integer)
96
# Set to True by mark_expired() for series ids listed in thetvdb's update feed.
expired = Column(Boolean)
97
seriesname = Column(Unicode)
98
language = Column(Unicode)
99
rating = Column(Float)
100
status = Column(Unicode)
101
runtime = Column(Integer)
102
airs_time = Column(Unicode)
103
airs_dayofweek = Column(Unicode)
104
contentrating = Column(Unicode)
105
network = Column(Unicode)
106
imdb_id = Column(String)
107
zap2it_id = Column(String)
108
banner = Column(String)
109
fanart = Column(String)
110
# Path of the poster image, appended to a banner mirror url in get_poster().
poster = Column(String)
111
# Path of the locally cached poster, relative to the 'userstatic' directory (see get_poster()).
poster_file = Column(Unicode)
112
# The 'genre' column is exposed through pipe_list_synonym.
# NOTE(review): presumably a pipe-separated string presented as a list - verify the helper.
_genre = Column('genre', Unicode)
113
genre = pipe_list_synonym('_genre')
114
# The 'firstaired' DateTime column is exposed through text_date_synonym.
# NOTE(review): presumably accepts date text and stores a datetime - verify the helper.
_firstaired = Column('firstaired', DateTime)
115
firstaired = text_date_synonym('_firstaired')
117
# Episodes of this series; TVDBEpisode gets a `series` backref and the cascade setting
# removes episodes together with the series or when they are detached from it.
episodes = relation('TVDBEpisode', backref='series', cascade='all, delete, delete-orphan')
121
# The lines below refresh this object from thetvdb: fetch '/series/<id>/<language>.xml'
# from a mirror, take the <series> element (html entities converted) and copy it onto the
# object with update_from_bss(). Failures are reported as LookupError.
# NOTE(review): the method header and the try/if/else lines are not visible in this view;
# `language` is a module-level name that is not defined in the visible lines.
raise LookupError('Cannot update a series without a tvdb id.')
122
url = get_mirror() + api_key + '/series/%s/%s.xml' % (self.id, language)
124
data = requests.get(url).content
125
# NOTE(review): `except X, e` is Python 2 only syntax; `e` is not used in the visible lines.
except RequestException, e:
126
raise LookupError('Request failed %s' % url)
127
result = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.HTML_ENTITIES).find('series')
129
self.update_from_bss(result)
131
raise LookupError('Could not retrieve information from thetvdb')
133
# NOTE(review): `only_cached` is not referenced in the visible lines of this method.
def get_poster(self, only_cached=False):
134
"""Downloads this poster to a local cache and returns the path"""
135
# Imported here rather than at module level.
from flexget.manager import manager
136
base_dir = os.path.join(manager.config_base, 'userstatic')
137
# A poster that is already on disk is returned without downloading again.
if os.path.isfile(os.path.join(base_dir, self.poster_file or '')):
138
return self.poster_file
141
# If we don't already have a local copy, download one.
142
url = get_mirror('banner') + self.poster
143
log.debug('Downloading poster %s' % url)
144
dirname = os.path.join('tvdb', 'posters')
145
# Create folders if they don't exist
146
fullpath = os.path.join(base_dir, dirname)
147
if not os.path.isdir(fullpath):
148
os.makedirs(fullpath)
149
# The stored path is relative to base_dir and keeps the file name from the poster url path.
filename = os.path.join(dirname, posixpath.basename(self.poster))
150
# NOTE(review): `file()` is Python 2 only, and the file object is never closed in the
# visible lines.
thefile = file(os.path.join(base_dir, filename), 'wb')
151
thefile.write(requests.get(url).content)
152
self.poster_file = filename
153
# If we are detached from a session, update the db
154
if not Session.object_session(self):
156
# NOTE(review): the line that creates `session` is not visible in this view.
session.query(TVDBSeries).filter(TVDBSeries.id == self.id).update(values={'poster_file': filename})
161
# Short textual representation with the series name and tvdb id (method header not visible).
return '<TVDBSeries name=%s,tvdb_id=%s>' % (self.seriesname, self.id)
164
# Episode record cached from thetvdb, stored in the 'tvdb_episodes' table.
# NOTE(review): this class looks truncated in this view - indentation is gone and several
# lines (method headers, try/if/else lines) are not visible. Confirm against the complete
# file before changing any logic.
class TVDBEpisode(TVDBContainer, Base):
165
__tablename__ = 'tvdb_episodes'
167
# The tvdb episode id is the primary key; autoincrement is disabled.
id = Column(Integer, primary_key=True, autoincrement=False)
168
# Set to True by mark_expired() for episode ids listed in thetvdb's update feed.
expired = Column(Boolean)
169
lastupdated = Column(Integer)
170
seasonnumber = Column(Integer)
171
episodenumber = Column(Integer)
172
episodename = Column(Unicode)
173
overview = Column(Unicode)
174
# director, writer and gueststars are each stored in one Unicode column and exposed
# through pipe_list_synonym.
# NOTE(review): presumably pipe-separated strings presented as lists - verify the helper.
_director = Column('director', Unicode)
175
director = pipe_list_synonym('_director')
176
_writer = Column('writer', Unicode)
177
writer = pipe_list_synonym('_writer')
178
_gueststars = Column('gueststars', Unicode)
179
gueststars = pipe_list_synonym('_gueststars')
180
rating = Column(Float)
181
filename = Column(Unicode)
182
# The 'firstaired' DateTime column is exposed through text_date_synonym.
_firstaired = Column('firstaired', DateTime)
183
firstaired = text_date_synonym('_firstaired')
185
# Owning series; required (nullable=False). The `series` attribute itself comes from the
# backref declared on TVDBSeries.episodes.
series_id = Column(Integer, ForeignKey('tvdb_series.id'), nullable=False)
189
# The lines below refresh this object from thetvdb: fetch '/episodes/<id>/<language>.xml'
# from a mirror, take the <episode> element and copy it onto the object with
# update_from_bss(). Failures are reported as LookupError.
# NOTE(review): the method header and the try/if/else lines are not visible in this view;
# `language` is a module-level name that is not defined in the visible lines.
raise LookupError('Cannot update an episode without an episode id.')
190
url = get_mirror() + api_key + '/episodes/%s/%s.xml' % (self.id, language)
192
data = requests.get(url).content
193
# NOTE(review): `except X, e` is Python 2 only syntax; `e` is not used in the visible lines.
except RequestException, e:
194
raise LookupError('Request failed %s' % url)
195
result = BeautifulStoneSoup(data).find('episode')
197
self.update_from_bss(result)
199
raise LookupError('Could not retrieve information from thetvdb')
202
# Short textual representation with series name, season and episode number
# (method header not visible in this view).
return '<TVDBEpisode series=%s,season=%s,episode=%s>' %\
203
(self.series.seriesname, self.seasonnumber, self.episodenumber)
206
class TVDBSearchResult(Base):
    """Remembers which cached series a search string resolved to.

    lookup_series() stores a row here when the name that was searched for
    differs from the series name that was found, and consults these rows
    (compared in lower case) before going to thetvdb again.
    """

    __tablename__ = 'tvdb_search_results'

    id = Column(Integer, primary_key=True)
    # The name that was searched for.
    search = Column(Unicode, nullable=False)
    # Series the search resolved to; the column allows NULL.
    series_id = Column(Integer, ForeignKey('tvdb_series.id'), nullable=True)
    # Matching series object; TVDBSeries gets a `search_strings` backref.
    series = relation(TVDBSeries, backref='search_strings')
216
# Searches thetvdb (GetSeries.php) for `name` and returns the id of the chosen series as an
# int. Raises LookupError when the request fails or nothing was found.
# NOTE(review): this function looks truncated in this view - indentation is gone and the
# try/if/else lines are not visible. Confirm against the complete file before changing logic.
def find_series_id(name):
217
"""Looks up the tvdb id for a series"""
218
# The series name is url-quoted; `language` is a module-level name that is not defined in
# the visible lines.
url = server + 'GetSeries.php?seriesname=%s&language=%s' % (urllib.quote(name), language)
220
page = requests.get(url).content
221
# NOTE(review): `except X, e` is Python 2 only syntax.
except RequestException, e:
222
raise LookupError("Unable to get search results for %s: %s" % (name, e))
223
# First <data> tag of the parsed search response.
xmldata = BeautifulStoneSoup(page).data
225
# NOTE(review): the condition guarding this log call, and what follows it, are not visible.
log.error("Didn't get a return from tvdb on the series search for %s" % name)
227
# See if there is an exact match
228
# TODO: Check if there are multiple exact matches
229
# Only the first <series> element is compared, ignoring case.
firstmatch = xmldata.find('series')
230
if firstmatch and firstmatch.seriesname.string.lower() == name.lower():
231
return int(firstmatch.seriesid.string)
232
# If there is no exact match, sort by airing date and pick the latest
233
# TODO: Is there a better way to do this? Maybe weight name similarity and air date
234
# (firstaired text, series id text) pairs for the direct <series> children that have a
# <firstaired> tag.
series_list = [(s.firstaired.string, s.seriesid.string) for s in xmldata.findAll('series', recursive=False) if s.firstaired]
236
# The air dates are compared as text, newest first.
series_list.sort(key=lambda s: s[0], reverse=True)
237
return int(series_list[0][1])
239
raise LookupError('No results for `%s`' % name)
243
# Looks up a series by name and/or tvdb id, consulting the local cache tables first and
# thetvdb afterwards. Raises LookupError when no criteria are given or nothing is found.
# NOTE(review): this function looks heavily truncated in this view - indentation is gone and
# many lines (decorator, inner helper header, try/if/else lines, return statements) are not
# visible. Confirm against the complete file before changing any logic.
def lookup_series(name=None, tvdb_id=None, only_cached=False, session=None):
244
if not name and not tvdb_id:
245
raise LookupError('No criteria specified for tvdb lookup')
247
log.debug('Looking up tvdb information for %r' % {'name': name, 'tvdb_id': tvdb_id})
252
# NOTE(review): this appears to be the body of the id_str() helper called in the messages
# below; its header is not visible.
return '<name=%s,tvdb_id=%s>' % (name, tvdb_id)
255
# Cache lookup: by tvdb id first, then by series name ignoring case, then through a stored
# search string in TVDBSearchResult.
series = session.query(TVDBSeries).filter(TVDBSeries.id == tvdb_id).first()
256
if not series and name:
257
series = session.query(TVDBSeries).filter(func.lower(TVDBSeries.seriesname) == name.lower()).first()
259
found = session.query(TVDBSearchResult). \
260
filter(func.lower(TVDBSearchResult.search) == name.lower()).first()
261
if found and found.series:
262
series = found.series
264
# Series found in cache, update if cache has expired.
266
# Refresh the `expired` flags before looking at series.expired.
mark_expired(session=session)
267
if series.expired and not only_cached:
268
log.verbose('Data for %s has expired, refreshing from tvdb' % series.seriesname)
271
# NOTE(review): `except X, e` is Python 2 only syntax; the try body is not visible.
except LookupError, e:
272
log.warning('Error while updating from tvdb (%s), using cached data.' % e.message)
274
log.debug('Series %s information restored from cache.' % id_str())
277
raise LookupError('Series %s not found from cache' % id_str())
278
# There was no series found in the cache, do a lookup from tvdb
279
log.debug('Series %s not found in cache, looking up from tvdb.' % id_str())
281
series = TVDBSeries()
284
if series.seriesname:
287
# Resolve the name to a tvdb id through the search api.
tvdb_id = find_series_id(name)
289
# The id that was found may already be cached under a different name.
series = session.query(TVDBSeries).filter(TVDBSeries.id == tvdb_id).first()
291
series = TVDBSeries()
295
# Remember the search string when it differs from the series name, so that the cache lookup
# above can find the series by that string.
if name.lower() != series.seriesname.lower():
296
session.add(TVDBSearchResult(search=name, series=series))
299
raise LookupError('No results found from tvdb for %s' % id_str())
306
# Looks up one episode (by season and episode number) of a series, consulting the local
# cache first and thetvdb afterwards. Failures are reported as LookupError.
# NOTE(review): this function looks heavily truncated in this view - indentation is gone and
# many lines (decorator, try/if/else lines, return statements) are not visible. Confirm
# against the complete file before changing any logic.
def lookup_episode(name=None, seasonnum=None, episodenum=None, tvdb_id=None, only_cached=False, session=None):
307
# First make sure we have the series data
308
series = lookup_series(name=name, tvdb_id=tvdb_id, only_cached=only_cached, session=session)
310
raise LookupError('Could not identify series')
311
# Label used in the log and error messages below.
ep_description = '%s.S%sE%s' % (series.seriesname, seasonnum, episodenum)
312
# See if we have this episode cached
313
episode = session.query(TVDBEpisode).filter(TVDBEpisode.series_id == series.id).\
314
filter(TVDBEpisode.seasonnumber == seasonnum).\
315
filter(TVDBEpisode.episodenumber == episodenum).first()
317
if episode.expired and not only_cached:
318
log.info('Data for %r has expired, refreshing from tvdb' % episode)
321
# NOTE(review): `except X, e` is Python 2 only syntax; the try body is not visible.
except LookupError, e:
322
log.warning('Error while updating from tvdb (%s), using cached data.' % e.message)
324
log.debug('Using episode info from cache.')
327
raise LookupError('Episode %s not found from cache' % ep_description)
328
# There was no episode found in the cache, do a lookup from tvdb
329
log.debug('Episode %s not found in cache, looking up from tvdb.' % ep_description)
330
# The url is built with %d, so series.id, seasonnum and episodenum must be numbers here;
# `language` is a module-level name that is not defined in the visible lines.
url = get_mirror() + api_key + '/series/%d/default/%d/%d/%s.xml' % (series.id, seasonnum, episodenum, language)
332
raw_data = requests.get(url).content
333
# First <data> tag of the parsed response.
data = BeautifulStoneSoup(raw_data).data
335
ep_data = data.find('episode')
337
# Check if this episode id is already in our db
338
episode = session.query(TVDBEpisode).filter(TVDBEpisode.id == ep_data.id.string).first()
340
# An episode that is already stored is updated from the new data ...
episode.update_from_bss(ep_data)
342
# ... otherwise a new episode is created from it and attached to the series.
episode = TVDBEpisode(ep_data)
343
series.episodes.append(episode)
344
session.merge(series)
345
except RequestException, e:
346
raise LookupError('Error looking up episode from TVDb (%s)' % e)
348
# Access the series attribute to force it to load before returning
352
# NOTE(review): the message ends after 'for ' without naming what was looked up.
raise LookupError('No results found for ')
356
# Asks thetvdb which series and episodes changed since the last check and sets
# `expired = True` on the matching cached rows. The time of the last check is kept in the
# plugin's persistent store under 'last_local' (local clock) and 'last_server' (server time).
# NOTE(review): this function looks truncated in this view - indentation is gone and several
# lines (decorator, branch bodies, try line, inner helper header) are not visible. Confirm
# against the complete file before changing any logic.
def mark_expired(session=None):
357
"""Marks series and episodes that have expired since we cached them"""
358
# Only get the expired list every hour
359
last_server = persist.get('last_server')
360
last_local = persist.get('last_local')
361
# NOTE(review): the body of this first-run branch is not visible.
if not last_local or not last_server:
363
elif last_local + timedelta(hours=1) > datetime.now():
364
# It has been less than an hour, don't check again yet
368
# Get items that have changed since our last update
369
# `.items` is the first <items> tag of the parsed Updates.php response.
updates = BeautifulStoneSoup(requests.get(server + 'Updates.php?type=all&time=%s' % last_server).content).items
370
# NOTE(review): `except X, e` is Python 2 only syntax.
except RequestException, e:
371
log.error('Could not get update information from tvdb: %s' % e)
374
# Make lists of expired series and episode ids
375
expired_series = [int(series.string) for series in updates.findAll('series')]
376
expired_episodes = [int(ep.string) for ep in updates.findAll('episode')]
379
# NOTE(review): the two lines below appear to belong to the chunked(seq) helper used
# further down; its header and loop body are not visible. The loop steps through `seq` in
# strides of 900. `xrange` is Python 2 only.
"""Helper to divide our expired lists into sizes sqlite can handle in a query. (<1000)"""
380
for i in xrange(0, len(seq), 900):
383
# Update our cache to mark the items that have expired
384
for chunk in chunked(expired_series):
385
# `num` is the value returned by the bulk update and is only used for the debug message.
num = session.query(TVDBSeries).filter(TVDBSeries.id.in_(chunk)).update({'expired': True}, 'fetch')
386
log.debug('%s series marked as expired' % num)
387
for chunk in chunked(expired_episodes):
388
num = session.query(TVDBEpisode).filter(TVDBEpisode.id.in_(chunk)).update({'expired': True}, 'fetch')
389
log.debug('%s episodes marked as expired' % num)
390
# Save the time of this update
391
# The server's own <time> value is stored as text and sent back in the next Updates.php query.
new_server = str(updates.find('time').string)
392
persist['last_local'] = datetime.now()
393
persist['last_server'] = new_server