2
from flexget.manager import Base
3
from flexget.plugin import register_plugin, priority, register_parser_option
4
from sqlalchemy import Column, Integer, String, DateTime, Unicode, asc, or_
5
from sqlalchemy.schema import ForeignKey
6
from sqlalchemy.orm import relation
7
from datetime import datetime
8
from flexget.manager import Session
10
log = logging.getLogger('seen')
15
__tablename__ = 'seen_entry'
17
id = Column(Integer, primary_key=True)
18
title = Column(Unicode)
19
reason = Column(Unicode)
20
feed = Column(Unicode)
21
added = Column(DateTime)
23
fields = relation('SeenField', backref='seen_entry', cascade='all, delete, delete-orphan')
25
def __init__(self, title, feed, reason=None):
29
self.added = datetime.now()
32
return '<SeenEntry(title=%s,reason=%s,feed=%s,added=%s)>' % (self.title, self.reason, self.feed, self.added)
37
__tablename__ = 'seen_field'
39
id = Column(Integer, primary_key=True)
40
seen_entry_id = Column(Integer, ForeignKey('seen_entry.id'), nullable=False)
41
field = Column(Unicode)
42
value = Column(Unicode, index=True)
43
added = Column(DateTime)
45
def __init__(self, field, value):
48
self.added = datetime.now()
51
return '<SeenField(field=%s,value=%s,added=%s)>' % (self.field, self.value, self.added)
54
class MigrateSeen(object):
56
def migrate(self, feed):
57
"""Migrates 0.9 session data into new database"""
61
shelve = feed.manager.shelve_session
63
log.info('If this crashes, you can\'t migrate 0.9 data to 1.0 ... sorry')
64
for name, data in shelve.iteritems():
65
if not 'seen' in data:
68
for k, v in seen.iteritems():
69
se = SeenEntry(u'N/A', seen.feed, u'migrated')
70
se.fields.append(SeenField(u'unknown', k))
74
log.info('It worked! Migrated %s seen items' % count)
76
log.critical('It crashed :(')
84
from progressbar import ProgressBar, Percentage, Bar, ETA
86
print 'Critical: progressbar library not found, try running `bin/easy_install progressbar` ?'
91
__tablename__ = 'seen'
93
id = Column(Integer, primary_key=True)
94
field = Column(String)
95
value = Column(String, index=True)
97
added = Column(DateTime)
99
def __init__(self, field, value, feed):
103
self.added = datetime.now()
106
return '<Seen(%s=%s)>' % (self.field, self.value)
110
# REPAIR / REMOVE DUPLICATES
113
total = session.query(Seen).count() + 1
115
widgets = ['Repairing - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
116
bar = ProgressBar(widgets=widgets, maxval=total).start()
118
for seen in session.query(Seen).all():
120
if (index % 10 == 0):
123
for dupe in session.query(Seen).filter(Seen.value == seen.value):
131
total = session.query(Seen).count() + 1
132
widgets = ['Upgrading - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
133
bar = ProgressBar(widgets=widgets, maxval=total).start()
136
for seen in session.query(Seen).all():
138
if (index % 10 == 0):
140
se = SeenEntry(u'N/A', seen.feed, u'migrated')
141
se.added = seen.added
142
se.fields.append(SeenField(seen.field, seen.value))
146
session.execute('drop table seen;')
149
def on_process_start(self, feed):
150
# migrate shelve -> sqlalchemy
151
if feed.manager.shelve_session:
154
# migrate seen to seen_entry
156
from flexget.utils.sqlalchemy_utils import table_exists
157
if table_exists('seen', session):
162
class SeenSearch(object):
164
def on_process_start(self, feed):
165
if not feed.manager.options.seen_search:
168
feed.manager.disable_feeds()
172
for field in session.query(SeenField).\
173
filter(SeenField.value.like(unicode('%' + feed.manager.options.seen_search + '%'))).\
174
order_by(asc(SeenField.added)).all():
176
se = session.query(SeenEntry).filter(SeenEntry.id == field.seen_entry_id).first()
178
print 'ERROR: <SeenEntry(id=%s)> missing' % field.seen_entry_id
181
# don't show duplicates
186
print 'ID: %s Name: %s Feed: %s Added: %s' % (se.id, se.title, se.feed, se.added.strftime('%c'))
188
print ' %s: %s' % (sf.field, sf.value)
197
class SeenForget(object):
199
def on_process_start(self, feed):
200
if not feed.manager.options.forget:
203
feed.manager.disable_feeds()
205
forget = unicode(feed.manager.options.forget)
209
for se in session.query(SeenEntry).filter(or_(SeenEntry.title == forget, SeenEntry.feed == forget)).all():
210
fcount += len(se.fields)
214
for sf in session.query(SeenField).filter(SeenField.value == forget).all():
215
se = session.query(SeenEntry).filter(SeenEntry.id == sf.seen_entry_id).first()
216
fcount += len(se.fields)
220
log.info('Removed %s titles (%s fields)' % (count, fcount))
225
class SeenCmd(object):
227
def on_process_start(self, feed):
228
if not feed.manager.options.seen:
231
feed.manager.disable_feeds()
234
se = SeenEntry(u'--seen', unicode(feed.name))
235
sf = SeenField(u'--seen', unicode(feed.manager.options.seen))
240
log.info('Added %s as seen. This will affect all feeds.' % feed.manager.options.seen)
243
class FilterSeen(object):
245
Remembers previously downloaded content and rejects it in
246
subsequent executions. Without this plugin FlexGet would
247
download all matching content on every execution.
249
This plugin is enabled on all feeds by default.
250
See wiki for more information.
254
# remember and filter by these fields
255
self.fields = ['title', 'url', 'original_url']
256
self.keyword = 'seen'
259
from flexget import validator
260
root = validator.factory()
261
root.accept('boolean')
266
def on_feed_filter(self, feed):
267
"""Filter seen entries"""
268
if not feed.config.get(self.keyword, True):
269
log.debug('%s is disabled' % self.keyword)
272
for entry in feed.entries:
273
# construct the list of values to look up
275
for field in self.fields:
276
if not field in entry:
278
if entry[field] not in values and entry[field] != '':
279
values.append(entry[field])
280
log.debugall('querying for: %s' % ', '.join(values))
281
# check if SeenField.value is any of the values
282
found = feed.session.query(SeenField).filter(or_(*[SeenField.value == x for x in values])).first()
284
log.debug("Rejecting '%s' '%s' because of seen '%s'" % (entry['url'], entry['title'], found.value))
285
feed.reject(entry, 'Entry with `%s` is already seen' % found.value)
287
def on_feed_exit(self, feed):
288
"""Remember succeeded entries"""
289
if not feed.config.get('seen', True):
290
log.debug('disabled')
293
for entry in feed.accepted:
294
self.learn(feed, entry)
295
# verbose if in learning mode
296
if feed.manager.options.learn:
297
log.info("Learned '%s' (will skip this in the future)" % (entry['title']))
299
def learn(self, feed, entry, fields=None, reason=None):
300
"""Marks entry as seen"""
301
# no explicit fields given, use default
304
se = SeenEntry(entry['title'], unicode(feed.name), reason)
307
if not field in entry:
309
# skip duplicate values (e.g. url and original_url are usually the same)
310
if entry[field] in remembered:
312
remembered.append(entry[field])
313
sf = SeenField(unicode(field), unicode(entry[field]))
315
log.debug("Learned '%s' (field: %s)" % (entry[field], field))
318
def forget(self, feed, title):
319
"""Forget SeenEntry with :title:. Return True if forgotten."""
320
se = feed.session.query(SeenEntry).filter(SeenEntry.title == title).first()
322
log.debug("Forgotten '%s' (%s fields)" % (title, len(se.fields)))
323
feed.session.delete(se)
327
# --- Plugin registration -------------------------------------------------
# Each plugin class defined in this module is registered under its keyword
# with builtin=True. The registration order is kept as it was.
register_plugin(
    FilterSeen,
    'seen',
    builtin=True)
register_plugin(
    SeenSearch,
    '--seen-search',
    builtin=True)
register_plugin(
    SeenCmd,
    '--seen',
    builtin=True)
register_plugin(
    SeenForget,
    '--forget',
    builtin=True)
register_plugin(
    MigrateSeen,
    'migrate_seen',
    builtin=True)

# --- Command line options ------------------------------------------------
# Every option stores its argument and defaults to False; the plugins in
# this module read the stored values from feed.manager.options
# (options.forget, options.seen, options.seen_search).
register_parser_option(
    '--forget',
    action='store',
    dest='forget',
    default=False,
    metavar='FEED|VALUE',
    help='Forget feed (completely) or given title or url.')
register_parser_option(
    '--seen',
    action='store',
    dest='seen',
    default=False,
    metavar='VALUE',
    help='Add title or url to what has been seen in feeds.')
register_parser_option(
    '--seen-search',
    action='store',
    dest='seen_search',
    default=False,
    metavar='VALUE',
    help='Search given text from seen database.')