flexget.plugins.filter_seen
Covered: 140 lines
Missed: 115 lines
Skipped 84 lines
Percent: 54 %
  1
import logging
  2
from flexget.manager import Base
  3
from flexget.plugin import register_plugin, priority, register_parser_option
  4
from sqlalchemy import Column, Integer, String, DateTime, Unicode, asc, or_
  5
from sqlalchemy.schema import ForeignKey
  6
from sqlalchemy.orm import relation
  7
from datetime import datetime
  8
from flexget.manager import Session
 10
# Logger used by the plugin classes in this module, registered under the name 'seen'.
log = logging.getLogger('seen')
 13
class SeenEntry(Base):
    """Database row describing one remembered entry.

    The values that identify the entry are stored as related SeenField rows
    and are reachable through the ``fields`` relation.
    """

    __tablename__ = 'seen_entry'

    id = Column(Integer, primary_key=True)
    title = Column(Unicode)
    reason = Column(Unicode)
    feed = Column(Unicode)
    added = Column(DateTime)

    # Related SeenField rows; the cascade setting removes them together with the entry.
    fields = relation('SeenField', backref='seen_entry', cascade='all, delete, delete-orphan')

    def __init__(self, title, feed, reason=None):
        """Create an entry for `title` in `feed`, stamped with the current local time."""
        self.title, self.reason, self.feed = title, reason, feed
        self.added = datetime.now()

    def __str__(self):
        """Return a short human-readable description of the entry."""
        details = (self.title, self.reason, self.feed, self.added)
        return '<SeenEntry(title=%s,reason=%s,feed=%s,added=%s)>' % details
 35
class SeenField(Base):
    """Database row holding a single (field name, value) pair of a seen entry."""

    __tablename__ = 'seen_field'

    id = Column(Integer, primary_key=True)
    # Every field row must point at its owning seen_entry row.
    seen_entry_id = Column(Integer, ForeignKey('seen_entry.id'), nullable=False)
    field = Column(Unicode)
    # Indexed because lookups in this module filter on the value column.
    value = Column(Unicode, index=True)
    added = Column(DateTime)

    def __init__(self, field, value):
        """Store `value` under the name `field`, stamped with the current local time."""
        self.field, self.value = field, value
        self.added = datetime.now()

    def __str__(self):
        """Return a short human-readable description of the field."""
        details = (self.field, self.value, self.added)
        return '<SeenField(field=%s,value=%s,added=%s)>' % details
 54
class MigrateSeen(object):
    """Converts seen data kept in older storage formats into SeenEntry/SeenField rows."""

    def migrate(self, feed):
        """Migrates 0.9 session data into new database

        Reads `feed.manager.shelve_session` and stores one SeenEntry (with a
        single 'unknown' SeenField) per key found under each 'seen' mapping.
        Any failure is logged and swallowed; migration is best-effort.
        """
        session = Session()
        try:
            shelve = feed.manager.shelve_session
            count = 0
            log.info('If this crashes, you can\'t migrate 0.9 data to 1.0 ... sorry')
            for name, data in shelve.iteritems():
                if 'seen' not in data:
                    continue
                seen = data['seen']
                # The old code read `seen.feed` unconditionally, which raises
                # AttributeError on a plain mapping. Fall back to the shelve key.
                # NOTE(review): presumably the shelve key is the feed name - confirm.
                feed_name = getattr(seen, 'feed', name)
                for key, _ in seen.iteritems():
                    se = SeenEntry(u'N/A', feed_name, u'migrated')
                    se.fields.append(SeenField(u'unknown', key))
                    session.add(se)
                    count += 1
            session.commit()
            log.info('It worked! Migrated %s seen items' % count)
        except Exception:
            # Still best-effort, but keep the traceback so the failure can be diagnosed.
            log.critical('It crashed :(', exc_info=True)
        finally:
            session.close()

    def migrate2(self):
        """Convert rows of the legacy `seen` table into SeenEntry/SeenField rows.

        Rows sharing a value with an earlier row are deleted first, the
        remaining rows are converted, and finally the `seen` table is dropped.
        Prints a message and returns without doing anything when the
        `progressbar` library cannot be imported.
        """
        # Check the optional dependency before opening a session so that the
        # early return cannot leak one.
        try:
            from progressbar import ProgressBar, Percentage, Bar, ETA
        except ImportError:
            print('Critical: progressbar library not found, try running `bin/easy_install progressbar` ?')
            return

        class Seen(Base):
            """Mapping of the legacy `seen` table, needed only for this migration."""

            __tablename__ = 'seen'

            id = Column(Integer, primary_key=True)
            field = Column(String)
            value = Column(String, index=True)
            feed = Column(String)
            added = Column(DateTime)

            def __init__(self, field, value, feed):
                self.field = field
                self.value = value
                self.feed = feed
                self.added = datetime.now()

            def __str__(self):
                return '<Seen(%s=%s)>' % (self.field, self.value)

        session = Session()
        try:
            print('')

            # Phase 1: drop rows whose value was already encountered. A set of
            # seen values replaces the former one-query-per-row duplicate search.
            rows = session.query(Seen).all()
            widgets = ['Repairing - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
            bar = ProgressBar(widgets=widgets, maxval=len(rows) + 1).start()
            kept_values = set()
            for index, seen in enumerate(rows):
                position = index + 1
                if position % 10 == 0:
                    bar.update(position)
                if seen.value in kept_values:
                    session.delete(seen)
                else:
                    kept_values.add(seen.value)
            bar.finish()
            # Make sure the deletions are visible to the query below.
            session.flush()

            # Phase 2: convert every surviving row.
            survivors = session.query(Seen).all()
            widgets = ['Upgrading - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
            bar = ProgressBar(widgets=widgets, maxval=len(survivors) + 1).start()
            for index, seen in enumerate(survivors):
                position = index + 1
                if position % 10 == 0:
                    bar.update(position)
                se = SeenEntry(u'N/A', seen.feed, u'migrated')
                # Preserve the original timestamp instead of the migration time.
                se.added = seen.added
                se.fields.append(SeenField(seen.field, seen.value))
                session.add(se)
            bar.finish()

            session.execute('drop table seen;')
            session.commit()
        finally:
            session.close()

    def on_process_start(self, feed):
        """Run whichever migrations apply: shelve data first, then the legacy `seen` table."""
        if feed.manager.shelve_session:
            self.migrate(feed)

        from flexget.utils.sqlalchemy_utils import table_exists
        session = Session()
        try:
            needs_upgrade = table_exists('seen', session)
        finally:
            # Release this session before migrate2 opens its own.
            session.close()
        if needs_upgrade:
            self.migrate2()
162
class SeenSearch(object):
    """Handles the seen_search option: prints seen entries with a matching field value."""

    def on_process_start(self, feed):
        """Print every seen entry that has a field value containing the search text.

        Does nothing unless `feed.manager.options.seen_search` is set. When it
        is set, feeds are disabled via `feed.manager.disable_feeds()` and the
        results are written to standard output.
        """
        keyword = feed.manager.options.seen_search
        if not keyword:
            return

        feed.manager.disable_feeds()

        session = Session()
        try:
            pattern = unicode('%' + keyword + '%')
            matches = session.query(SeenField).\
                filter(SeenField.value.like(pattern)).\
                order_by(asc(SeenField.added)).all()

            shown = set()
            for field in matches:
                # Several fields may belong to one entry; print each entry once
                # and skip the lookup query for entries already printed.
                if field.seen_entry_id in shown:
                    continue

                se = session.query(SeenEntry).filter(SeenEntry.id == field.seen_entry_id).first()
                if not se:
                    print('ERROR: <SeenEntry(id=%s)> missing' % field.seen_entry_id)
                    continue
                shown.add(se.id)

                # `added` is a nullable column, so guard the formatting.
                if se.added:
                    added = se.added.strftime('%c')
                else:
                    added = 'N/A'
                print('ID: %s Name: %s Feed: %s Added: %s' % (se.id, se.title, se.feed, added))
                for sf in se.fields:
                    print(' %s: %s' % (sf.field, sf.value))
                print('')

            if not shown:
                print('No results')
        finally:
            # Close the session even when querying or printing fails.
            session.close()
197
class SeenForget(object):
    """Handles the forget option: removes seen entries matching a feed, title or value."""

    def on_process_start(self, feed):
        """Delete seen entries whose title, feed or any field value equals the given text.

        Does nothing unless `feed.manager.options.forget` is set. When it is
        set, feeds are disabled via `feed.manager.disable_feeds()`.
        """
        if not feed.manager.options.forget:
            return

        feed.manager.disable_feeds()

        forget = unicode(feed.manager.options.forget)
        session = Session()
        try:
            # Collect the entries by id first so that an entry matched through
            # several criteria is counted and deleted exactly once.
            doomed = {}
            orphan_fields = 0

            by_title_or_feed = session.query(SeenEntry).filter(
                or_(SeenEntry.title == forget, SeenEntry.feed == forget))
            for se in by_title_or_feed.all():
                doomed[se.id] = se

            for sf in session.query(SeenField).filter(SeenField.value == forget).all():
                if sf.seen_entry_id in doomed:
                    continue
                se = session.query(SeenEntry).filter(SeenEntry.id == sf.seen_entry_id).first()
                if se is None:
                    # Field without an owning entry: remove the field itself so
                    # the value is still forgotten (this used to raise AttributeError).
                    session.delete(sf)
                    orphan_fields += 1
                    continue
                doomed[se.id] = se

            count = 0
            fcount = orphan_fields
            for se in doomed.values():
                fcount += len(se.fields)
                count += 1
                session.delete(se)

            session.commit()
            # Report only after the commit has succeeded.
            log.info('Removed %s titles (%s fields)' % (count, fcount))
        finally:
            session.close()
225
class SeenCmd(object):
    """Handles the seen option: stores a user-supplied value as already seen."""

    def on_process_start(self, feed):
        """Add `feed.manager.options.seen` to the seen database.

        Does nothing unless the option is set. When it is set, feeds are
        disabled via `feed.manager.disable_feeds()`.
        """
        if not feed.manager.options.seen:
            return

        feed.manager.disable_feeds()

        session = Session()
        try:
            se = SeenEntry(u'--seen', unicode(feed.name))
            se.fields.append(SeenField(u'--seen', unicode(feed.manager.options.seen)))
            session.add(se)
            session.commit()
        finally:
            # The session used to be left open; always release it.
            session.close()

        log.info('Added %s as seen. This will affect all feeds.' % feed.manager.options.seen)
243
class FilterSeen(object):
    """
        Remembers previously downloaded content and rejects them in
        subsequent executions. Without this plugin FlexGet would
        download all matching content on every execution.

        This plugin is enabled on all feeds by default.
        See wiki for more information.
    """

    def __init__(self):
        # Entry fields whose values identify an entry as already seen.
        self.fields = ['title', 'url', 'original_url']
        # Feed configuration key that enables or disables this plugin.
        self.keyword = 'seen'

    def validator(self):
        """Return a validator accepting a boolean or a text value."""
        from flexget import validator
        root = validator.factory()
        root.accept('boolean')
        root.accept('text')
        return root

    @priority(255)
    def on_feed_filter(self, feed):
        """Filter seen entries"""
        if not feed.config.get(self.keyword, True):
            log.debug('%s is disabled' % self.keyword)
            return

        for entry in feed.entries:
            # Distinct, non-empty values of the tracked fields present on this entry.
            values = []
            for field in self.fields:
                if field not in entry:
                    continue
                value = entry[field]
                if value is None or value == '':
                    continue
                if value not in values:
                    values.append(value)

            if not values:
                # Nothing to compare. An or_() without clauses would leave the
                # query unfiltered, so any stored SeenField would match and the
                # entry would be wrongly rejected.
                log.debug('No values to check for entry, skipping seen lookup')
                continue

            log.debugall('querying for: %s' % ', '.join(values))

            found = feed.session.query(SeenField).filter(or_(*[SeenField.value == x for x in values])).first()
            if found:
                log.debug("Rejecting '%s' '%s' because of seen '%s'" % (entry['url'], entry['title'], found.value))
                feed.reject(entry, 'Entry with `%s` is already seen' % found.value)

    def on_feed_exit(self, feed):
        """Remember succeeded entries"""
        # Use self.keyword, as on_feed_filter does, so both phases honour the
        # same configuration key.
        if not feed.config.get(self.keyword, True):
            log.debug('%s is disabled' % self.keyword)
            return

        for entry in feed.accepted:
            self.learn(feed, entry)
            # Verbose feedback when running with the learn option.
            if feed.manager.options.learn:
                log.info("Learned '%s' (will skip this in the future)" % (entry['title']))

    def learn(self, feed, entry, fields=None, reason=None):
        """Marks entry as seen

        Adds a SeenEntry to `feed.session` with one SeenField per distinct
        value among `fields` (defaults to `self.fields`) present on `entry`.
        `reason` is stored on the SeenEntry.
        """
        if not fields:
            fields = self.fields
        se = SeenEntry(entry['title'], unicode(feed.name), reason)
        remembered = []
        for field in fields:
            if field not in entry:
                continue
            value = entry[field]
            # Store each distinct value only once per entry.
            if value in remembered:
                continue
            remembered.append(value)
            se.fields.append(SeenField(unicode(field), unicode(value)))
            log.debug("Learned '%s' (field: %s)" % (value, field))
        feed.session.add(se)

    def forget(self, feed, title):
        """Forget SeenEntry with :title:. Return True if forgotten, False otherwise."""
        se = feed.session.query(SeenEntry).filter(SeenEntry.title == title).first()
        if not se:
            return False
        log.debug("Forgotten '%s' (%s fields)" % (title, len(se.fields)))
        feed.session.delete(se)
        return True
327
# Plugin registrations: every class in this module is registered as a builtin.
register_plugin(FilterSeen, 'seen', builtin=True)
register_plugin(SeenSearch, '--seen-search', builtin=True)
register_plugin(SeenCmd, '--seen', builtin=True)
register_plugin(SeenForget, '--forget', builtin=True)
register_plugin(MigrateSeen, 'migrate_seen', builtin=True)

# Command line options read by the plugins above; each one defaults to False.
register_parser_option(
    '--forget',
    action='store',
    dest='forget',
    default=False,
    metavar='FEED|VALUE',
    help='Forget feed (completely) or given title or url.')
register_parser_option(
    '--seen',
    action='store',
    dest='seen',
    default=False,
    metavar='VALUE',
    help='Add title or url to what has been seen in feeds.')
register_parser_option(
    '--seen-search',
    action='store',
    dest='seen_search',
    default=False,
    metavar='VALUE',
    help='Search given text from seen database.')