3
from flexget.plugin import register_plugin, priority, get_plugin_by_name
5
log = logging.getLogger('regexp')
8
class FilterRegexp(object):
14
[operation]: # operation to perform on matches
15
- [regexp] # simple regexp
16
- [regexp]: <path> # override path
18
[path]: <path> # override path
19
[not]: <regexp> # not match
20
[from]: <field> # search from given entry field
22
[path]: <path> # override path
23
[not]: # list of not match regexps
25
[from]: # search only from these fields
29
[rest]: <operation> # entries matched by no operation are accepted or rejected (accept / reject only)
30
[from]: # search only from these fields for all regexps
33
Possible operations: accept, reject, accept_excluding, reject_excluding
37
from flexget import validator
39
def build_list(regexps):
40
regexps.accept('regexp')
42
# bundle is a dictionary form
43
bundle = regexps.accept('dict')
44
# path as a single parameter
45
bundle.accept_valid_keys('path', allow_replacement=True, key_type='regexp')
47
# advanced configuration as a parameter
48
advanced = bundle.accept_valid_keys('dict', key_type='regexp')
49
advanced.accept('path', key='path', allow_replacement=True)
50
# accept set parameters
51
set = advanced.accept('dict', key='set')
52
set.accept_any_key('any')
53
# not as a single parameter
54
advanced.accept('regexp', key='not')
55
# from as a single parameter
56
advanced.accept('text', key='from')
59
advanced.accept('list', key='not').accept('regexp')
62
advanced.accept('list', key='from').accept('text')
64
conf = validator.factory('dict')
65
for operation in ['accept', 'reject', 'accept_excluding', 'reject_excluding']:
66
regexps = conf.accept('list', key=operation)
69
conf.accept('choice', key='rest').accept_choices(['accept', 'reject'])
70
conf.accept('text', key='from')
73
def get_config(self, feed):
75
Returns the config in standard format
77
All regexps are turned into dictionaries in the form {regexp: options}
78
options is a dict that may (but need not) contain the following keys
79
path: will be attached to entries that match
80
set: a dict of values to be attached to entries that match via set plugin
81
from: a list of fields in entry for the regexps to match against
82
not: a list of regexps that if matching, will disqualify the main match
84
config = feed.config.get('regexp', {})
87
out_config['rest'] = config['rest']
88
# Turn all our regexps into advanced form dicts
89
for operation, regexps in config.iteritems():
90
if operation in ['rest', 'from']:
92
for regexp_item in regexps:
93
if not isinstance(regexp_item, dict):
95
regexp_item = {regexp: {}}
96
regexp, opts = regexp_item.items()[0]
97
# Parse custom settings for this regexp
98
if not isinstance(opts, dict):
100
# advanced configuration
101
if config.get('from'):
102
opts.setdefault('from', config['from'])
103
# Put plain strings into list form for from and not
104
if 'from' in opts and isinstance(opts['from'], basestring):
105
opts['from'] = [opts['from']]
106
if 'not' in opts and isinstance(opts['not'], basestring):
107
opts['not'] = [opts['not']]
108
# make sure regexp is a string for series like '24'
109
regexp = unicode(regexp)
110
out_config.setdefault(operation, []).append({regexp: opts})
114
def on_feed_filter(self, feed):
115
# TODO: what if accept and accept_excluding configured? Should raise error ...
116
config = self.get_config(feed)
118
for operation, regexps in config.iteritems():
119
if operation == 'rest':
121
r = self.filter(feed, operation, regexps)
125
# If there is already something in rest, take the intersection with r (entries no operations matched)
126
rest = [entry for entry in r if entry in rest]
129
rest_method = feed.accept if config['rest'] == 'accept' else feed.reject
131
log.debug('Rest method %s for %s' % (config['rest'], entry['title']))
132
# The remember keyword causes the remember_rejected plugin to filter this next time
133
rest_method(entry, 'regexp `rest`', remember=True)
135
def matches(self, entry, regexp, find_from=None, not_regexps=None):
136
"""Check if :entry: has any string fields or strings in a list field that match :regexp:.
137
Optional :find_from: can be given as a list to limit searching fields"""
139
for field in find_from or entry:
140
if not entry.get(field):
142
# Make all fields into lists to search
143
values = entry[field]
144
if not isinstance(values, list):
147
if not isinstance(value, basestring):
151
value = urllib.unquote(value)
152
# If none of the not_regexps match
153
if re.search(regexp, value, re.IGNORECASE | re.UNICODE):
154
# Make sure the not_regexps do not match for this field
155
for not_regexp in not_regexps or []:
156
if self.matches(entry, not_regexp, find_from=[field]):
158
else: # None of the not_regexps matched
162
def filter(self, feed, operation, regexps):
164
operation - one of 'accept' 'reject' 'accept_excluding' and 'reject_excluding'
165
accept and reject will be called on the entry if any of the regexps match
166
_excluding operations will be called if any of the regexps don't match
167
regexps - list of {regexp: options} dictionaries
169
Return list of entries that didn't match regexps
172
method = feed.accept if 'accept' in operation else feed.reject
173
match_mode = 'excluding' not in operation
174
for entry in feed.entries:
175
for regexp_opts in regexps:
176
regexp, opts = regexp_opts.items()[0]
178
# check if entry matches given regexp, also makes sure it doesn't match secondary
179
field = self.matches(entry, regexp, opts.get('from'), opts.get('not'))
180
# Run if we are in match mode and have a hit, or are in non-match mode and don't have a hit
181
if match_mode == bool(field):
182
# Creates the string with the reason for the hit
183
matchtext = 'regexp \'%s\' ' % regexp + ('matched field \'%s\'' % field if match_mode else 'didn\'t match')
184
log.debug('%s for %s' % (matchtext, entry['title']))
185
# apply settings to entry and run the method on it
187
entry['path'] = opts['path']
189
log.debug('adding set: info to entry:"%s" %s' % (entry['title'], opts['set']))
190
set = get_plugin_by_name('set')
191
set.instance.modify(entry, opts['set'])
192
method(entry, matchtext, remember=True)
193
# We had a match so break out of the regexp loop.
196
# We didn't run method for any of the regexps, add this entry to rest
200
register_plugin(FilterRegexp, 'regexp')