1
"""Plugin for text file or URL feeds via regex."""
4
from flexget.entry import Entry
5
from flexget import plugin
6
from flexget.utils.cached_input import cached
7
from flexget.utils.tools import urlopener
9
log = logging.getLogger('text')
12
class Text(plugin.Plugin):
15
Parse any text for entries using regular expressions.
19
<field>: <regexp to match value>
21
<field>: <python string formatting>
23
Note: each entry must have at least two fields: title and url
28
url: http://www.nbc.com/Heroes/js/novels.js
30
title: novelTitle = "(.*)"
31
url: novelPrint = "(.*)"
33
url: http://www.nbc.com%(url)s
37
from flexget import validator
38
root = validator.factory('dict')
39
root.accept('url', key='url')
40
root.accept('file', key='url')
41
root.require_key('url')
42
entry = root.accept('dict', key='entry', required=True)
43
entry.accept('regexp', key='url', required=True)
44
entry.accept('regexp', key='title', required=True)
45
entry.accept_any_key('regexp')
46
format = root.accept('dict', key='format')
47
format.accept_any_key('text')
50
def format_entry(self, entry, d):
51
for k, v in d.iteritems():
56
def on_feed_input(self, feed, config):
58
content = urlopener(url, log)
60
entry_config = config.get('entry')
61
format_config = config.get('format', {})
64
# keep track of which fields have been found
70
for field, regexp in entry_config.iteritems():
71
#log.debug('search field: %s regexp: %s' % (field, regexp))
72
match = re.search(regexp, line)
74
# check whether an already-used field was detected; if so, start a new entry
75
if used.has_key(field):
77
log.info('Found field %s again before entry was completed. \
78
Adding current incomplete, but valid entry and moving to next.' % field)
79
self.format_entry(entry, format_config)
82
log.info('Invalid data, entry field %s is already found once. Ignoring entry.' % field)
88
entry[field] = match.group(1)
90
log.debug('found field: %s value: %s' % (field, entry[field]))
92
# if all fields have been found
93
if len(used) == len(entry_config):
94
# check that entry has at least title and url
95
if not entry.isvalid():
96
log.info('Invalid data, constructed entry is missing mandatory fields (title or url)')
98
self.format_entry(entry, format_config)
100
log.debug('Added entry %s' % entry)