flexget.plugins.input.text
Covered: 43 lines
Missed: 36 lines
Skipped 26 lines
Percent: 54 %
  1
"""Plugin for text file or URL feeds via regex."""
  2
import re
  3
import logging
  4
from flexget.entry import Entry
  5
from flexget import plugin
  6
from flexget.utils.cached_input import cached
  7
from flexget.utils.tools import urlopener
  9
log = logging.getLogger('text')
 12
class Text(plugin.Plugin):
    """
    Parse any text for entries using regular expressions.

    url: <url>
    entry:
      <field>: <regexp to match value>
    format:
      <field>: <python string formatting>

    Note: each entry must have at least two fields, title and url.

    Example:

    text:
      url: http://www.nbc.com/Heroes/js/novels.js
      entry:
        title: novelTitle = "(.*)"
        url: novelPrint = "(.*)"
      format:
        url: http://www.nbc.com%(url)s
    """

    def validator(self):
        """Build and return the validator for this plugin's configuration.

        ``url`` (a url or a file) and ``entry`` (a dict of regexps that must
        contain ``url`` and ``title``) are required; ``format`` (a dict of
        text templates) is optional.
        """
        from flexget import validator
        root = validator.factory('dict')
        root.accept('url', key='url')
        root.accept('file', key='url')
        root.require_key('url')
        entry = root.accept('dict', key='entry', required=True)
        entry.accept('regexp', key='url', required=True)
        entry.accept('regexp', key='title', required=True)
        entry.accept_any_key('regexp')
        # Named format_rules rather than `format` to avoid shadowing the builtin.
        format_rules = root.accept('dict', key='format')
        format_rules.accept_any_key('text')
        return root

    def format_entry(self, entry, d):
        """Rewrite fields of ``entry`` in place using %-style templates.

        :param entry: mapping of already parsed fields; it is both the
            source of the interpolation values and the target of the result.
        :param d: mapping of field name to template, e.g.
            ``{'url': 'http://example.com%(url)s'}``.

        Templates are applied one after another, so a later template sees
        the values produced by earlier ones.
        """
        for field, template in d.items():
            entry[field] = template % entry

    @cached('text')
    @plugin.internet(log)
    def on_feed_input(self, feed, config):
        """Scan the configured source line by line and build entries.

        Every line is tested against every regexp in ``config['entry']``;
        the first capture group of a match becomes the value of that field.
        An entry is emitted once all configured fields have been seen, or
        earlier if a field shows up a second time while the entry collected
        so far is already valid. Data still pending when the input ends is
        not emitted.

        :param feed: the feed being processed (not used here).
        :param config: plugin configuration with ``url``, ``entry`` and
            optionally ``format``.
        :return: list of the entries that were found.
        """
        url = config['url']
        content = urlopener(url, log)

        entry_config = config.get('entry')
        format_config = config.get('format', {})

        # Compile every pattern once instead of handing the raw pattern to
        # re.search() for each line/field combination.
        patterns = [(field, re.compile(regexp))
                    for field, regexp in entry_config.items()]

        entries = []
        # Fields found so far for the entry currently being assembled.
        used = {}
        entry = Entry()

        for line in content:
            for field, pattern in patterns:
                match = pattern.search(line)
                if match:
                    # A repeated field means a new record has started.
                    if field in used:
                        if entry.isvalid():
                            log.info('Found field %s again before entry was completed. '
                                     'Adding current incomplete, but valid entry and moving to next.',
                                     field)
                            self.format_entry(entry, format_config)
                            entries.append(entry)
                        else:
                            log.info('Invalid data, entry field %s is already found once. Ignoring entry.',
                                     field)
                        # Start over with a fresh entry in either case.
                        entry = Entry()
                        used = {}

                    entry[field] = match.group(1)
                    used[field] = True
                    log.debug('found field: %s value: %s', field, entry[field])

                # All configured fields have been collected.
                if len(used) == len(entry_config):
                    if not entry.isvalid():
                        # The same entry keeps collecting; nothing is reset here.
                        log.info('Invalid data, constructed entry is missing mandatory fields (title or url)')
                    else:
                        self.format_entry(entry, format_config)
                        entries.append(entry)
                        log.debug('Added entry %s', entry)
                        # Begin collecting the next entry.
                        entry = Entry()
                        used = {}
        return entries