flexget.plugins.input_text
Covered: 31 lines
Missed: 46 lines
Skipped 27 lines
Percent: 40 %
  1
from flexget.feed import Entry
  2
from flexget.plugin import register_plugin, internet
  3
from flexget.utils.cached_input import cached
  4
import re
  5
import logging
  6
from flexget.utils.tools import urlopener
  8
# Module-level logger for this plugin, obtained under the logger name 'text'.
log = logging.getLogger('text')
 11
class InputText(object):
    """
    Parse any text for entries using regular expression.

    url: <url>
    entry:
      <field>: <regexp to match value>
    format:
      <field>: <python string formatting>

    Note: each entry must have at least two fields, title and url

    Example:

    text:
      url: http://www.nbc.com/Heroes/js/novels.js
      entry:
        title: novelTitle = "(.*)"
        url: novelPrint = "(.*)"
      format:
        url: http://www.nbc.com%(url)s
    """

    def validator(self):
        """Build and return the validator describing this plugin's config.

        The config is a dict with a mandatory ``url`` (accepted as a url or
        a file), a mandatory ``entry`` dict of regexps (``url`` and ``title``
        required, any other key allowed) and an optional ``format`` dict of
        text values.
        """
        from flexget import validator
        root = validator.factory('dict')
        root.accept('url', key='url')
        root.accept('file', key='url')
        root.require_key('url')
        entry = root.accept('dict', key='entry', required=True)
        entry.accept('regexp', key='url', required=True)
        entry.accept('regexp', key='title', required=True)
        entry.accept_any_key('regexp')
        # Named format_dict rather than format to avoid shadowing the builtin.
        format_dict = root.accept('dict', key='format')
        format_dict.accept_any_key('text')
        return root

    def format_entry(self, entry, d):
        """Apply the ``format`` templates in *d* to *entry*, in place.

        Each value in *d* is a %-style template interpolated with *entry*
        itself as the mapping (e.g. ``http://host%(url)s``); the result is
        stored back into *entry* under the same key.
        """
        for key, template in d.items():
            entry[key] = template % entry

    @cached('text', 'url')
    @internet(log)
    def on_feed_input(self, feed):
        """Fetch the configured url and build entries from its lines.

        Every line is tested against every configured field regexp; the
        first capture group of a match becomes the value of that field.
        Once all configured fields have been seen the entry is formatted
        and appended to ``feed.entries`` (if valid), and a new entry is
        started. Seeing a field a second time before the entry is complete
        flushes the current entry if it is valid, otherwise discards it.
        """
        config = feed.config['text']
        url = config['url']
        # NOTE(review): the object returned by urlopener is never closed
        # here -- confirm whether it needs an explicit close().
        content = urlopener(url, log)

        entry_config = config.get('entry')
        format_config = config.get('format', {})

        # Names of the fields already captured for the entry being built.
        used = set()
        entry = Entry()

        for line in content:
            for field, regexp in entry_config.items():
                match = re.search(regexp, line)
                if match:
                    if field in used:
                        # The same field showed up again: the previous entry
                        # will never be completed, so flush or drop it.
                        if entry.isvalid():
                            log.info('Found field %s again before entry was completed. '
                                     'Adding current incomplete, but valid entry and moving to next.',
                                     field)
                            self.format_entry(entry, format_config)
                            feed.entries.append(entry)
                        else:
                            log.info('Invalid data, entry field %s is already found once. Ignoring entry.',
                                     field)
                        entry = Entry()
                        used = set()

                    entry[field] = match.group(1)
                    used.add(field)
                    log.debug('found field: %s value: %s', field, entry[field])

                # All configured fields have been captured: emit the entry.
                if len(used) == len(entry_config):
                    if not entry.isvalid():
                        log.info('Invalid data, constructed entry is missing mandatory fields (title or url)')
                    else:
                        self.format_entry(entry, format_config)
                        feed.entries.append(entry)
                        log.debug('Added entry %s', entry)
                        # Start over only after a successful add, as before.
                        entry = Entry()
                        used = set()
103
# Register the InputText class with the plugin system under the name 'text'.
register_plugin(InputText, 'text')