flexget.plugins.urlrewrite_google_cse
Covered: 13 lines
Missed: 24 lines
Skipped: 11 lines
Percent: 35 %
 1
import re
 2
import urllib2
 3
import logging
 4
from flexget.plugins.plugin_urlrewriting import UrlRewritingError
 5
from flexget.plugin import *
 6
from flexget.utils.soup import get_soup
 7
from flexget.utils.tools import urlopener
 9
# Module-level logger, obtained under the name 'google_cse'.
log = logging.getLogger('google_cse')
12
class UrlRewriteGoogleCse:
    """Google custom query urlrewriter.

    Handles entries whose URL is a Google custom search query
    (``http://www.google.com/cse?`` or ``http://www.google.com/custom?``)
    and replaces that URL with the link of the first search hit whose
    highlighted terms match the entry title.
    """

    # URL prefixes this rewriter accepts.
    _SEARCH_PREFIXES = (
        'http://www.google.com/cse?',
        'http://www.google.com/custom?',
    )

    @staticmethod
    def _terms_pattern(terms):
        """Build a regexp matching ``terms`` in order, as literal text.

        Every term is passed through ``re.escape`` so that characters such
        as ``+`` or ``.`` are matched literally instead of being treated as
        regexp syntax; the escaped terms are joined with ``.*``.  An empty
        ``terms`` sequence yields the empty pattern.
        """
        return '.*'.join([re.escape(term) for term in terms])

    def url_rewritable(self, feed, entry):
        """Return True when ``entry['url']`` starts with a handled prefix."""
        return entry['url'].startswith(self._SEARCH_PREFIXES)

    def url_rewrite(self, feed, entry):
        """Replace ``entry['url']`` with the first matching search hit.

        Fetches the search page at ``entry['url']``, collects every ``<a>``
        element with class ``l`` and, for each one, builds a pattern from the
        text of its ``<em>`` children.  The first hit whose pattern matches
        the beginning of ``entry['title']`` supplies the new URL.

        Raises:
            UrlRewritingError: when the page has no hits, when no hit matches
                the title, or when any other error occurs while fetching or
                parsing (the original exception is passed as the argument).
        """
        try:
            # The request is sent with a browser-like User-agent header.
            txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
            req = urllib2.Request(entry['url'], None, txheaders)
            page = urlopener(req, log)
            soup = get_soup(page)
            results = soup.findAll('a', attrs={'class': 'l'})
            if not results:
                raise UrlRewritingError('No results')
            for res in results:
                url = res.get('href')
                if not url:
                    # An anchor without a link cannot be a resolution; try
                    # the next hit instead of failing the whole rewrite.
                    continue
                url = url.replace('/interstitial?url=', '')
                # Skip <em> elements without children so indexing is safe.
                terms = [x.contents[0] for x in res.findAll('em') if x.contents]
                # NOTE: a hit with no highlighted terms produces an empty
                # pattern, which matches any title.
                regexp = self._terms_pattern(terms)
                if re.match(regexp, entry['title']):
                    log.debug('resolved, found with %s', regexp)
                    entry['url'] = url
                    return
            raise UrlRewritingError('Unable to resolve')
        except UrlRewritingError:
            # Already the right type; do not wrap it a second time.
            raise
        except Exception as e:
            raise UrlRewritingError(e)
47
# Register the class under the plugin name 'google_cse' in the 'urlrewriter' group.
register_plugin(UrlRewriteGoogleCse, 'google_cse', groups=['urlrewriter'])