import logging
import re
import urllib2

from flexget.plugins.plugin_urlrewriting import UrlRewritingError
from flexget.plugin import *
from flexget.utils.soup import get_soup
from flexget.utils.tools import urlopener

# Module-level logger for this plugin; url_rewrite hands it to urlopener and
# uses it to report which pattern resolved an entry.
log = logging.getLogger('google_cse')


class UrlRewriteGoogleCse:
    """Google custom query urlrewriter.

    Handles entries whose URL is a Google custom search query and replaces
    that URL with the link of the first search result whose highlighted
    title fragments match the entry title.
    """

    def url_rewritable(self, feed, entry):
        """Tell whether ``entry['url']`` is a Google custom search URL.

        :param feed: not used by this method.
        :param entry: mapping providing a ``'url'`` string.
        :return: True if the URL starts with the ``cse?`` or ``custom?``
            query prefix on www.google.com, False otherwise.
        """
        return entry['url'].startswith((
            'http://www.google.com/cse?',
            'http://www.google.com/custom?',
        ))

    def url_rewrite(self, feed, entry):
        """Rewrite ``entry['url']`` to the matching search result link.

        The search page is opened through ``urlopener`` and parsed with
        ``get_soup``. Each ``<a class="l">`` result is turned into a pattern
        by joining the text of its ``<em>`` fragments with ``.*``; the first
        result whose pattern matches ``entry['title']`` wins and its link is
        stored in ``entry['url']``.

        :param feed: not used by this method.
        :param entry: mapping providing ``'url'`` and ``'title'``; ``'url'``
            is updated in place on success.
        :raises UrlRewritingError: if the page has no results, if no result
            matches the title, or if any other error occurs along the way.
        """
        try:
            # need to fake user agent
            txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
            req = urllib2.Request(entry['url'], None, txheaders)
            page = urlopener(req, log)
            soup = get_soup(page)
            results = soup.findAll('a', attrs={'class': 'l'})
            if not results:
                raise UrlRewritingError('No results')
            for res in results:
                url = res.get('href')
                if not url:
                    # an anchor without a link cannot resolve the entry
                    continue
                url = url.replace('/interstitial?url=', '')
                # generate match regexp from google search result title
                regexp = '.*'.join([x.contents[0] for x in res.findAll('em')])
                # re.match anchors the pattern at the start of the title
                if re.match(regexp, entry['title']):
                    log.debug('resolved, found with %s', regexp)
                    entry['url'] = url
                    return
            raise UrlRewritingError('Unable to resolve')
        except UrlRewritingError:
            # already the right type; do not wrap it a second time
            raise
        except Exception as e:
            raise UrlRewritingError(e)


# Register the rewriter class under the name 'google_cse' in the
# 'urlrewriter' plugin group.
register_plugin(UrlRewriteGoogleCse, 'google_cse', groups=['urlrewriter'])