1
""" Common tools used by plugins implementing search plugin api """
3
from difflib import SequenceMatcher
4
from unicodedata import normalize
5
from flexget.utils.titles.parser import TitleParser
6
from flexget.utils.titles.movie import MovieParser
7
from flexget.utils import qualities
10
def clean_symbols(text):
    """Replace common symbols with spaces and lowercase the result.

    Unicode strings are first normalized to decomposed form (NFKD).

    :param text: String to clean.
    :return: Lowercased copy of ``text`` in which every run of spaces,
        parentheses, hyphens, underscores, square brackets and dots has been
        collapsed into a single space.
    """
    # Imported locally: `re` is not among the module-level imports visible
    # in this file.
    import re

    result = text
    # type(u'') is `unicode` on Python 2 and `str` on Python 3, so the check
    # works on both without referencing the Python 2-only `unicode` name.
    if isinstance(result, type(u'')):
        result = normalize('NFKD', result)
    return re.sub(r'[ \(\)\-_\[\]\.]+', ' ', result).lower()
18
def clean_title(title):
    """Strip codec and sound keywords plus special characters from a title.

    Intended to facilitate loose title comparison.

    :param title: Title string to clean.
    :return: The title after ``TitleParser.remove_words`` has removed the
        sound and codec keywords and ``clean_symbols`` has been applied.
    """
    result = TitleParser.remove_words(title, TitleParser.sounds + TitleParser.codecs)
    result = clean_symbols(result)
    # NOTE(review): the return statement is not visible in the recovered
    # source; returning the cleaned value is what the docstring describes.
    return result
27
class AnyComparator(object):
    """Comparator that does no comparison.

    Used to return all results from a search plugin without filtering.
    """

    # NOTE(review): the method bodies below (other than the unicode branch of
    # search_string) are not visible in the recovered source and were
    # reconstructed from the class docstring and from search_string reading
    # ``self.a``. Confirm against the original file, including whether the
    # class also defined an ``__init__``.

    def set_seq1(self, a):
        """Store the first string; it is the basis for :meth:`search_string`."""
        self.a = a

    def set_seq2(self, b):
        """Store the second string. It is never compared against."""
        self.b = b

    def matches(self, other=None):
        """Accept every candidate.

        :param other: Ignored.
        :return: Always True, since this comparator does no comparison.
        """
        return True

    def search_string(self):
        """Return a cleaned string based on seq1 that can be used for searching."""
        # type(u'') is `unicode` on Python 2 and `str` on Python 3.
        if isinstance(self.a, type(u'')):
            # Convert to combined form for better search results
            return normalize('NFC', self.a)
        return self.a
55
class StringComparator(SequenceMatcher, object):
    """Compares two strings for similarity.

    Runs a cleaner function on strings before comparison. Cutoff similarity
    is configurable.
    """

    def __init__(self, cutoff=0.9, cleaner=clean_symbols):
        """
        :param cutoff: Minimum similarity to be considered a match.
        :param cleaner: Cleaning function to pass strings through before comparison.
        """
        # NOTE(review): the assignment of ``cutoff`` is not visible in the
        # recovered source; it is required because matches() reads it.
        self.cutoff = cutoff
        # The cleaner must be in place before SequenceMatcher.__init__ runs:
        # that constructor seeds both sequences through set_seq1/set_seq2,
        # which are overridden below and call self.cleaner.
        self.cleaner = cleaner
        SequenceMatcher.__init__(self)

    def set_seq1(self, a):
        """Set first string for comparison."""
        SequenceMatcher.set_seq1(self, self.cleaner(a))

    def set_seq2(self, b):
        """Set second string for comparison."""
        SequenceMatcher.set_seq2(self, self.cleaner(b))

    def matches(self, other=None):
        """Compare the two strings, return True if match is close enough.

        :param other: String to compare against. If not specified, last specified string will be used.
        :return: True if match is close enough.
        """
        # NOTE(review): the handling of ``other`` is not visible in the
        # recovered source; this follows the parameter documentation.
        if other is not None:
            self.set_seq2(other)
        return self.ratio() > self.cutoff

    def search_string(self):
        """Return a cleaned string based on seq1 that can be used for searching."""
        # self.a has already been through the cleaner (see set_seq1).
        # type(u'') is `unicode` on Python 2 and `str` on Python 3.
        if isinstance(self.a, type(u'')):
            # Convert to combined form for better search results
            return normalize('NFC', self.a)
        return self.a
95
class MovieComparator(StringComparator):
    """Compares two strings for similarity based on extracted movie title, year and quality."""

    def __init__(self):
        # These attributes are set before the parent constructor runs because
        # SequenceMatcher.__init__ (reached via StringComparator.__init__)
        # calls set_seq1/set_seq2, which are overridden below and use
        # self.parser.
        self.a_year, self.b_year = None, None
        self.a_quality, self.b_quality = qualities.UNKNOWN, qualities.UNKNOWN
        self.parser = MovieParser()
        super(MovieComparator, self).__init__(cutoff=0.9)

    def set_seq1(self, a):
        """Set first string for comparison."""
        # NOTE(review): the line that hands ``a`` to the parser is not visible
        # in the recovered source; this assumes MovieParser.parse(text)
        # populates name/year/quality. Confirm against MovieParser.
        self.parser.parse(a)
        super(MovieComparator, self).set_seq1(self.parser.name)
        self.a_year = self.parser.year
        self.a_quality = self.parser.quality

    def set_seq2(self, b):
        """Set second string for comparison."""
        # NOTE(review): same assumption about MovieParser.parse as in set_seq1.
        self.parser.parse(b)
        super(MovieComparator, self).set_seq2(self.parser.name)
        self.b_year = self.parser.year
        self.b_quality = self.parser.quality

    def matches(self, other=None):
        """Compare the two strings, return True if they appear to be the same movie.

        :param other: String to compare against. If not specified, last specified string will be used.
        :return: True if match is close enough.
        """
        # Run the title comparison first; it also applies ``other`` as seq2.
        result = super(MovieComparator, self).matches(other)
        # NOTE(review): the early ``return False`` lines and the final return
        # are not visible in the recovered source; they are reconstructed
        # from the surrounding conditions.
        # A known quality on the first string must match exactly.
        if self.a_quality > qualities.UNKNOWN and self.a_quality != self.b_quality:
            return False
        # Years are only compared when both sides have one.
        if self.a_year and self.b_year and self.a_year != self.b_year:
            # TODO: Make this fuzzier? tmdb and imdb years do not always match
            return False
        return result

    def search_string(self):
        """Return a cleaned string based on seq1 that can be used for searching."""
        # NOTE(review): the initial binding of ``result``, the year guard, the
        # appended '720p'/'1080p' literals, the else branch header and the
        # final return are not visible in the recovered source and are
        # reconstructed. Confirm against the original file.
        result = self.a
        # type(u'') is `unicode` on Python 2 and `str` on Python 3.
        if isinstance(result, type(u'')):
            # Convert to combined form for better search results
            result = normalize('NFC', result)
        if self.a_year:
            result += ' %s' % self.a_year
        if self.a_quality > qualities.UNKNOWN:
            # Shorten some quality strings in search because of multiple acceptable forms
            if '720p' in self.a_quality.name:
                result += ' 720p'
            elif '1080p' in self.a_quality.name:
                result += ' 1080p'
            else:
                result += ' %s' % self.a_quality
        return result
153
def torrent_availability(seeds, leeches):
    """Returns a rating based on seeds and leeches for a given torrent.

    Seeds are weighted twice as heavily as leeches.

    :param seeds: Number of seeds on the torrent
    :param leeches: Number of leeches on the torrent
    :return: A numeric rating
    """
    return seeds * 2 + leeches