Fabrice Laporte avatar Fabrice Laporte committed 60cc058 Merge

Merge branch 'master' of https://github.com/sampsyo/beets

Comments (0)

Files changed (31)

 f28ea9e2ef8d39913d79dbba73db280ff0740c50 v1.1.0-beta.2
 8f070ce28a7b33d8509b29a8dbe937109bbdbd21 v1.1.0-beta.3
 97f04ce252332dbda013cbc478d702d54a8fc1bd v1.1.0
+b3f7b5267a2f7b46b826d087421d7f4569211240 v1.2.0
+b3f7b5267a2f7b46b826d087421d7f4569211240 v1.2.0
+ecff182221ec32a9f6549ad3ce8d2ab4c3e5568a v1.2.0

beets/__init__.py

 # The above copyright notice and this permission notice shall be
 # included in all copies or substantial portions of the Software.
 
-__version__ = '1.1.1'
+__version__ = '1.2.1'
 __author__ = 'Adrian Sampson <adrian@radbox.org>'
 
 import beets.library

beets/autotag/hooks.py

 
 # Aggregation of sources.
 
-def _album_for_id(album_id):
-    """Get a list of albums corresponding to a release ID."""
-    candidates = []
-
-    # Candidates from MusicBrainz.
+def album_for_mbid(release_id):
+    """Get an AlbumInfo object for a MusicBrainz release ID. Return None
+    if the ID is not found.
+    """
     try:
-        candidates.append(mb.album_for_id(album_id))
+        return mb.album_for_id(release_id)
     except mb.MusicBrainzAPIError as exc:
         exc.log(log)
 
-    # From plugins.
-    candidates.extend(plugins.album_for_id(album_id))
-
-    return filter(None, candidates)
-
-def _track_for_id(track_id):
-    """Get an item for a recording ID."""
-    candidates = []
-
-    # From MusicBrainz.
+def track_for_mbid(recording_id):
+    """Get a TrackInfo object for a MusicBrainz recording ID. Return None
+    if the ID is not found.
+    """
     try:
-        candidates.append(mb.track_for_id(track_id))
+        return mb.track_for_id(recording_id)
     except mb.MusicBrainzAPIError as exc:
         exc.log(log)
 
-    # From plugins.
-    candidates.extend(plugins.track_for_id(track_id))
-
+def albums_for_id(album_id):
+    """Get a list of albums for an ID. The result of the MusicBrainz
+    lookup comes first, followed by any candidates supplied by plugins.
+    Falsy entries (such as the None returned by a failed lookup) are
+    filtered out.
+    """
+    candidates = [album_for_mbid(album_id)]
+    candidates.extend(plugins.album_for_id(album_id))
     return filter(None, candidates)
 
-def _album_candidates(items, artist, album, va_likely):
+def tracks_for_id(track_id):
+    """Get a list of tracks for an ID. The result of the MusicBrainz
+    lookup comes first, followed by any candidates supplied by plugins.
+    Falsy entries (such as the None returned by a failed lookup) are
+    filtered out.
+    """
+    candidates = [track_for_mbid(track_id)]
+    candidates.extend(plugins.track_for_id(track_id))
+    return filter(None, candidates)
+
+def album_candidates(items, artist, album, va_likely):
     """Search for album matches. ``items`` is a list of Item objects
     that make up the album. ``artist`` and ``album`` are the respective
     names (strings), which may be derived from the item list or may be
 
     return out
 
-def _item_candidates(item, artist, title):
+def item_candidates(item, artist, title):
     """Search for item matches. ``item`` is the Item to be matched.
     ``artist`` and ``title`` are strings and either reflect the item or
     are specified by the user.

beets/autotag/match.py

 from beets.autotag import hooks
 
 # A configuration view for the distance weights.
-weights = config['match']['weight']
+weights = config['match']['distance_weights']
 
 # Parameters for string distance function.
 # Words that can be moved to the end of a string using a comma.
     """
     return item.track not in (track_info.medium_index, track_info.index)
 
+class Distance(object):
+    """Keeps track of multiple distance penalties. Provides a single weighted
+    distance for all penalties as well as a weighted distance for each
+    individual penalty.
+    """
+    def __cmp__(self, other):
+        return cmp(self.distance, other)
+
+    def __float__(self):
+        return self.distance
+
+    def __getitem__(self, key):
+        """Returns the weighted distance for a named penalty.
+        """
+        dist = sum(self._penalties[key]) * weights[key].as_number()
+        dist_max = self.max_distance
+        if dist_max:
+            return dist / dist_max
+        return 0.0
+
+    def __init__(self):
+        self._penalties = {}
+
+    def __iter__(self):
+        return iter(self.sorted)
+
+    def __len__(self):
+        return len(self.sorted)
+
+    def __sub__(self, other):
+        return self.distance - other
+
+    def __rsub__(self, other):
+        return other - self.distance
+
+    def _eq(self, value1, value2):
+        """Returns True if `value1` is equal to `value2`. `value1` may be a
+        compiled regular expression, in which case it will be matched against
+        `value2`.
+        """
+        if isinstance(value1, re._pattern_type):
+            return bool(value1.match(value2))
+        return value1 == value2
+
+    def add(self, key, dist):
+        """Adds a distance penalty. `key` must correspond with a configured
+        weight setting. `dist` must be a float between 0.0 and 1.0, and will be
+        added to any existing distance penalties for the same key.
+        """
+        if not 0.0 <= dist <= 1.0:
+            raise ValueError(
+                    '`dist` must be between 0.0 and 1.0. It is: %r' % dist)
+        self._penalties.setdefault(key, []).append(dist)
+
+    def add_equality(self, key, value, options):
+        """Adds a distance penalty of 1.0 if `value` doesn't match any of the
+        values in `options`. If an option is a compiled regular expression, it
+        will be considered equal if it matches against `value`.
+        """
+        if not isinstance(options, (list, tuple)):
+            options = [options]
+        for opt in options:
+            if self._eq(opt, value):
+                dist = 0.0
+                break
+        else:
+            dist = 1.0
+        self.add(key, dist)
+
+    def add_expr(self, key, expr):
+        """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0.
+        """
+        if expr:
+            self.add(key, 1.0)
+        else:
+            self.add(key, 0.0)
+
+    def add_number(self, key, number1, number2):
+        """Adds a distance penalty of 1.0 for each number of difference between
+        `number1` and `number2`, or 0.0 when there is no difference. Use this
+        when there is no upper limit on the difference between the two numbers.
+        """
+        diff = abs(number1 - number2)
+        if diff:
+            for i in range(diff):
+                self.add(key, 1.0)
+        else:
+            self.add(key, 0.0)
+
+    def add_priority(self, key, value, options):
+        """Adds a distance penalty that corresponds to the position at which
+        `value` appears in `options`. A distance penalty of 0.0 for the first
+        option, or 1.0 if there is no matching option. If an option is a
+        compiled regular expression, it will be considered equal if it matches
+        against `value`.
+        """
+        if not isinstance(options, (list, tuple)):
+            options = [options]
+        unit = 1.0 / (len(options) or 1)
+        for i, opt in enumerate(options):
+            if self._eq(opt, value):
+                dist = i * unit
+                break
+        else:
+            dist = 1.0
+        self.add(key, dist)
+
+    def add_ratio(self, key, number1, number2):
+        """Adds a distance penalty for `number1` as a ratio of `number2`.
+        `number1` is bound at 0 and `number2`.
+        """
+        number = float(max(min(number1, number2), 0))
+        if number2:
+            dist = number / number2
+        else:
+            dist = 0.0
+        self.add(key, dist)
+
+    def add_string(self, key, str1, str2):
+        """Adds a distance penalty based on the edit distance between `str1`
+        and `str2`.
+        """
+        dist = string_dist(str1, str2)
+        self.add(key, dist)
+
+    @property
+    def distance(self):
+        """Returns a weighted and normalised distance across all penalties.
+        """
+        dist_max = self.max_distance
+        if dist_max:
+            return self.raw_distance / self.max_distance
+        return 0.0
+
+    @property
+    def max_distance(self):
+        """Returns the maximum distance penalty.
+        """
+        dist_max = 0.0
+        for key, penalty in self._penalties.iteritems():
+            dist_max += len(penalty) * weights[key].as_number()
+        return dist_max
+
+    @property
+    def raw_distance(self):
+        """Returns the raw (denormalised) distance.
+        """
+        dist_raw = 0.0
+        for key, penalty in self._penalties.iteritems():
+            dist_raw += sum(penalty) * weights[key].as_number()
+        return dist_raw
+
+    @property
+    def sorted(self):
+        """Returns a list of (dist, key) pairs, with `dist` being the weighted
+        distance, sorted from highest to lowest. Does not include penalties
+        with a zero value.
+        """
+        list_ = []
+        for key in self._penalties:
+            dist = self[key]
+            if dist:
+                list_.append((dist, key))
+        # Convert distance into a negative float we can sort items in ascending
+        # order (for keys, when the penalty is equal) and still get the items
+        # with the biggest distance first.
+        return sorted(list_, key=lambda (dist, key): (0-dist, key))
+
+    def update(self, dist):
+        """Adds all the distance penalties from `dist`.
+        """
+        if not isinstance(dist, Distance):
+            raise ValueError(
+                    '`dist` must be a Distance object. It is: %r' % dist)
+        for key, penalties in dist._penalties.iteritems():
+            self._penalties.setdefault(key, []).extend(penalties)
+
 def track_distance(item, track_info, incl_artist=False):
     """Determines the significance of a track metadata change. Returns a
-    float in [0.0,1.0]. `incl_artist` indicates that a distance
-    component should be included for the track artist (i.e., for
-    various-artist releases).
+    Distance object. `incl_artist` indicates that a distance component should
+    be included for the track artist (i.e., for various-artist releases).
     """
-    # Distance and normalization accumulators.
-    dist, dist_max = 0.0, 0.0
+    dist = Distance()
 
-    # Check track length.
-    # If there's no length to check, apply no penalty.
+    # Length.
     if track_info.length:
-        diff = abs(item.length - track_info.length)
-        diff = max(diff - weights['track_length_grace'].as_number(), 0.0)
-        diff = min(diff, weights['track_length_max'].as_number())
-        dist += (diff / weights['track_length_max'].as_number()) * \
-                weights['track_length'].as_number()
-    dist_max += weights['track_length'].as_number()
+        diff = abs(item.length - track_info.length) - \
+               weights['track_length_grace'].as_number()
+        dist.add_ratio('track_length', diff,
+                       weights['track_length_max'].as_number())
 
-    # Track title.
-    dist += string_dist(item.title, track_info.title) * \
-        weights['track_title'].as_number()
-    dist_max += weights['track_title'].as_number()
+    # Title.
+    dist.add_string('track_title', item.title, track_info.title)
 
-    # Track artist, if included.
-    # Attention: MB DB does not have artist info for all compilations,
-    # so only check artist distance if there is actually an artist in
-    # the MB track data.
+    # Artist. Only check if there is actually an artist in the track data.
     if incl_artist and track_info.artist and \
             item.artist.lower() not in VA_ARTISTS:
-        dist += string_dist(item.artist, track_info.artist) * \
-                weights['track_artist'].as_number()
-        dist_max += weights['track_artist'].as_number()
+        dist.add_string('track_artist', item.artist, track_info.artist)
 
     # Track index.
     if track_info.index and item.track:
-        if track_index_changed(item, track_info):
-            dist += weights['track_index'].as_number()
-        dist_max += weights['track_index'].as_number()
+        dist.add_expr('track_index', track_index_changed(item, track_info))
 
-    # MusicBrainz track ID.
+    # Track ID.
     if item.mb_trackid:
-        if item.mb_trackid != track_info.track_id:
-            dist += weights['track_id'].as_number()
-        dist_max += weights['track_id'].as_number()
+        dist.add_expr('track_id', item.mb_trackid != track_info.track_id)
 
-    # Plugin distances.
-    plugin_d, plugin_dm = plugins.track_distance(item, track_info)
-    dist += plugin_d
-    dist_max += plugin_dm
+    # Plugins.
+    dist.update(plugins.track_distance(item, track_info))
 
-    return dist / dist_max
+    return dist
 
 def distance(items, album_info, mapping):
     """Determines how "significant" an album metadata change would be.
-    Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object
+    Returns a Distance object. `album_info` is an AlbumInfo object
     reflecting the album to be compared. `items` is a sequence of all
     Item objects that will be matched (order is not important).
     `mapping` is a dictionary mapping Items to TrackInfo objects; the
     """
     likelies, _ = current_metadata(items)
 
-    # These accumulate the possible distance components. The final
-    # distance will be dist/dist_max.
-    dist = 0.0
-    dist_max = 0.0
+    dist = Distance()
 
-    # Artist/album metadata.
+    # Artist, if not various.
     if not album_info.va:
-        dist += string_dist(likelies['artist'], album_info.artist) * \
-                weights['artist'].as_number()
-        dist_max += weights['artist'].as_number()
-    dist += string_dist(likelies['album'], album_info.album) * \
-            weights['album'].as_number()
-    dist_max += weights['album'].as_number()
+        dist.add_string('artist', likelies['artist'], album_info.artist)
 
-    # Year. No penalty for matching release or original year.
-    if likelies['year'] and album_info.year:
-        if likelies['year'] not in (album_info.year, album_info.original_year):
-            diff = abs(album_info.year - likelies['year'])
-            if diff:
-                dist += (1.0 - 1.0 / diff) * weights['year'].as_number()
-        dist_max += weights['year'].as_number()
+    # Album.
+    dist.add_string('album', likelies['album'], album_info.album)
 
-    # Actual or preferred media.
-    preferred_media = config['match']['preferred_media'].get()
-    if likelies['media'] and album_info.media:
-        dist += string_dist(likelies['media'], album_info.media) * \
-                weights['media'].as_number()
-        dist_max += weights['media'].as_number()
-    elif album_info.media and preferred_media:
-        dist += string_dist(album_info.media, preferred_media) * \
-                weights['media'].as_number()
-        dist_max += weights['media'].as_number()
+    # Current or preferred media.
+    if album_info.media:
+        # Preferred media options.
+        patterns = config['match']['preferred']['media'].as_str_seq()
+        options = [re.compile(r'(\d+x)?(%s)' % pat, re.I) for pat in patterns]
+        if options:
+            dist.add_priority('media', album_info.media, options)
+        # Current media.
+        elif likelies['media']:
+            dist.add_equality('media', album_info.media, likelies['media'])
 
-    # MusicBrainz album ID.
+    # Mediums.
+    if likelies['disctotal'] and album_info.mediums:
+        dist.add_number('mediums', likelies['disctotal'], album_info.mediums)
+
+    # Prefer earliest release.
+    if album_info.year and config['match']['preferred']['original_year']:
+        # Assume 1889 (the year of the earliest gramophone discs) if we don't
+        # know the original year.
+        original = album_info.original_year or 1889
+        diff = abs(album_info.year - original)
+        diff_max = abs(datetime.date.today().year - original)
+        dist.add_ratio('year', diff, diff_max)
+    # Year.
+    elif likelies['year'] and album_info.year:
+        if likelies['year'] in (album_info.year, album_info.original_year):
+            # No penalty for matching release or original year.
+            dist.add('year', 0.0)
+        elif album_info.original_year:
+            # Prefer matches closest to the release year.
+            diff = abs(likelies['year'] - album_info.year)
+            diff_max = abs(datetime.date.today().year -
+                           album_info.original_year)
+            dist.add_ratio('year', diff, diff_max)
+        else:
+            # Full penalty when there is no original year.
+            dist.add('year', 1.0)
+
+    # Preferred countries.
+    patterns = config['match']['preferred']['countries'].as_str_seq()
+    options = [re.compile(pat, re.I) for pat in patterns]
+    if album_info.country and options:
+        dist.add_priority('country', album_info.country, options)
+    # Country.
+    elif likelies['country'] and album_info.country:
+        dist.add_string('country', likelies['country'], album_info.country)
+
+    # Label.
+    if likelies['label'] and album_info.label:
+        dist.add_string('label', likelies['label'], album_info.label)
+
+    # Catalog number.
+    if likelies['catalognum'] and album_info.catalognum:
+        dist.add_string('catalognum', likelies['catalognum'],
+                        album_info.catalognum)
+
+    # Disambiguation.
+    if likelies['albumdisambig'] and album_info.albumdisambig:
+        dist.add_string('albumdisambig', likelies['albumdisambig'],
+                        album_info.albumdisambig)
+
+    # Album ID.
     if likelies['mb_albumid']:
-        if likelies['mb_albumid'] != album_info.album_id:
-            dist += weights['album_id'].as_number()
-        dist_max += weights['album_id'].as_number()
+        dist.add_equality('album_id', likelies['mb_albumid'],
+                          album_info.album_id)
 
-    # Apply a small penalty for differences across many minor metadata. This
-    # helps prioritise releases that are nearly identical.
+    # Tracks.
+    dist.tracks = {}
+    for item, track in mapping.iteritems():
+        dist.tracks[track] = track_distance(item, track, album_info.va)
+        dist.add('tracks', dist.tracks[track].distance)
 
-    if likelies['disctotal']:
-        if likelies['disctotal'] != album_info.mediums:
-            dist += weights['minor'].as_number()
-        dist_max += weights['minor'].as_number()
+    # Missing tracks.
+    for i in range(len(album_info.tracks) - len(mapping)):
+        dist.add('missing_tracks', 1.0)
 
-    if likelies['label'] and album_info.label:
-        dist += string_dist(likelies['label'], album_info.label) * \
-                weights['minor'].as_number()
-        dist_max += weights['minor'].as_number()
+    # Unmatched tracks.
+    for i in range(len(items) - len(mapping)):
+        dist.add('unmatched_tracks', 1.0)
 
-    if likelies['catalognum'] and album_info.catalognum:
-        dist += string_dist(likelies['catalognum'],
-                            album_info.catalognum) * \
-                weights['minor'].as_number()
-        dist_max += weights['minor'].as_number()
+    # Plugins.
+    dist.update(plugins.album_distance(items, album_info, mapping))
 
-    if likelies['country'] and album_info.country:
-        dist += string_dist(likelies['country'],
-                            album_info.country) * \
-                weights['minor'].as_number()
-        dist_max += weights['minor'].as_number()
-
-    if likelies['albumdisambig'] and album_info.albumdisambig:
-        dist += string_dist(likelies['albumdisambig'],
-                            album_info.albumdisambig) * \
-                weights['minor'].as_number()
-        dist_max += weights['minor'].as_number()
-
-    # Matched track distances.
-    for item, track in mapping.iteritems():
-        dist += track_distance(item, track, album_info.va) * \
-                weights['track'].as_number()
-        dist_max += weights['track'].as_number()
-
-    # Extra and unmatched tracks.
-    for track in set(album_info.tracks) - set(mapping.values()):
-        dist += weights['missing'].as_number()
-        dist_max += weights['missing'].as_number()
-    for item in set(items) - set(mapping.keys()):
-        dist += weights['unmatched'].as_number()
-        dist_max += weights['unmatched'].as_number()
-
-    # Plugin distances.
-    plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping)
-    dist += plugin_d
-    dist_max += plugin_dm
-
-    # Normalize distance, avoiding divide-by-zero.
-    if dist_max == 0.0:
-        return 0.0
-    else:
-        return dist / dist_max
+    return dist
 
 def match_by_id(items):
     """If the items are tagged with a MusicBrainz album ID, returns an
     if bool(reduce(lambda x,y: x if x==y else (), albumids)):
         albumid = albumids[0]
         log.debug('Searching for discovered album ID: ' + albumid)
-        matches = hooks._album_for_id(albumid)
-        if matches:
-            return matches[0]
+        return hooks.album_for_mbid(albumid)
     else:
         log.debug('No album ID consensus.')
 
     recommendation based on the results' distances.
 
     If the recommendation is higher than the configured maximum for
-    certain situations, the recommendation will be downgraded to the
-    configured maximum.
+    an applied penalty, the recommendation will be downgraded to the
+    configured maximum for that penalty.
     """
     if not results:
         # No candidates: no recommendation.
         # Gap between first two candidates is large.
         rec = recommendation.low
     else:
-        # No conclusion.
-        rec = recommendation.none
+        # No conclusion. Return immediately. Can't be downgraded any further.
+        return recommendation.none
 
-    # "Downgrades" in certain configured situations.
+    # Downgrade to the max rec if it is lower than the current rec for an
+    # applied penalty.
+    keys = set(key for _, key in min_dist)
     if isinstance(results[0], hooks.AlbumMatch):
-        # Load the configured recommendation maxima.
-        max_rec = {}
-        for trigger in 'non_mb_source', 'partial', 'tracklength', 'tracknumber':
-            max_rec[trigger] = \
-                config['match']['max_rec'][trigger].as_choice({
-                    'strong': recommendation.strong,
-                    'medium': recommendation.medium,
-                    'low': recommendation.low,
-                    'none': recommendation.none,
-                })
-
-        # Non-MusicBrainz source.
-        if rec > max_rec['non_mb_source'] and \
-                results[0].info.data_source != 'MusicBrainz':
-            rec = max_rec['non_mb_source']
-
-        # Partial match.
-        if rec > max_rec['partial'] and \
-                (results[0].extra_items or results[0].extra_tracks):
-            rec = max_rec['partial']
-
-        # Check track number and duration for each item.
-        for item, track_info in results[0].mapping.items():
-            # Track length differs.
-            if rec > max_rec['tracklength'] and \
-                    item.length and track_info.length and \
-                    abs(item.length - track_info.length) > \
-                    weights['track_length_grace'].as_number():
-                rec = max_rec['tracklength']
-
-            # Track number differs.
-            if rec > max_rec['tracknumber'] and \
-                    track_index_changed(item, track_info):
-                rec = max_rec['tracknumber']
+        for track_dist in min_dist.tracks.values():
+            keys.update(key for _, key in track_dist)
+    for key in keys:
+        max_rec = config['match']['max_rec'][key].as_choice({
+            'strong': recommendation.strong,
+            'medium': recommendation.medium,
+            'low': recommendation.low,
+            'none': recommendation.none,
+        })
+        rec = min(rec, max_rec)
 
     return rec
 
 
     # Get the change distance.
     dist = distance(items, info, mapping)
+
+    # Skip matches with ignored penalties.
+    penalties = [key for _, key in dist]
+    for penalty in config['match']['ignored'].as_str_seq():
+        if penalty in penalties:
+            log.debug('Ignored. Penalty: %s' % penalty)
+            return
+
     log.debug('Success. Distance: %f' % dist)
-
     results[info.album_id] = hooks.AlbumMatch(dist, info, mapping,
                                               extra_items, extra_tracks)
 
         - The current artist.
         - The current album.
         - A list of AlbumMatch objects. The candidates are sorted by
-        distance (i.e., best match first).
+          distance (i.e., best match first).
         - A recommendation.
     If search_artist and search_album or search_id are provided, then
     they are used as search terms in place of the current metadata.
     # Search by explicit ID.
     if search_id is not None:
         log.debug('Searching for album ID: ' + search_id)
-        search_cands = hooks._album_for_id(search_id)
+        search_cands = hooks.albums_for_id(search_id)
 
     # Use existing metadata or text search.
     else:
         log.debug(u'Album might be VA: %s' % str(va_likely))
 
         # Get the results from the data sources.
-        search_cands = hooks._album_candidates(items, search_artist,
-                                               search_album, va_likely)
+        search_cands = hooks.album_candidates(items, search_artist,
+                                              search_album, va_likely)
 
     log.debug(u'Evaluating %i candidates.' % len(search_cands))
     for info in search_cands:
     trackid = search_id or item.mb_trackid
     if trackid:
         log.debug('Searching for track ID: ' + trackid)
-        for track_info in hooks._track_for_id(trackid):
+        for track_info in hooks.tracks_for_id(trackid):
             dist = track_distance(item, track_info, incl_artist=True)
             candidates[track_info.track_id] = \
                     hooks.TrackMatch(dist, track_info)
     log.debug(u'Item search terms: %s - %s' % (search_artist, search_title))
 
     # Get and evaluate candidate metadata.
-    for track_info in hooks._item_candidates(item, search_artist, search_title):
+    for track_info in hooks.item_candidates(item, search_artist, search_title):
         dist = track_distance(item, track_info, incl_artist=True)
         candidates[track_info.track_id] = hooks.TrackMatch(dist, track_info)
 

beets/config_default.yaml

     medium_rec_thresh: 0.25
     rec_gap_thresh: 0.25
     max_rec:
-        non_mb_source: strong
-        partial: medium
-        tracklength: strong
-        tracknumber: strong
-    preferred_media: CD
-    weight:
+        source: strong
+        artist: strong
+        album: strong
+        media: strong
+        mediums: strong
+        year: strong
+        country: strong
+        label: strong
+        catalognum: strong
+        albumdisambig: strong
+        album_id: strong
+        tracks: strong
+        missing_tracks: medium
+        unmatched_tracks: medium
+        track_title: strong
+        track_artist: strong
+        track_index: strong
+        track_length_grace: strong
+        track_length_max: strong
+        track_length: strong
+        track_id: strong
+    distance_weights:
         source: 2.0
         artist: 3.0
         album: 3.0
+        media: 1.0
+        mediums: 1.0
         year: 1.0
-        media: 1.0
+        country: 0.5
+        label: 0.5
+        catalognum: 0.5
+        albumdisambig: 0.5
         album_id: 5.0
-        minor: 0.5
-        track: 1.0
-        missing: 0.9
-        unmatched: 0.6
+        tracks: 2.0
+        missing_tracks: 0.9
+        unmatched_tracks: 0.6
         track_title: 3.0
         track_artist: 2.0
         track_index: 1.0
         track_length_max: 30
         track_length: 2.0
         track_id: 5.0
+    preferred:
+        countries: []
+        media: []
+        original_year: no
+    ignored: []
         try:
             f = MediaFile(syspath(read_path))
         except (OSError, IOError) as exc:
-            raise util.FilesystemError(exc, 'read', (self.path,),
+            raise util.FilesystemError(exc, 'read', (read_path,),
                                        traceback.format_exc())
 
         for key in ITEM_KEYS_META:
         # Build the mapping for substitution in the template,
         # beginning with the values from the database.
         mapping = {}
-        for key in ITEM_KEYS_META:
+        for key in ITEM_KEYS:
             # Get the values from either the item or its album.
             if key in ALBUM_KEYS_ITEM and album is not None:
                 # From album.
                 value = format_for_path(value, key, pathmod)
             mapping[key] = value
 
-        # Additional fields in non-sanitized case.
-        if not sanitize:
+        # Include the path if we're not sanitizing to construct a path.
+        if sanitize:
+            del mapping['path']
+        else:
             mapping['path'] = displayable_path(self.path)
 
         # Use the album artist if the track artist is not set and

beets/mediafile.py

 class UnreadableFileError(Exception):
     pass
 
-class FileIOError(UnreadableFileError, IOError):
-    def __init__(self, exc):
-        IOError.__init__(self, exc.errno, exc.strerror, exc.filename)
-
 # Raised for files that don't seem to have a type MediaFile supports.
 class FileTypeError(UnreadableFileError):
     pass
 # Human-readable type names.
 TYPES = {
     'mp3':  'MP3',
-    'mp4':  'AAC',
+    'aac':  'AAC',
+    'alac':  'ALAC',
     'ogg':  'OGG',
     'flac': 'FLAC',
     'ape':  'APE',
     'asf':  'Windows Media',
 }
 
+MP4_TYPES = ('aac', 'alac')
+
 
 # Utility.
 
             obj.mgfile[style.key] = out
 
     def _styles(self, obj):
-        if obj.type in ('mp3', 'mp4', 'asf'):
+        if obj.type in ('mp3', 'asf'):
             styles = self.styles[obj.type]
+        elif obj.type in MP4_TYPES:
+            styles = self.styles['mp4']
         else:
             styles = self.styles['etc']  # Sane styles.
 
                     out = out[:-len(style.suffix)]
 
             # MPEG-4 freeform frames are (should be?) encoded as UTF-8.
-            if obj.type == 'mp4' and style.key.startswith('----:') and \
+            if obj.type in MP4_TYPES and style.key.startswith('----:') and \
                     isinstance(out, str):
                 out = out.decode('utf8')
 
 
             # MPEG-4 "freeform" (----) frames must be encoded as UTF-8
             # byte strings.
-            if obj.type == 'mp4' and style.key.startswith('----:') and \
+            if obj.type in MP4_TYPES and style.key.startswith('----:') and \
                     isinstance(out, unicode):
                 out = out.encode('utf8')
 
 
             return picframe.data
 
-        elif obj.type == 'mp4':
+        elif obj.type in MP4_TYPES:
             if 'covr' in obj.mgfile:
                 covers = obj.mgfile['covr']
                 if covers:
             )
             obj.mgfile['APIC'] = picframe
 
-        elif obj.type == 'mp4':
+        elif obj.type in MP4_TYPES:
             if val is None:
                 if 'covr' in obj.mgfile:
                     del obj.mgfile['covr']
         self.path = path
 
         unreadable_exc = (
-            mutagen.mp3.HeaderNotFoundError,
-            mutagen.flac.FLACNoHeaderError,
+            mutagen.mp3.error,
+            mutagen.id3.error,
+            mutagen.flac.error,
             mutagen.monkeysaudio.MonkeysAudioHeaderError,
-            mutagen.mp4.MP4StreamInfoError,
-            mutagen.oggvorbis.OggVorbisHeaderError,
-            mutagen.asf.ASFHeaderError,
+            mutagen.mp4.error,
+            mutagen.oggvorbis.error,
+            mutagen.ogg.error,
+            mutagen.asf.error,
+            mutagen.apev2.error,
         )
         try:
             self.mgfile = mutagen.File(path)
             log.debug(u'header parsing failed: {0}'.format(unicode(exc)))
             raise UnreadableFileError('Mutagen could not read file')
         except IOError as exc:
-            raise FileIOError(exc)
+            if type(exc) == IOError:
+                # This is a base IOError, not a subclass from Mutagen or
+                # anywhere else.
+                raise
+            else:
+                log.debug(traceback.format_exc())
+                raise UnreadableFileError('Mutagen raised an exception')
         except Exception as exc:
             # Hide bugs in Mutagen.
             log.debug(traceback.format_exc())
             raise FileTypeError('file type unsupported by Mutagen')
         elif type(self.mgfile).__name__ == 'M4A' or \
              type(self.mgfile).__name__ == 'MP4':
-            self.type = 'mp4'
+            # This hack differentiates AAC and ALAC until we find a more
+            # deterministic approach. Mutagen only sets the sample rate
+            # for AAC files. See:
+            # https://github.com/sampsyo/beets/pull/295
+            if hasattr(self.mgfile.info, 'sample_rate') and \
+               self.mgfile.info.sample_rate > 0:
+                self.type = 'aac'
+            else:
+                self.type = 'alac'
         elif type(self.mgfile).__name__ == 'ID3' or \
              type(self.mgfile).__name__ == 'MP3':
             self.type = 'mp3'
         return {}
 
     def track_distance(self, item, info):
-        """Should return a (distance, distance_max) pair to be added
-        to the distance value for every track comparison.
+        """Should return a Distance object to be added to the
+        distance for every track comparison.
         """
-        return 0.0, 0.0
+        return beets.autotag.match.Distance()
 
     def album_distance(self, items, album_info, mapping):
-        """Should return a (distance, distance_max) pair to be added
-        to the distance value for every album-level comparison.
+        """Should return a Distance object to be added to the
+        distance for every album-level comparison.
         """
-        return 0.0, 0.0
+        return beets.autotag.match.Distance()
 
     def candidates(self, items, artist, album, va_likely):
         """Should return a sequence of AlbumInfo objects that match the
 
 def track_distance(item, info):
     """Gets the track distance calculated by all loaded plugins.
-    Returns a (distance, distance_max) pair.
+    Returns a Distance object.
     """
-    dist = 0.0
-    dist_max = 0.0
+    dist = beets.autotag.match.Distance()
     for plugin in find_plugins():
-        d, dm = plugin.track_distance(item, info)
-        dist += d
-        dist_max += dm
-    return dist, dist_max
+        dist.update(plugin.track_distance(item, info))
+    return dist
 
 def album_distance(items, album_info, mapping):
     """Returns the album distance calculated by plugins."""
-    dist = 0.0
-    dist_max = 0.0
+    dist = beets.autotag.match.Distance()
     for plugin in find_plugins():
-        d, dm = plugin.album_distance(items, album_info, mapping)
-        dist += d
-        dist_max += dm
-    return dist, dist_max
+        dist.update(plugin.album_distance(items, album_info, mapping))
+    return dist
 
 def candidates(items, artist, album, va_likely):
     """Gets MusicBrainz candidates for an album from each plugin.

beets/ui/__init__.py

     else:
         return text
 
-def _colordiff(a, b, highlight='red'):
+def _colordiff(a, b, highlight='red', second_highlight='lightgray'):
     """Given two values, return the same pair of strings except with
     their differences highlighted in the specified color. Strings are
     highlighted intelligently to show differences; other values are
             # Left only.
             a_out.append(colorize(highlight, a[a_start:a_end]))
         elif op == 'replace':
-            # Right and left differ.
-            a_out.append(colorize(highlight, a[a_start:a_end]))
-            b_out.append(colorize(highlight, b[b_start:b_end]))
+            # Right and left differ. Colorise with second highlight if
+            # it's just a case change.
+            if a[a_start:a_end].lower() != b[b_start:b_end].lower():
+                color = highlight
+            else:
+                color = second_highlight
+            a_out.append(colorize(color, a[a_start:a_end]))
+            b_out.append(colorize(color, b[b_start:b_end]))
         else:
             assert(False)
 

beets/ui/commands.py

 
 VARIOUS_ARTISTS = u'Various Artists'
 
-PARTIAL_MATCH_MESSAGE = u'(partial match!)'
-
 # Importer utilities and support.
 
 def disambig_string(info):
-    """Returns label, year and media disambiguation, if available.
+    """Returns source, media, year, country, label and album disambiguation.
     """
     disambig = []
+    if info.data_source != 'MusicBrainz':
+        disambig.append(info.data_source)
     if info.media:
         if info.mediums > 1:
             disambig.append(u'{0}x{1}'.format(
         out = ui.colorize('red', out)
     return out
 
+def penalty_string(distance, limit=None):
+    """Build a yellow, parenthesized, comma-separated summary of the
+    penalty keys found by iterating over `distance`. The ``album_`` and
+    ``track_`` markers are removed from each key and underscores become
+    spaces. When `limit` is truthy and there are more than `limit`
+    penalties, only the first `limit` are shown, followed by ``...``.
+    Returns None when there are no penalties.
+    """
+    labels = [
+        key.replace('album_', '').replace('track_', '').replace('_', ' ')
+        for _, key in distance
+    ]
+    if not labels:
+        return None
+    if limit and len(labels) > limit:
+        labels = labels[:limit] + ['...']
+    return ui.colorize('yellow', '(%s)' % ', '.join(labels))
+
 def show_change(cur_artist, cur_album, match):
     """Print out a representation of the changes that will be made if an
     album's tags are changed according to `match`, which must be an AlbumMatch
     object.
     """
-    def show_album(artist, album, partial=False):
+    def show_album(artist, album):
         if artist:
             album_description = u'    %s - %s' % (artist, album)
         elif album:
             album_description = u'    %s' % album
         else:
             album_description = u'    (unknown album)'
-
-        out = album_description
-
-        # Add a suffix if this is a partial match.
-        if partial:
-            out += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE)
-
-        print_(out)
+        print_(album_description)
 
     def format_index(track_info):
         """Return a string representing the track index of the given
         print_("To:")
         show_album(artist_r, album_r)
     else:
-        message = u"Tagging:\n    %s - %s" % (match.info.artist,
-                                              match.info.album)
-        if match.extra_items or match.extra_tracks:
-            message += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE)
-        print_(message)
+        print_(u"Tagging:\n    %s - %s" % (match.info.artist, match.info.album))
 
     # Data URL.
     if match.info.data_url:
 
     # Info line.
     info = []
+    # Similarity.
     info.append('(Similarity: %s)' % dist_string(match.distance))
-    if match.info.data_source != 'MusicBrainz':
-        info.append(ui.colorize('turquoise', '(%s)' % match.info.data_source))
+    # Penalties.
+    penalties = penalty_string(match.distance)
+    if penalties:
+        info.append(penalties)
+    # Disambiguation.
     disambig = disambig_string(match.info)
     if disambig:
         info.append(ui.colorize('lightgray', '(%s)' % disambig))
         cur_track, new_track = format_index(item), format_index(track_info)
         if cur_track != new_track:
             if item.track in (track_info.index, track_info.medium_index):
-                color = 'yellow'
+                color = 'lightgray'
             else:
                 color = 'red'
             if (cur_track + new_track).count('-') == 1:
             rhs += templ.format(rhs_length)
             lhs_width += len(cur_length) + 3
 
-        # Hidden penalties. No LHS/RHS diff is displayed, but we still want to
-        # indicate that a penalty has been applied to explain the similarity
-        # score.
-        penalties = []
-        if match.info.va and track_info.artist and \
-                item.artist.lower() not in VA_ARTISTS:
-            penalties.append('artist')
-        if item.mb_trackid and item.mb_trackid != track_info.track_id:
-            penalties.append('ID')
+        # Penalties.
+        penalties = penalty_string(match.distance.tracks[track_info])
         if penalties:
-            rhs += ' %s' % ui.colorize('red',
-                                       '(%s)' % ', '.join(penalties))
+            rhs += ' %s' % penalties
 
         if lhs != rhs:
             lines.append((' * %s' % lhs, rhs, lhs_width))
                        (cur_artist, cur_album))
                 print_('Candidates:')
                 for i, match in enumerate(candidates):
+                    # Artist, album and distance.
                     line = ['%i. %s - %s (%s)' % (i + 1, match.info.artist,
                                                   match.info.album,
                                                   dist_string(match.distance))]
 
-                    # Point out the partial matches.
-                    if match.extra_items or match.extra_tracks:
-                        line.append(ui.colorize('yellow',
-                                                PARTIAL_MATCH_MESSAGE))
+                    # Penalties.
+                    penalties = penalty_string(match.distance, 3)
+                    if penalties:
+                        line.append(penalties)
 
-                    # Sources other than MusicBrainz.
-                    source = match.info.data_source
-                    if source != 'MusicBrainz':
-                        line.append(ui.colorize('turquoise', '(%s)' % source))
-
+                    # Disambiguation
                     disambig = disambig_string(match.info)
                     if disambig:
                         line.append(ui.colorize('lightgray', '(%s)' % disambig))

beets/ui/migrate.py

     config.yaml will be moved aside. Otherwise, the process is aborted
     when the file exists.
     """
+
+    # Load legacy configuration data, if any.
+    config, configpath = get_config()
+    if not config:
+        log.debug(u'no config file found at {0}'.format(
+            util.displayable_path(configpath)
+            ))
+        return
+
     # Get the new configuration file path and possibly move it out of
     # the way.
     destfn = os.path.join(beets.config.config_dir(), confit.CONFIG_FILENAME)
             # File exists and we won't replace it. We're done.
             return
 
-    # Load legacy configuration data, if any.
-    config, configpath = get_config()
-    if not config:
-        log.debug(u'no config file found at {0}'.format(
-            util.displayable_path(configpath)
-        ))
-        return
     log.debug(u'migrating config file {0}'.format(
         util.displayable_path(configpath)
     ))

beetsplug/beatport.py

+# This file is part of beets.
+# Copyright 2013, Adrian Sampson.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Adds Beatport release and track search support to the autotagger
+"""
+import logging
+import re
+from datetime import datetime, timedelta
+
+import requests
+
+from beets.autotag.hooks import AlbumInfo, TrackInfo
+from beets.autotag.match import Distance
+from beets.plugins import BeetsPlugin
+
+log = logging.getLogger('beets')
+
+
+class BeatportAPIError(Exception):
+    """Raised when a request to the Beatport API cannot be completed."""
+
+
+class BeatportObject(object):
+    """Base class for objects built from a Beatport API JSON dictionary.
+
+    `data` must contain ``id`` and ``name``. The ``releaseDate``,
+    ``artists`` and ``genres`` entries are optional; when they are missing
+    or empty, `release_date` is None and `artists`/`genres` are empty
+    lists, so the attributes always exist.
+    """
+    def __init__(self, data):
+        self.beatport_id = data['id']
+        self.name = unicode(data['name'])
+
+        # Always define the optional attributes so that later reads cannot
+        # fail with AttributeError on a sparse API response.
+        release_date = data.get('releaseDate')
+        if release_date:
+            self.release_date = datetime.strptime(release_date, '%Y-%m-%d')
+        else:
+            self.release_date = None
+        # Each artist is kept as an (id, name) pair.
+        self.artists = [(x['id'], unicode(x['name']))
+                        for x in data.get('artists') or []]
+        self.genres = [unicode(x['name'])
+                       for x in data.get('genres') or []]
+
+
+class BeatportAPI(object):
+    """Thin helper for issuing GET requests against the Beatport API."""
+    API_BASE = 'http://api.beatport.com/'
+
+    @classmethod
+    def get(cls, endpoint, **kwargs):
+        """Request `endpoint` (relative to `API_BASE`) with `kwargs` as
+        query parameters and return the ``results`` member of the decoded
+        JSON response.
+
+        Raises BeatportAPIError when the request itself fails or when the
+        response object is falsy.
+        """
+        try:
+            response = requests.get(cls.API_BASE + endpoint, params=kwargs)
+        except Exception as e:
+            # Format the exception itself rather than the deprecated
+            # `message` attribute.
+            raise BeatportAPIError("Error connecting to Beatport API: {0}"
+                                   .format(e))
+        if not response:
+            raise BeatportAPIError(
+                "Error {0.status_code} for '{0.request.path_url}'"
+                .format(response))
+        return response.json()['results']
+
+
+class BeatportSearch(object):
+    """Runs a Beatport catalog search on construction and stores the
+    matching objects in `results`.
+    """
+    query = None
+    release_type = None
+
+    def __unicode__(self):
+        return u'<BeatportSearch for {0} "{1}" with {2} results>'.format(
+            self.release_type, self.query, len(self.results))
+
+    def __init__(self, query, release_type='release', details=True):
+        """Search for `query`, restricted to `release_type` (``'release'``
+        or ``'track'``). For releases, the track list of every hit is
+        fetched as well unless `details` is false.
+        """
+        self.results = []
+        self.query = query
+        self.release_type = release_type
+        facet = 'fieldType:{0}'.format(release_type)
+        found = BeatportAPI.get('catalog/3/search', query=query,
+                                facets=[facet], perPage=5)
+        for entry in found:
+            if release_type == 'track':
+                self.results.append(BeatportTrack(entry))
+                continue
+            if release_type != 'release':
+                # Other result types are ignored.
+                continue
+            hit = BeatportRelease(entry)
+            if details:
+                hit.get_tracks()
+            self.results.append(hit)
+
+
+class BeatportRelease(BeatportObject):
+    """A release built from Beatport API data."""
+    API_ENDPOINT = 'catalog/3/beatport/release'
+
+    # Optional fields default to None at class level so that reading them
+    # never raises AttributeError when the API omitted the value.
+    catalog_number = None
+    label_name = None
+    category = None
+    url = None
+
+    def __unicode__(self):
+        # Tolerate a missing artist list.
+        artists = getattr(self, 'artists', None) or []
+        if len(artists) < 4:
+            artist_str = ", ".join(x[1] for x in artists)
+        else:
+            artist_str = "Various Artists"
+        return u"<BeatportRelease: {0} - {1} ({2})>".format(artist_str,
+                                                            self.name,
+                                                            self.catalog_number)
+
+    def __init__(self, data):
+        """Populate the release from the API dictionary `data`. Only the
+        fields present in `data` override the class-level defaults.
+        """
+        BeatportObject.__init__(self, data)
+        if 'catalogNumber' in data:
+            self.catalog_number = data['catalogNumber']
+        # A null label would make the nested lookup fail, so test the
+        # value rather than mere key presence.
+        if data.get('label'):
+            self.label_name = data['label']['name']
+        if 'category' in data:
+            self.category = data['category']
+        if 'slug' in data:
+            self.url = "http://beatport.com/release/{0}/{1}".format(
+                data['slug'], data['id'])
+
+    @classmethod
+    def from_id(cls, beatport_id):
+        """Fetch the release with the given Beatport ID, including its
+        tracks. Errors from BeatportAPI.get propagate to the caller.
+        """
+        response = BeatportAPI.get(cls.API_ENDPOINT, id=beatport_id)
+        # Build through `cls` so that subclasses get their own type back.
+        release = cls(response['release'])
+        release.tracks = [BeatportTrack(x) for x in response['tracks']]
+        return release
+
+    def get_tracks(self):
+        """Fetch this release's tracks and store them in `self.tracks`."""
+        response = BeatportAPI.get(self.API_ENDPOINT, id=self.beatport_id)
+        self.tracks = [BeatportTrack(x) for x in response['tracks']]
+
+
+class BeatportTrack(BeatportObject):
+    """A track built from Beatport API data."""
+    API_ENDPOINT = 'catalog/3/beatport/track'
+
+    def __unicode__(self):
+        # Tolerate tracks for which the API supplied no artists or mix name.
+        artists = getattr(self, 'artists', None) or []
+        artist_str = ", ".join(x[1] for x in artists)
+        return u"<BeatportTrack: {0} - {1} ({2})>".format(
+            artist_str, self.name, getattr(self, 'mix_name', None))
+
+    def __init__(self, data):
+        """Populate the track from the API dictionary `data`.
+
+        `length` is always set to a timedelta: taken from ``lengthMs``
+        when that is non-zero, otherwise parsed from a ``"M:S"`` string in
+        ``length``, and left at zero when neither is usable.
+        """
+        BeatportObject.__init__(self, data)
+        if 'title' in data:
+            self.title = unicode(data['title'])
+        if 'mixName' in data:
+            self.mix_name = unicode(data['mixName'])
+        self.length = timedelta(milliseconds=data.get('lengthMs', 0) or 0)
+        if not self.length:
+            try:
+                # `or` also covers a `length` key that is present but null.
+                minutes, seconds = (data.get('length') or '0:0').split(':')
+                self.length = timedelta(minutes=int(minutes),
+                                        seconds=int(seconds))
+            except ValueError:
+                # Unparseable length string: keep the zero duration.
+                pass
+        if 'slug' in data:
+            self.url = "http://beatport.com/track/{0}/{1}".format(data['slug'],
+                                                                  data['id'])
+
+    @classmethod
+    def from_id(cls, beatport_id):
+        """Fetch the track with the given Beatport ID. Errors from
+        BeatportAPI.get propagate to the caller.
+        """
+        response = BeatportAPI.get(cls.API_ENDPOINT, id=beatport_id)
+        # Build through `cls` so that subclasses get their own type back.
+        return cls(response['track'])
+
+
+class BeatportPlugin(BeetsPlugin):
+    """Autotagger plugin that offers Beatport releases and tracks as
+    match candidates.
+    """
+    def __init__(self):
+        super(BeatportPlugin, self).__init__()
+        self.config.add({
+            'source_weight': 0.5,
+        })
+
+    def album_distance(self, items, album_info, mapping):
+        """Returns a Distance object. For albums whose data source is
+        Beatport, it carries a ``source`` penalty equal to the configured
+        ``source_weight``; otherwise it is empty.
+        """
+        dist = Distance()
+        if album_info.data_source == 'Beatport':
+            dist.add('source', self.config['source_weight'].as_number())
+        return dist
+
+    def track_distance(self, item, info):
+        """Returns an empty Distance object; no source penalty is applied
+        to individual tracks yet.
+        """
+        return Distance()  # FIXME: Need source information for tracks.
+
+    def candidates(self, items, artist, release, va_likely):
+        """Returns a list of AlbumInfo objects for beatport search results
+        matching release and artist (if not various). Returns an empty
+        list when the Beatport API reports an error.
+        """
+        if va_likely:
+            query = release
+        else:
+            query = '%s %s' % (artist, release)
+        try:
+            return self._get_releases(query)
+        except BeatportAPIError as e:
+            log.debug('Beatport API Error: %s (query: %s)' % (e, query))
+            return []
+
+    def item_candidates(self, item, artist, title):
+        """Returns a list of TrackInfo objects for beatport search results
+        matching title and artist. Returns an empty list when the Beatport
+        API reports an error.
+        """
+        query = '%s %s' % (artist, title)
+        try:
+            return self._get_tracks(query)
+        except BeatportAPIError as e:
+            log.debug('Beatport API Error: %s (query: %s)' % (e, query))
+            return []
+
+    def album_for_id(self, release_id):
+        """Fetches a release by its Beatport ID (a bare number or a
+        beatport.com release URL ending in one) and returns an AlbumInfo
+        object, or None if the ID is not recognized or the Beatport API
+        reports an error.
+        """
+        log.debug('Searching Beatport for release %s' % str(release_id))
+        match = re.search(r'(^|beatport\.com/release/.+/)(\d+)$', release_id)
+        if not match:
+            return None
+        try:
+            release = BeatportRelease.from_id(match.group(2))
+        except BeatportAPIError as e:
+            # Same handling as the search methods: log and report no match.
+            log.debug('Beatport API Error: %s (release ID: %s)' %
+                      (e, release_id))
+            return None
+        return self._get_album_info(release)
+
+    def track_for_id(self, track_id):
+        """Fetches a track by its Beatport ID (a bare number or a
+        beatport.com track URL ending in one) and returns a TrackInfo
+        object, or None if the ID is not recognized or the Beatport API
+        reports an error.
+        """
+        log.debug('Searching Beatport for track %s' % str(track_id))
+        match = re.search(r'(^|beatport\.com/track/.+/)(\d+)$', track_id)
+        if not match:
+            return None
+        try:
+            bp_track = BeatportTrack.from_id(match.group(2))
+        except BeatportAPIError as e:
+            # Same handling as the search methods: log and report no match.
+            log.debug('Beatport API Error: %s (track ID: %s)' %
+                      (e, track_id))
+            return None
+        return self._get_track_info(bp_track)
+
+    def _get_releases(self, query):
+        """Returns a list of AlbumInfo objects for a beatport search query.
+        """
+        # Strip non-word characters from query. Things like "!" and "-" can
+        # cause a query to return no results, even if they match the artist or
+        # album title. Use `re.UNICODE` flag to avoid stripping non-english
+        # word characters. The flag must be passed by keyword: the fourth
+        # positional argument of `re.sub` is `count`, not `flags`.
+        query = re.sub(r'\W+', ' ', query, flags=re.UNICODE)
+        # Strip medium information from query. Things like "CD1" and "disk 1"
+        # can also negate an otherwise positive result.
+        query = re.sub(r'\b(CD|dis[ck])\s*\d+', '', query, flags=re.I)
+        albums = [self._get_album_info(x)
+                  for x in BeatportSearch(query).results]
+        return albums
+
+    def _get_album_info(self, release):
+        """Returns an AlbumInfo object for a Beatport Release object.
+        Releases crediting more than three artists are treated as
+        various-artists compilations.
+        """
+        artists = getattr(release, 'artists', None) or []
+        va = len(artists) > 3
+        artist, artist_id = self._get_artist(artists)
+        if va:
+            artist = u"Various Artists"
+        tracks = [self._get_track_info(x, index=idx)
+                  for idx, x in enumerate(release.tracks, 1)]
+
+        # The API may omit any of these fields; read them defensively and
+        # pass None instead of failing with AttributeError.
+        # NOTE(review): assumes AlbumInfo accepts None for these optional
+        # fields -- confirm against beets.autotag.hooks.
+        released = getattr(release, 'release_date', None)
+        return AlbumInfo(album=release.name, album_id=release.beatport_id,
+                         artist=artist, artist_id=artist_id, tracks=tracks,
+                         albumtype=getattr(release, 'category', None), va=va,
+                         year=released.year if released else None,
+                         month=released.month if released else None,
+                         day=released.day if released else None,
+                         label=getattr(release, 'label_name', None),
+                         catalognum=getattr(release, 'catalog_number', None),
+                         media=u'Digital', data_source=u'Beatport',
+                         data_url=getattr(release, 'url', None))
+
+    def _get_track_info(self, track, index=None):
+        """Returns a TrackInfo object for a Beatport Track object. The mix
+        name is appended to the title unless it is "Original Mix" or
+        unavailable.
+        """
+        title = track.name
+        mix_name = getattr(track, 'mix_name', None)
+        if mix_name and mix_name != u"Original Mix":
+            title += u" ({0})".format(mix_name)
+        artist, artist_id = self._get_artist(
+            getattr(track, 'artists', None) or [])
+        length = track.length.total_seconds()
+
+        return TrackInfo(title=title, track_id=track.beatport_id,
+                         artist=artist, artist_id=artist_id,
+                         length=length, index=index)
+
+    def _get_artist(self, artists):
+        """Returns an artist string (all artists) and an artist_id (the main
+        artist) for a list of Beatport release or track artists, given as
+        (id, name) pairs. The string is None when the list is empty.
+        """
+        artist_id = None
+        bits = []
+        for artist in artists:
+            # The first artist with a truthy ID is taken as the main artist.
+            if not artist_id:
+                artist_id = artist[0]
+            name = artist[1]
+            # Strip disambiguation number.
+            name = re.sub(r' \(\d+\)$', '', name)
+            # Move articles to the front.
+            name = re.sub(r'^(.*?), (a|an|the)$', r'\2 \1', name, flags=re.I)
+            bits.append(name)
+        artist = ', '.join(bits).replace(' ,', ',') or None
+        return artist, artist_id
+
+    def _get_tracks(self, query):
+        """Returns a list of TrackInfo objects for a Beatport query.
+        """
+        bp_tracks = BeatportSearch(query, release_type='track').results
+        tracks = [self._get_track_info(x) for x in bp_tracks]
+        return tracks

beetsplug/chroma.py

 from beets import config
 from beets.util import confit
 from beets.autotag import hooks
+from beets.autotag.match import Distance
 import acoustid
 import logging
 from collections import defaultdict
 
 class AcoustidPlugin(plugins.BeetsPlugin):
     def track_distance(self, item, info):
+        dist = Distance()
         if item.path not in _matches or not info.track_id:
             # Match failed or no track ID.
-            return 0.0, 0.0
+            return dist
 
         recording_ids, _ = _matches[item.path]
-        if info.track_id in recording_ids:
-            dist = 0.0
-        else:
-            dist = TRACK_ID_WEIGHT
-        return dist, TRACK_ID_WEIGHT
+        dist.add_expr('track_id', info.track_id not in recording_ids)
+        return dist
 
     def candidates(self, items, artist, album, va_likely):
         albums = []
         for relid in _all_releases(items):
-            matches = hooks._album_for_id(relid)
-            if matches:
-                albums.extend(matches)
+            album = hooks.album_for_mbid(relid)
+            if album:
+                albums.append(album)
 
         log.debug('acoustid album candidates: %i' % len(albums))
         return albums
         recording_ids, _ = _matches[item.path]
         tracks = []
         for recording_id in recording_ids:
-            track = hooks._track_for_id(recording_id)
+            track = hooks.track_for_mbid(recording_id)
             if track:
                 tracks.append(track)
         log.debug('acoustid item candidates: {0}'.format(len(tracks)))

beetsplug/discogs.py

 """
 from beets import config
 from beets.autotag.hooks import AlbumInfo, TrackInfo
-from beets.autotag.match import current_metadata, VA_ARTISTS
+from beets.autotag.match import current_metadata, Distance, VA_ARTISTS
 from beets.plugins import BeetsPlugin
 from discogs_client import Artist, DiscogsAPIError, Release, Search
 import beets
         })
 
     def album_distance(self, items, album_info, mapping):
-        """Returns the discogs source weight and the maximum source weight.
+        """Returns the album distance.
         """
+        dist = Distance()
         if album_info.data_source == 'Discogs':
-            return self.config['source_weight'].as_number() * \
-                    config['match']['weight']['source'].as_number(), \
-                    config['match']['weight']['source'].as_number()
-        else:
-            return 0.0, 0.0
+            dist.add('source', self.config['source_weight'].as_number())
+        return dist
 
     def candidates(self, items, artist, album, va_likely):
         """Returns a list of AlbumInfo objects for discogs search results

beetsplug/mbsync.py

         s.old_data = dict(s.record)
 
         # Get the MusicBrainz recording info.
-        track_info = hooks._track_for_id(s.mb_trackid)
+        track_info = hooks.track_for_mbid(s.mb_trackid)
         if not track_info:
             log.info(u'Recording ID not found: {0}'.format(s.mb_trackid))
             continue
             item.old_data = dict(item.record)
 
         # Get the MusicBrainz album information.
-        matches = hooks._album_for_id(a.mb_albumid)
-        if not matches:
+        album_info = hooks.album_for_mbid(a.mb_albumid)
+        if not album_info:
             log.info(u'Release ID not found: {0}'.format(a.mb_albumid))
             continue
-        album_info = matches[0]
 
         # Construct a track mapping according to MBIDs. This should work
         # for albums that have missing or extra tracks.

beetsplug/missing.py

     if len([i for i in album.items()]) < album.tracktotal:
         # fetch missing items
         # TODO: Implement caching that without breaking other stuff
-        matches = hooks._album_for_id(album.mb_albumid)
-        if matches:
-            album_info = matches[0]
+        album_info = hooks.album_for_mbid(album.mb_albumid)
         for track_info in getattr(album_info, 'tracks', []):
             if track_info.track_id not in item_mbids:
                 item = _item(track_info, album_info, album.id)

beetsplug/mpdupdate.py

 # easier.
 class BufferedSocket(object):
     """Socket abstraction that allows reading by line."""
-    def __init__(self, sep='\n'):
-        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    def __init__(self, host, port, sep='\n'):
+        if host[0] == '/':
+            self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+            self.sock.connect(host)
+        else:
+            self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            self.sock.connect((host, port))
         self.buf = ''
         self.sep = sep
 
-    def connect(self, host, port):
-        self.sock.connect((host, port))
-
     def readline(self):
         while self.sep not in self.buf:
             data = self.sock.recv(1024)
     """
     print('Updating MPD database...')
 
-    s = BufferedSocket()
-    s.connect(host, port)
+    s = BufferedSocket(host, port)
     resp = s.readline()
     if 'OK MPD' not in resp:
         print('MPD connection failed:', repr(resp))

beetsplug/zero.py

                     continue
                 self._log.debug(u'[zero] \"{0}\" ({1}) match: {2}'
                                 .format(fval, fn, ' '.join(patterns)))
-                setattr(item, fn, type(fval)())
+                new_val = None if fval is None else type(fval)()
+                setattr(item, fn, new_val)
                 self._log.debug(u'[zero] {0}={1}'
                                 .format(fn, getattr(item, fn)))

docs/changelog.rst

 Changelog
 =========
 
-1.1.1 (in development)
+1.2.1 (in development)
 ----------------------
 
+This release introduces a major internal change in the way that similarity
+scores are handled, thanks to the continued efforts of Tai Lee. The changes
+you'll notice while using the autotagger are:
+
+* The top 3 distance penalties are now displayed on the release listing,
+  and all album and track penalties are now displayed on the track changes
+  list. This should make it clear exactly which metadata is contributing to a
+  low similarity score.
+* When displaying differences, the colorization has been made more consistent
+  and helpful: red for an actual difference, yellow to indicate that a
+  distance penalty is being applied, and light gray for no penalty (e.g., case
+  changes) or disambiguation data.
+
+There are also three new (or overhauled) configuration options that let you
+customize the way that matches are selected:
+
+* The :ref:`ignored` setting lets you instruct the importer not to show you
+  matches that have a certain penalty applied.
+* The :ref:`preferred` collection of settings lets you specify a sorted list
+  of preferred countries and media types, or prefer releases closest to the
+  original year for an album.
+* The :ref:`max_rec` settings can now be used for any distance penalty
+  component. The recommendation will be downgraded if a penalty is being
+  applied to the specified field.
+
+And some bug fixes:
+
+* Python 2.6 compatibility for :doc:`/plugins/beatport`. Thanks to Wesley
+  Bitter.
+* Don't move the config file during a null migration. Thanks to Theofilos
+  Intzoglou.
+* Fix an occasional crash in the :doc:`/plugins/beatport` when a length
+  field was missing from the API response. Thanks to Timothy Appnel.
+
+
+1.2.0 (June 5, 2013)
+--------------------
+
+There's a *lot* of new stuff in this release: new data sources for the
+autotagger, new plugins to look for problems in your library, tracking the
+date that you acquired new music, an awesome new syntax for doing queries over
+numeric fields, support for ALAC files, and major enhancements to the
+importer's UI and distance calculations. A special thanks goes out to all the
+contributors who helped make this release awesome.
+
+For the first time, beets can now tag your music using additional **data
+sources** to augment the matches from MusicBrainz. When you enable either of
+these plugins, the importer will start showing you new kinds of matches:
+
+* New :doc:`/plugins/discogs`: Get matches from the `Discogs`_ database.
+  Thanks to Artem Ponomarenko and Tai Lee.
+* New :doc:`/plugins/beatport`: Get matches from the `Beatport`_ database.
+  Thanks to Johannes Baiter.
+
+We also have two other new plugins that can scan your library to check for
+common problems, both by Pedro Silva:
+
 * New :doc:`/plugins/duplicates`: Find tracks or albums in your
-  library that are **duplicated**. Thanks to Pedro Silva.
+  library that are **duplicated**.
 * New :doc:`/plugins/missing`: Find albums in your library that are **missing
-  tracks**. Thanks once more to Pedro Silva.
-* New :doc:`/plugins/discogs`: Extends the autotagger to include matches from
-  the `Discogs`_ database. Thanks to Artem Ponomarenko and Tai Lee.
+  tracks**.
+
+There are also three more big features added to beets core:
+
 * Your library now keeps track of **when music was added** to it. The new
   ``added`` field is a timestamp reflecting when each item and album was
   imported and the new ``%time{}`` template function lets you format this
   **numeric ranges**. For example, you can get a list of albums from the '90s
   by typing ``beet ls year:1990..1999`` or find high-bitrate music with
   ``bitrate:128000..``. See :ref:`numericquery`. Thanks to Michael Schuerig.
+* **ALAC files** are now marked as ALAC instead of being conflated with AAC
+  audio. Thanks to Simon Luijk.
+
+In addition, the importer saw various UI enhancements, thanks to Tai Lee:
+
+* More consistent format and colorization of album and track metadata.
+* Display data source URL for matches from the new data source plugins. This
+  should make it easier to migrate data from Discogs or Beatport into
+  MusicBrainz.
+* Display album disambiguation and disc titles in the track listing, when
+  available.
+* Track changes are highlighted in yellow when they indicate a change in
+  format to or from the style of :ref:`per_disc_numbering`. (As before, no
+  penalty is applied because the track number is still "correct", just in a
+  different format.)
+* Sort missing and unmatched tracks by index and title and group them
+  together for better readability.
+* Indicate MusicBrainz ID mismatches.
+
+The calculation of the similarity score for autotagger matches was also
+improved, again thanks to Tai Lee. These changes, in general, help deal with
+the new metadata sources and help disambiguate between similar releases in the
+same MusicBrainz release group:
+
+* Strongly prefer releases with a matching MusicBrainz album ID. This helps
+  beets re-identify the same release when re-importing existing files.
+* Prefer releases that are closest to the tagged ``year``. Tolerate files
+  tagged with release or original year.
+* The new :ref:`preferred_media` config option lets you prefer a certain media
+  type when the ``media`` field is unset on an album.
+* Apply minor penalties across a range of fields to differentiate between
+  nearly identical releases: ``disctotal``, ``label``, ``catalognum``,
+  ``country`` and ``albumdisambig``.
+
+As usual, there were also lots of other great littler enhancements:
+
 * :doc:`/plugins/random`: A new ``-e`` option gives an equal chance to each
   artist in your collection to avoid biasing random samples to prolific
   artists. Thanks to Georges Dubus.
   Duailibe.
 * The importer output now shows the number of audio files in each album.
   Thanks to jayme on GitHub.
-* :doc:`/plugins/lyrics`: Lyrics searches should now turn up more results due
-  to some fixes in dealing with special characters.
 * Plugins can now provide fields for both Album and Item templates, thanks
   to Pedro Silva. Accordingly, the :doc:`/plugins/inline` can also now define
   album fields. For consistency, the ``pathfields`` configuration section has
   Johannes Baiter.
 * The :ref:`fields-cmd` command shows template fields provided by plugins.
   Thanks again to Pedro Silva.
+* :doc:`/plugins/mpdupdate`: You can now communicate with MPD over a Unix
+  domain socket. Thanks to John Hawthorn.
+
+And a batch of fixes:
+
 * Album art filenames now respect the :ref:`replace` configuration.
 * Friendly error messages are now printed when trying to read or write files
   that go missing.
 * The :ref:`modify-cmd` command can now change albums' album art paths (i.e.,
   ``beet modify artpath=...`` works). Thanks to Lucas Duailibe.
-* Various UI enhancements to the importer due to Tai Lee:
-
-  * More consistent format and colorization of album and track metadata.
-  * Display data source URL for :doc:`/plugins/discogs` matches. This should
-    make it easier for people who would rather import and correct data from
-    Discogs into MusicBrainz.
-  * Display album disambiguation and disc titles in the track listing, when
-    available.
-  * Track changes highlighted in yellow indicate a change in format to or from
-    :ref:`per_disc_numbering`. No penalty is applied because the track number
-    is still "correct", just in a different format.
-  * Sort missing and unmatched tracks by index and title and group them
-    together for better readability.
-  * Indicate MusicBrainz ID mismatches.
-
-* Improve calculation of similarity score:
-
-  * Strongly prefer releases with a matching MusicBrainz album ID. This helps
-    beets re-identify the same release when re-importing existing files.
-  * Prefer releases that are closest to the tagged ``year``. Tolerate files
-    tagged with release or original year.
-  * Prefer CD releases by default, when there is no ``media`` tagged in the
-    files being imported. This can be changed with the :ref:`preferred_media`
-    setting.
-  * Apply minor penalties across a range of fields to differentiate between
-    nearly identical releases: ``disctotal``, ``label``, ``catalognum``,
-    ``country`` and ``albumdisambig``.
+* :doc:`/plugins/zero`: Fix a crash when nulling out a field that contains
+  None.
+* Templates can now refer to non-tag item fields (e.g., ``$id`` and
+  ``$album_id``).
+* :doc:`/plugins/lyrics`: Lyrics searches should now turn up more results due
+  to some fixes in dealing with special characters.
 
 .. _Discogs: http://discogs.com/
+.. _Beatport: http://www.beatport.com/
+
 
 1.1.0 (April 29, 2013)
 ----------------------
 project = u'beets'
 copyright = u'2012, Adrian Sampson'
 
-version = '1.1'
-release = '1.1.1'
+version = '1.2'
+release = '1.2.1'
 
 pygments_style = 'sphinx'
 

docs/guides/tagger.rst

   plugin if you're willing to spend a little more CPU power to get tags for
   unidentified albums.
 
-* Currently, MP3, AAC, FLAC, Ogg Vorbis, Monkey's Audio, WavPack, Musepack, and
-  Windows Media files are supported. (Do you use some other format? `Let me
-  know!`_)
+* Currently, MP3, AAC, FLAC, ALAC, Ogg Vorbis, Monkey's Audio, WavPack,
+  Musepack, and Windows Media files are supported. (Do you use some other
+  format? `Let me know!`_)
 
 .. _Let me know!: mailto:adrian@radbox.org
 

docs/plugins/beatport.rst

+Beatport Plugin
+===============
+
+The ``beatport`` plugin adds support for querying the `Beatport`_ catalogue
+during the autotagging process. This can potentially be helpful for users
+whose collection includes a lot of diverse electronic music releases, for which
+both MusicBrainz and (to a lesser degree) Discogs show no matches.
+
+.. _Beatport: http://beatport.com
+
+Installation
+------------
+
+To see matches from the ``beatport`` plugin, you first have to enable it in
+your configuration (see :doc:`/plugins/index`). Then, install the `requests`_
+library (which we need for querying the Beatport API) by typing::
+
+    pip install requests
+
+And you're done. Matches from Beatport should now show up alongside matches
+from MusicBrainz and other sources.
+
+If you have a Beatport ID or a URL for a release or track you want to tag, you
+can just enter one of the two at the "enter Id" prompt in the importer.
+
+.. _requests: http://docs.python-requests.org/en/latest/

docs/plugins/index.rst

    missing
    duplicates
    discogs
+   beatport
 
 Autotagger Extensions
 ''''''''''''''''''''''
 
 * :doc:`chroma`: Use acoustic fingerprinting to identify audio files with
   missing or incorrect metadata.
-* :doc:`discogs`: Search for releases in the discogs database.
+* :doc:`discogs`: Search for releases in the `Discogs`_ database.
+* :doc:`beatport`: Search for tracks and releases in the `Beatport`_ database.
+
+.. _Beatport: http://www.beatport.com/
+.. _Discogs: http://www.discogs.com/
 
 Metadata
 ''''''''

docs/plugins/mpdupdate.rst

         password: seekrit
 
 With that all in place, you'll see beets send the "update" command to your MPD server every time you change your beets library.
+
+If you want to communicate with MPD over a Unix domain socket instead of over
+TCP, just give the path to the socket in the filesystem for the ``host``
+setting. (Any ``host`` value starting with a slash is interpreted as a domain
+socket.)

docs/reference/config.rst

 
 As mentioned above, autotagger matches have *recommendations* that control how
 the UI behaves for a certain quality of match. The recommendation for a certain
-match is usually based on the distance calculation. But you can also control
-the recommendation for certain specific situations by defining *maximum*
-recommendations when:
+match is based on the overall distance calculation. But you can also control
+the recommendation when a distance penalty is being applied for a specific
+field by defining *maximum* recommendations for each field.
 
-* a match came from a source other than MusicBrainz (e.g., the
-  :doc:`Discogs </plugins/discogs>` plugin);
-* a match has missing or extra tracks;
-* the length (duration) of at least one track differs; or
-* at least one track number differs.
-
-To define maxima, use keys under ``max_rec:`` in the ``match`` section::
+To define maxima, use keys under ``max_rec:`` in the ``match`` section. Here
+are the defaults::
 
     match:
         max_rec:
-            non_mb_source: strong
-            partial: medium
-            tracklength: strong
-            tracknumber: strong
+            source: strong
+            artist: strong
+            album: strong
+            media: strong
+            mediums: strong
+            year: strong
+            country: strong
+            label: strong
+            catalognum: strong
+            albumdisambig: strong
+            album_id: strong
+            tracks: strong
+            missing_tracks: medium
+            unmatched_tracks: medium
+            track_title: strong
+            track_artist: strong
+            track_index: strong
+            track_length_grace: strong
+            track_length_max: strong
+            track_length: strong
+            track_id: strong
 
-If a recommendation is higher than the configured maximum and the condition is
-met, the recommendation will be downgraded. The maximum for each condition can
-be one of ``none``, ``low``, ``medium`` or ``strong``. When the maximum
-recommendation is ``strong``, no "downgrading" occurs for that situation.
+If a recommendation is higher than the configured maximum and a penalty is
+being applied, the recommendation will be downgraded. The maximum for each
+field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the
+maximum recommendation is ``strong``, no "downgrading" occurs.
 
-The above example shows the default ``max_rec`` settings.
+.. _preferred:
 
-.. _preferred_media:
+preferred
+~~~~~~~~~
 
-preferred_media
-~~~~~~~~~~~~~~~
+In addition to comparing the tagged metadata with the match metadata for
+similarity, you can also specify an ordered list of preferred countries and
+media types.
 
-When comparing files that have no ``media`` tagged, prefer releases that more
-closely resemble this media (using a string distance). When files are already
-tagged with media, this setting is ignored. Default: ``CD``.
+A distance penalty will be applied if the country or media type from the match
+metadata doesn't match any item in the corresponding list. The order is
+important: the first item is the most preferred. Each item may be a regular
+expression and will be matched case-insensitively. The number of media will
+be stripped when matching preferred media (e.g., "2x" in "2xCD").
+
+You can also tell the autotagger to prefer matches that have a release year
+closest to the original year for an album.
+
+Here's an example::
+
+    match:
+        preferred:
+            countries: ['US', 'GB|UK']
+            media: ['CD', 'Digital Media|File']
+            original_year: yes
+
+By default, none of these options are enabled.
+
+.. _ignored:
+
+ignored
+~~~~~~~
+
+You can completely avoid matches that have certain penalties applied by adding
+the penalty name to the ``ignored`` setting::
+
+    match:
+        ignored: missing_tracks unmatched_tracks
 
 .. _path-format-config:
 
     shutil.copytree(os.path.join(docdir, '_build', 'man'), mandir)
 
 setup(name='beets',
-      version='1.1.1',
+      version='1.2.1',
       description='music tagger and library organizer',
       author='Adrian Sampson',
       author_email='adrian@radbox.org',
Add a comment to this file

test/rsrc/full.alac.m4a

Binary file added.

test/test_autotag.py

 from _common import unittest
 from beets import autotag
 from beets.autotag import match
+from beets.autotag.match import Distance
 from beets.library import Item
 from beets.util import plurality
 from beets.autotag import AlbumInfo, TrackInfo
         TrackInfo(u'three', None, u'some artist', length=1, index=3),
     ]
 
+class DistanceTest(unittest.TestCase):
+    def setUp(self):
+        self.dist = Distance()
+
+    def test_add(self):
+        self.dist.add('add', 1.0)
+        self.assertEqual(self.dist._penalties, {'add': [1.0]})
+
+    def test_add_equality(self):
+        self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi'])
+        self.assertEqual(self.dist._penalties['equality'], [0.0])
+
+        self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi'])
+        self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0])
+
+        self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I))
+        self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0, 0.0])
+
+    def test_add_expr(self):
+        self.dist.add_expr('expr', True)
+        self.assertEqual(self.dist._penalties['expr'], [1.0])
+
+        self.dist.add_expr('expr', False)
+        self.assertEqual(self.dist._penalties['expr'], [1.0, 0.0])
+
+    def test_add_number(self):
+        # Add a full penalty for each number of difference between two numbers.
+
+        self.dist.add_number('number', 1, 1)
+        self.assertEqual(self.dist._penalties['number'], [0.0])
+
+        self.dist.add_number('number', 1, 2)
+        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0])
+
+        self.dist.add_number('number', 2, 1)
+        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0])
+
+        self.dist.add_number('number', -1, 2)
+        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0, 1.0,
+                                                          1.0, 1.0])
+
+    def test_add_priority(self):
+        self.dist.add_priority('priority', 'abc', 'abc')
+        self.assertEqual(self.dist._penalties['priority'], [0.0])
+
+        self.dist.add_priority('priority', 'def', ['abc', 'def'])
+        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5])
+
+        self.dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef',
+                                                  re.compile('GH', re.I)])
+        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75])
+
+        self.dist.add_priority('priority', 'xyz', ['abc', 'def'])
+        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75,
+                                                            1.0])
+
+    def test_add_ratio(self):
+        self.dist.add_ratio('ratio', 25, 100)
+        self.assertEqual(self.dist._penalties['ratio'], [0.25])
+
+        self.dist.add_ratio('ratio', 10, 5)
+        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0])
+
+        self.dist.add_ratio('ratio', -5, 5)
+        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0])
+
+        self.dist.add_ratio('ratio', 5, 0)
+        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0])
+
+    def test_add_string(self):
+        dist = match.string_dist(u'abc', u'bcd')
+        self.dist.add_string('string', u'abc', u'bcd')
+        self.assertEqual(self.dist._penalties['string'], [dist])
+
+    def test_distance(self):
+        config['match']['distance_weights']['album'] = 2.0
+        config['match']['distance_weights']['medium'] = 1.0
+        self.dist.add('album', 0.5)
+        self.dist.add('media', 0.25)
+        self.dist.add('media', 0.75)
+        self.assertEqual(self.dist.distance, 0.5)
+
+        # __getitem__()
+        self.assertEqual(self.dist['album'], 0.25)
+        self.assertEqual(self.dist['media'], 0.25)
+
+    def test_max_distance(self):
+        config['match']['distance_weights']['album'] = 3.0
+        config['match']['distance_weights']['medium'] = 1.0