# beets / beetsplug /
#
# (Repository page note: the default branch has multiple heads.)

# This file is part of beets.
# Copyright 2013, Adrian Sampson.
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

"""Fetches, embeds, and displays lyrics.
from __future__ import print_function

import urllib
import re
import logging

from beets.plugins import BeetsPlugin
from beets import ui
from beets import config

# Global logger.

log = logging.getLogger('beets')

# Lyrics scrapers.

# HTML comments. The match is non-greedy so that every comment is removed
# on its own; a greedy ``.*`` would also delete all text lying between the
# first and the last comment. ``re.S`` lets a comment span several lines.
COMMENT_RE = re.compile(r'<!--.*?-->', re.S)
# Start of an opening or closing DIV tag. Group 1 is "/" for a closing
# tag and empty for an opening one; the trailing ">" is optional so that
# opening tags carrying attributes are matched as well.
DIV_RE = re.compile(r'<(/?)div>?')
# Any HTML tag.
TAG_RE = re.compile(r'<[^>]*>')
# Line-break tags: <br>, <br/> and <br />.
BREAK_RE = re.compile(r'<br\s*/?>')

def fetch_url(url):
    """Retrieve the content at a given URL, or return None if the source
    is unreachable.

    An ``IOError`` raised while opening or reading the URL is logged at
    debug level and turned into a ``None`` return value.
    """
    try:
        response = urllib.urlopen(url)
        try:
            return response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
    except IOError as exc:
        log.debug(u'failed to fetch: {0} ({1})'.format(url, unicode(exc)))
        return None

def unescape(text):
    """Resolves &#xxx; HTML entities (and some others).

    Byte strings are decoded as UTF-8 first, dropping undecodable bytes.
    ``&nbsp;`` becomes a plain space and every decimal character
    reference (``&#NNN;``) is replaced by the character it names. A
    reference whose number is not a valid code point is left untouched.
    Returns a unicode string.
    """
    if isinstance(text, bytes):
        text = text.decode('utf8', 'ignore')
    out = text.replace(u'&nbsp;', u' ')

    # ``unichr`` only exists on Python 2 (where ``str is bytes``); on
    # Python 3 the built-in ``chr`` does the same job.
    to_char = unichr if str is bytes else chr

    def replchar(m):
        num = m.group(1)
        try:
            return to_char(int(num))
        except (ValueError, OverflowError):
            # Not a representable code point: keep the entity as written.
            return m.group(0)

    return re.sub(r'&#(\d+);', replchar, out)

def extract_text(html, starttag):
    """Extract the text from a <DIV> tag in the HTML starting with
    ``starttag``. Returns None if parsing fails.

    Parsing fails when ``starttag`` does not occur in ``html`` or when
    the DIV it opens is never closed. Nested DIVs are skipped together
    with their contents. The remaining markup is reduced to plain text:
    comments and tags are removed, entities are resolved, whitespace is
    collapsed and ``<br>`` tags become newlines.
    """
    # Strip off the leading text before opening tag.
    try:
        _, html = html.split(starttag, 1)
    except ValueError:
        return None

    # Walk through balanced DIV tags. ``level`` is the nesting depth
    # relative to the DIV opened by ``starttag`` (0 means directly inside
    # it); ``pos`` is where the current run of top-level text begins.
    level = 0
    parts = []
    pos = 0
    for match in DIV_RE.finditer(html):
        if match.group(1):  # Closing tag.
            level -= 1
            if level == 0:
                # Back at top level: text resumes after this tag.
                pos = match.end()
        else:  # Opening tag.
            if level == 0:
                # Keep the top-level text that precedes the nested DIV.
                parts.append(html[pos:match.start()])

            level += 1

        if level == -1:
            # This tag closes the DIV opened by ``starttag``.
            parts.append(html[pos:match.start()])
            break
    else:
        print('no closing tag found!')
        return None
    lyrics = ''.join(parts)

    # Strip cruft.
    lyrics = COMMENT_RE.sub('', lyrics)
    lyrics = unescape(lyrics)
    lyrics = re.sub(r'\s+', ' ', lyrics)  # Whitespace collapse.
    lyrics = BREAK_RE.sub('\n', lyrics)  # <BR> newlines.
    lyrics = re.sub(r'\n +', '\n', lyrics)
    lyrics = re.sub(r' +\n', '\n', lyrics)
    lyrics = TAG_RE.sub('', lyrics)  # Strip remaining HTML tags.
    lyrics = lyrics.strip()
    return lyrics

def _encode(s):
    """Encode the string for inclusion in a URL (common to both
    LyricsWiki and
    if isinstance(s, unicode):
        # Replace "fancy" apostrophes with straight ones.
        s = s.replace(u'\u2019', u"'")
        s = s.encode('utf8', 'ignore')
    return urllib.quote(s)

def _lw_encode(s):
    """Rewrite ``s`` the way the LyricsWiki URL needs it, then URL-encode
    it: whitespace runs become underscores, ``<``, ``>`` and ``#`` are
    spelled out, and square or curly brackets become parentheses.
    """
    encoded = re.sub(r'\s+', '_', s)
    spelled_out = (
        ("<", "Less_Than"),
        (">", "Greater_Than"),
        ("#", "Number_"),
    )
    for char, name in spelled_out:
        encoded = encoded.replace(char, name)
    encoded = re.sub(r'[\[\{]', '(', encoded)
    encoded = re.sub(r'[\]\}]', ')', encoded)
    return _encode(encoded)
def fetch_lyricswiki(artist, title):
    """Fetch lyrics from LyricsWiki.

    Returns the lyrics text, or None when the page cannot be fetched,
    contains no lyric box, or only carries the "not licensed" notice.
    """
    # NOTE(review): LYRICSWIKI_URL_PATTERN is not defined in the visible
    # part of this file -- confirm that its definition has not been lost.
    url = LYRICSWIKI_URL_PATTERN % (_lw_encode(artist), _lw_encode(title))
    html = fetch_url(url)
    if not html:
        return None

    lyrics = extract_text(html, "<div class='lyricbox'>")
    if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
        return lyrics
    return None


# Substrings that fetch_lyricscom() looks for in the extracted text;
# presumably they mark a "no lyrics available" page -- TODO confirm.
LYRICSCOM_NOT_FOUND = (
    'Sorry, we do not have the lyric',
    'Submit Lyrics',
)
def _lc_encode(s):
    """Turn every whitespace run in ``s`` into a single hyphen and
    URL-encode the result.
    """
    return _encode(re.sub(r'\s+', '-', s))
def fetch_lyricscom(artist, title):
    """Fetch lyrics from Lyrics.com.

    Returns the lyrics text, or None when the page cannot be fetched,
    contains no lyrics container, or contains one of the
    ``LYRICSCOM_NOT_FOUND`` strings.
    """
    # NOTE(review): LYRICSCOM_URL_PATTERN is not defined in the visible
    # part of this file -- confirm that its definition has not been lost.
    url = LYRICSCOM_URL_PATTERN % (_lc_encode(title), _lc_encode(artist))
    html = fetch_url(url)
    if not html:
        return None

    lyrics = extract_text(html, '<div id="lyric_space">')
    if not lyrics:
        return None
    if any(not_found_str in lyrics for not_found_str in LYRICSCOM_NOT_FOUND):
        return None

    # Cut off the trailing attribution footer, if present. ``split``
    # always yields at least one element, so indexing is safe.
    return lyrics.split('\n---\nLyrics powered by', 1)[0]

# Lyrics sources, in the order in which they are queried.
BACKENDS = [fetch_lyricswiki, fetch_lyricscom]


def get_lyrics(artist, title):
    """Query each backend in order and return the first non-empty result
    as unicode text. Returns None when no backend yields lyrics.
    """
    for source in BACKENDS:
        found = source(artist, title)
        if not found:
            continue
        if isinstance(found, str):
            # Byte strings are decoded as UTF-8, dropping invalid bytes.
            found = found.decode('utf8', 'ignore')
        log.debug(u'got lyrics from backend: {0}'.format(source.__name__))
        return found
    return None

# Plugin logic.

def fetch_item_lyrics(lib, loglevel, item, write):
    """Fetch and store lyrics for a single item. If ``write``, then the
    lyrics will also be written to the file itself. The ``loglevel``
    parameter controls the visibility of the function's status log
    messages.

    Nothing is fetched when the item already has lyrics, and the item is
    left unchanged when no lyrics can be found.
    """
    # Skip if the item already has lyrics.
    if item.lyrics:
        log.log(loglevel, u'lyrics already present: %s - %s',
                item.artist, item.title)
        return

    # Fetch lyrics.
    lyrics = get_lyrics(item.artist, item.title)
    if not lyrics:
        log.log(loglevel, u'lyrics not found: %s - %s',
                item.artist, item.title)
        return

    log.log(loglevel, u'fetched lyrics: %s - %s',
            item.artist, item.title)
    item.lyrics = lyrics
    if write:
        # NOTE(review): the body of this branch was missing; restored
        # from the docstring ("written to the file itself") -- confirm.
        item.write()
    # NOTE(review): ``lib`` is not used in the visible code; a call that
    # persists the item to the library database may have been lost here.

class LyricsPlugin(BeetsPlugin):
    """Plugin that adds a ``lyrics`` command and, when the ``auto``
    option is set, fetches lyrics for items as they are imported.
    """
    def __init__(self):
        super(LyricsPlugin, self).__init__()
        self.import_stages = [self.imported]
        # NOTE(review): only the ``'auto': True`` entry survived in the
        # source; the surrounding ``config.add`` call is reconstructed
        # from the ``self.config['auto']`` lookup below -- confirm.
        self.config.add({
            'auto': True,
        })

    def commands(self):
        """Return the ``lyrics`` subcommand."""
        cmd = ui.Subcommand('lyrics', help='fetch song lyrics')
        cmd.parser.add_option('-p', '--print', dest='printlyr',
                              action='store_true', default=False,
                              help='print lyrics to console')
        def func(lib, opts, args):
            # The "write to files" option corresponds to the
            # import_write config value.
            write = config['import']['write'].get(bool)
            for item in lib.items(ui.decargs(args)):
                fetch_item_lyrics(lib, logging.INFO, item, write)
                if opts.printlyr and item.lyrics:
                    # NOTE(review): this statement was missing; restored
                    # from the option's help text -- confirm.
                    ui.print_(item.lyrics)
        cmd.func = func
        return [cmd]

    # Auto-fetch lyrics on import.
    def imported(self, session, task):
        """Import stage: fetch lyrics for every imported item of ``task``
        when the ``auto`` option is enabled. Status messages are logged
        at debug level and tags are not written to the files.
        """
        if self.config['auto']:
            for item in task.imported_items():
                fetch_item_lyrics(session.lib, logging.DEBUG, item, False)
# Tip: Filter by directory path, e.g. "/media app.js" to search for public/media/app.js.
# Tip: Use camelCasing, e.g. "ProjME" to search for ProjectModifiedEvent.java.
# Tip: Filter by extension type, e.g. "/repo .js" to search for all .js files in the /repo directory.
# Tip: Separate your search terms with spaces, e.g. "/ssh pom.xml" to search for src/ssh/pom.xml.
# Tip: Use the ↑ and ↓ arrow keys to navigate and Return to view the file.
# Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
# Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.