#!/usr/bin/env python
"""Guess release from audio files in a directory.

Automatically identify an album from some audio files.

Given a list of directories containing all and only the files of a single album,
run the fingerprint on the audio/mp3 files to identify the songs, and then apply
some simple heuristics using the track numbers from the filenames, the PUIDs of
the sound files and the MusicBrainz database to automatically guess what is the
most likely album release that corresponds to the directory. Note that this does
not make use of the header tags.

A file named 'saved.mb.release-id' is saved in each directory, with details on
the process.
"""

__author__ = 'Martin Blais <blais@furius.ca>'

# stdlib imports
import sys, os, re, logging, StringIO, time
from os.path import *
from collections import defaultdict
from operator import itemgetter

# musicdns imports
import musicdns, musicdns.cache

# mutagen imports
from mutagen import File

# musicbrainz imports
from musicbrainz2.webservice import Query, TrackFilter, WebServiceError, ReleaseIncludes


# Name of the report file that main() writes into each processed directory.
output_filename = 'saved.mb.release-id'

# File extensions (compared lowercased) that process_dir() treats as audio
# files; everything else in the directory is ignored.
# NOTE(review): '.off' looks like a typo for '.ogg' -- confirm that the
# fingerprinter can read Ogg files before changing it.
valid_extensions = ('.mp3', '.m4a', '.m4p', '.off', '.wav')

def process_dir(dn, musicdns_key):
    """Guess the MusicBrainz release matching the audio files in a directory.

    Every file in `dn` with a recognized audio extension is fingerprinted to
    obtain a PUID, the releases containing each PUID are looked up, and a
    release scores one point for each file whose filename track number equals
    the track's position on that release. A report of the whole process is
    echoed to stdout as it is built.

    This function relies on the module-level `cache` object created by main().

    Args:
      dn: The directory to process.
      musicdns_key: The MusicDNS key handed to the PUID cache lookup.

    Returns:
      A pair (release, text). `release` is the chosen release, or None if no
      release matched; `text` is the report. If MusicBrainz could not be
      reached after several attempts, (None, None) is returned instead.
    """
    dn = abspath(dn)

    # Create a sorted list of (track, filename, puid) for the audio files.
    nfiles = []
    for name in os.listdir(dn):
        fn = join(dn, name)
        if splitext(fn)[1].lower() not in valid_extensions:
            continue
        try:
            puid, _ = cache.getpuid(fn, musicdns_key)
        except IOError:
            # Best effort: the file is still listed, but it cannot contribute
            # to the matching.
            puid = None
        nfiles.append((filename_track_number(fn), fn, puid))
    nfiles.sort()

    # Output text.
    o = StringIO.StringIO()
    fs_encoding = sys.getfilesystemencoding() or 'utf-8'
    def write(s):
        ss = s + '\n'
        # Filenames are byte strings while the other report lines are unicode.
        # Keep only unicode in the buffer, otherwise getvalue() fails on
        # byte strings that are not plain ASCII.
        if isinstance(ss, bytes):
            buffered = ss.decode(fs_encoding, 'replace')
        else:
            buffered = ss
        o.write(buffered)
        sys.stdout.write(ss)
        sys.stdout.flush()

    # For each track, filter the releases by including only those whose track
    # number matches the filename track number.
    matchrel = defaultdict(int)
    for no, fn, puid in nfiles:
        write(u'')
        write(fn)
        if puid is None:
            continue

        for _ in range(5): # Retry a few times.
            try:
                freleases = find_releases(puid)
                break
            except WebServiceError as e:
                logging.error("Can't access releases for song, retrying: %s", e)
                time.sleep(1)
        else:
            logging.error("Gave up on MusicBrainz.")
            return None, None

        for tno, track, release in freleases:
            # If the track numbers match, include the release in matches.
            included = (no == tno)
            if included:
                matchrel[release.id] += 1
            write(u'    %d - %s - %s %s' % (tno, track.title, release.title,
                                            '***' if included else ''))

    write('')
    write('Matching releases:')
    write('')

    # Fetch the details of the candidate releases, most frequent first.
    releaseids = sorted(matchrel.items(), key=itemgetter(1), reverse=True)
    includes = ReleaseIncludes(
        artist=True,
        tracks=True,
        urlRelations=True,
        )
    for _ in range(5): # Retry a few times.
        try:
            q = Query()
            releases = [(q.getReleaseById(rid, includes), freq)
                        for rid, freq in releaseids]
            break
        except WebServiceError as e:
            logging.error("Error accessing MusicBrainz: %s", e)
            time.sleep(1)
    else:
        logging.error("Gave up on MusicBrainz.")
        return None, None

    for rel, freq in releases:
        write(u'  (Frequency): %d' % freq)
        write(u'  Title: %s' % rel.title)
        write(u'  Artist: %s' % rel.artist.name)
        write(u'')
        for pos, trk in enumerate(rel.tracks):
            write(u'    %d. %s' % ((pos+1), trk.title))
        write(u'')

        # Only warn here; the track count is used as a tie-breaker below
        # rather than as a hard filter.
        if len(rel.getTracks()) != len(nfiles):
            logging.warning("Unmatched number of tracks.")

    # Select the most appropriate one: among the releases with the highest
    # frequency, prefer one that has as many tracks as there are files.
    if releases:
        maxfreq = max(freq for _, freq in releases)
        mreleases = [rel for rel, freq in releases if freq == maxfreq]

        ntmatches = [rel for rel in mreleases
                     if len(rel.getTracks()) == len(nfiles)]
        chosen = ntmatches[0] if ntmatches else mreleases[0]
    else:
        chosen = None

    return chosen, o.getvalue()


def filename_track_number(fn):
    """Given a filename, attempt to find the track number in its name.

    Only the base name is examined, with its extension removed, so that the
    digits of extensions such as '.mp3' or '.m4a' are not mistaken for a track
    number.

    Returns the first run of digits as an int, or None if there is none.
    """
    stem = splitext(basename(fn))[0]
    mo = re.search(r'([0-9]+)', stem)
    if mo:
        return int(mo.group(1))
    return None
        
def find_releases(puid):
    """Given a track's puid, return a list of
      (track-no, track, release)
    for each release that the song appeared on.

    track-no is the release's track offset plus one. Errors raised by the
    MusicBrainz query propagate to the caller.
    """
    q = Query()
    results = q.getTracks(TrackFilter(puid=puid))

    out = []
    for r in results:
        track = r.track
        # Emit one entry per release instead of asserting that there is
        # exactly one: a track attached to no release is simply skipped, and a
        # track attached to several contributes each of them.
        for rel in track.getReleases():
            out.append((rel.getTracksOffset()+1, track, rel))
    return out

def main():
    """Command-line entry point.

    Each positional argument that is a directory is processed with
    process_dir(). When a release is chosen, a report file named by
    `output_filename` is written into that directory: the base name of the
    release id, the release id with '.html' appended, then the report text.
    Directories for which no release was chosen get no file.

    The MusicDNS key comes from the -k/--musicdns-key option or, if that is
    not given, from the file '.musicdns_key' in the directory named by $HOME.
    """
    global cache

    import optparse
    parser = optparse.OptionParser(__doc__.strip())

    # The option carries a value (the key itself), so it must be stored
    # rather than treated as a boolean flag.
    parser.add_option('-k', '--musicdns-key', action='store', metavar='KEY',
                      help="Music DNS key")

    opts, args = parser.parse_args()

    if not opts.musicdns_key:
        keyfn = join(os.environ['HOME'], '.musicdns_key')
        with open(keyfn) as keyfile:
            opts.musicdns_key = keyfile.read().strip()

    musicdns.initialize()
    try:
        # process_dir() looks the PUIDs up through this module-level cache.
        cache = musicdns.cache.MusicDNSCache()

        for dn in args:
            dn = abspath(dn)
            if not isdir(dn):
                continue
            print('... Processing %s ...' % dn)
            release, text = process_dir(dn, opts.musicdns_key)
            if release is None:
                # Either MusicBrainz was unreachable or nothing matched.
                continue
            with open(join(dn, output_filename), 'w') as f:
                f.write(basename(release.id) + '\n\n')
                f.write(release.id + '.html\n')
                f.write('\n')
                f.write(text.encode('utf8', 'replace'))
    finally:
        # Always pair initialize() with finalize(), even on errors.
        musicdns.finalize()

# Run the command-line interface only when executed as a script, not when
# imported as a module.
if __name__ == '__main__':
    main()

