Commits

Mathias Panzenböck committed 619d12c

only use SQLite for import, then do everything in JavaScript

  • Participants
  • Parent commits 855c419

Comments (0)

Files changed (1)

 var url     = require('url');
 var fs      = require('fs');
 var path    = require('path');
-var util    = require('util');
 var zlib    = require('zlib');
 var extend  = require('extend');
 var request = require('request');
 	return cb(null);
 };
 
-function sync (xs, f, end) {
-	var i = 0;
-	function next () {
-		if (i < xs.length) {
-			f(xs[i++], next);
-		}
-		else if (end) {
-			end();
-		}
-	}
-	next();
-}
-
-function repeat (x,n) {
-	var xs = new Array(n);
-	for (var i = 0; i < n; ++ i) {
-		xs[i] = x;
-	}
-	return xs;
-}
-
-function nargs (args) {
-	return repeat('?',args.length).join(',');
-}
-
-function concat () {
-	var rv = [];
-	var push = rv.push;
-	for (var i = 0; i < arguments.length; ++ i) {
-		push.apply(rv,arguments[i]);
-	}
-	return rv;
-}
-
 function uniq (xs) {
 	var ys = [];
 	var visited = {};
 	var hasAlbum = Object.prototype.hasOwnProperty.bind(songs_by_album);
 	for (var i = 0; i < songs.length; ++ i) {
 		var song = songs[i];
-		var albumname = song.albumname;
+		var albumname = song.album.albumname;
 		var album;
 		if (hasAlbum(albumname)) {
 			album = songs_by_album[albumname];
 		else {
 			album = songs_by_album[albumname] = [];
 		}
-		delete song.albumname;
-		album.push(song);
+		album.push(slice(song,'number','desc','duration','mp3'));
 	}
 	return songs_by_album;
 }
 
-function likeEscape (text) {
-	return text.replace(/([%_\\\\])/g,"\\$1");
-}
-
-function buildQuery (columns,words) {
-	// TODO: replace with full text search
-	var where = '('+columns.map(function (column) {
-			return '('+repeat(column+" like ? escape '\\'",words.length).join(' and ')+')';
-		}).join(' or ')+')';
-
-	words = words.map(function (word) { return '%'+likeEscape(word)+'%'; });
-	var args = concat.apply(this,repeat(words,columns.length));
-
-	return {where: where, args: args};
-}
-
 function collect (xs,prop) {
 	var ys = new Array(xs.length);
 	for (var i = 0; i < ys.length; ++ i) {
 	});
 }
 
+// Collator options used by fuzzyCompare: compare base letters only (case and
+// accents are not significant), skip punctuation, and compare digit runs as
+// numbers.
+var FUZZY_COMPARE_OPTIONS = {
+	usage: 'sort',
+	sensitivity: 'base',
+	ignorePunctuation: true,
+	numeric: true
+};
+
+/**
+ * Comparator (a, b) -> negative / 0 / positive for human-friendly name sorting.
+ *
+ * The implementation is picked once at load time. If the runtime honours the
+ * locale/options arguments of String.prototype.localeCompare, that is used;
+ * otherwise it degrades to a plain case-insensitive code unit comparison.
+ */
+var fuzzyCompare = (function () {
+	// "a" and ".Ä" only compare equal when the options above are really applied,
+	// so this doubles as the feature test.
+	var optionsHonoured = String.prototype.localeCompare &&
+		"a".localeCompare(".Ä","en",FUZZY_COMPARE_OPTIONS) === 0;
+
+	if (optionsHonoured) {
+		return function (a, b) {
+			return a.localeCompare(b,'en',FUZZY_COMPARE_OPTIONS);
+		};
+	}
+
+	return function (a, b) {
+		var lhs = a.toLowerCase();
+		var rhs = b.toLowerCase();
+		if (lhs < rhs) return -1;
+		if (lhs > rhs) return 1;
+		return 0;
+	};
+}());
+
+/**
+ * Stores a search vector on `record.vector`.
+ *
+ * Every non-empty entry of `values` is trimmed, NFKC-normalised, lower-cased
+ * and split on whitespace; the distinct words are then sorted and joined with
+ * single spaces. Falsy entries (undefined, null, '') are ignored.
+ *
+ * @param {Object} record  object that receives the `vector` property
+ * @param {Array}  values  strings to index
+ */
+function addVector (record,values) {
+	var words = {};
+
+	for (var k = 0; k < values.length; ++ k) {
+		var text = (values[k]||'').trim();
+		if (!text) {
+			continue;
+		}
+
+		unorm.nfkc(text).toLowerCase().split(/\s+/g).forEach(function (word) {
+			// object keys de-duplicate the words
+			words[word] = true;
+		});
+	}
+
+	record.vector = Object.keys(words).sort().join(' ');
+}
+
 function fulltextSearch (records, words) {
 	return records.filter(function (record) {
 		var vector = record.vector;
 	});
 }
 
+// Latest launch date recorded on an artist. `latestdate` is the key that
+// FINDER.artists exposes; the misspelled `latestdata` key is consulted only
+// when `latestdate` is absent.
+function artistLatestDate (artist) {
+	return artist.latestdate !== undefined ? artist.latestdate : artist.latestdata;
+}
+
+/**
+ * Array.prototype.sort comparator for artist records: ascending by latest
+ * launch date, ties broken by fuzzyCompare on the artist name.
+ *
+ * NOTE(review): the SQL this replaces ordered by `latestdate desc`; confirm
+ * that ascending order is intended.
+ *
+ * @param {Object} a  artist record
+ * @param {Object} b  artist record
+ * @returns {number} negative, zero or positive
+ */
+function artistsDateSorter (a, b) {
+	var cmp = artistLatestDate(a) - artistLatestDate(b);
+	return cmp === 0 ? fuzzyCompare(a.artist, b.artist) : cmp;
+}
+
+/**
+ * Array.prototype.sort comparator for album records: ascending by
+ * `launchdate`; albums with the same date are ordered by fuzzyCompare on
+ * `albumname`.
+ */
+function albumsDateSorter (a, b) {
+	var byDate = a.launchdate - b.launchdate;
+	if (byDate !== 0) {
+		return byDate;
+	}
+	return fuzzyCompare(a.albumname, b.albumname);
+}
+
+/**
+ * Array.prototype.sort comparator for song records. Songs are ordered by
+ * their album's `launchdate` (ascending), then by album name via
+ * fuzzyCompare, and finally by track `number` within the same album.
+ * Both songs must carry an `album` reference.
+ */
+function songsDateSorter (a, b) {
+	var lhs = a.album;
+	var rhs = b.album;
+
+	var order = lhs.launchdate - rhs.launchdate;
+	if (order !== 0) {
+		return order;
+	}
+
+	order = fuzzyCompare(lhs.albumname, rhs.albumname);
+	return order !== 0 ? order : a.number - b.number;
+}
+
 mime.define({'application/x-web-app-manifest+json': ['webapp']});
 
 var app = express.createServer();
 var connections = {};
 
 var ACTIONS = {
+	// Reports how many records of each kind the in-memory collection holds.
+	'stat': function (req, res, done) {
+		var counts = {};
+
+		counts.artists   = collection.SortedArtists.length;
+		counts.albums    = collection.SortedAlbums.length;
+		counts.songs     = collection.SortedSongs.length;
+		counts.genres    = collection.SortedGenres.length;
+		counts.countries = collection.SortedCountries.length;
+
+		done(null, counts);
+	},
 	'index': function (req, res, done) {
 		var artists = {};
 		var genres  = {};
 	}
 };
 
-// TODO
 var FINDER = {
 	'artists': function (req, res, query, order, done) {
-		var q = buildQuery(['artists.artist','artists.bio'],query);
-		var artistOrder = order === 'name' ? "artists.artist" : "latestdate desc, artists.artist";
-		exec(util.format(
-			'select artists.artist as artist, homepage, artists.description as description, max(launchdate) as latestdate from albums '+
-			'inner join artists on artists.artist = albums.artist '+
-			'where %s '+
-			'group by artists.artist, homepage, artists.description '+
-			'order by %s', q.where, artistOrder), q.args,
-			function (err, artists) {
-				if (err) return done(err);
-				var artistNames = [];
-				var artistsByName = {};
-				for (var i = 0; i < artists.length; ++ i) {
-					var artist = artists[i];
-					artist.albums = [];
-					var artistname = artist.artist;
-					artistNames.push(artistname);
-					artistsByName[artistname] = artist;
-				}
-				exec(util.format(
-					'select artist, albumname, sku, launchdate '+
-					'from albums '+
-					'where artist in (%s) '+
-					'order by launchdate desc', nargs(artistNames)), artistNames,
-					function (err, artistAlbums) {
-						if (err) return done(err);
-						for (var i = 0; i < artistAlbums.length; ++ i) {
-							var album = artistAlbums[i];
-							artistsByName[album.artist].albums.push(album);
-							delete album.artist;
-						}
-						done(null, artists);
-					});
-				
-			});
+		var artists = fulltextSearch(collection.SortedArtists, query);
+
+		if (order === 'date') {
+			artists.sort(artistsDateSorter);
+		}
+
+		for (var i = 0; i < artists.length; ++ i) {
+			var cArtist = artists[i];
+			var artist  = artists[i] = slice(cArtist, 'artist', 'homepage', 'description', 'latestdate');
+			var aAlbums = cArtist.albums;
+			var albums  = artist.albums = [];
+
+			for (var j = 0; j < aAlbums.length; ++ j) {
+				albums.push(slice(aAlbums[j], 'albumname', 'sku', 'launchdate'));
+			}
+
+			albums.sort(albumsDateSorter);
+		}
+
+		done(null, artists);
 	},
 	'albums': function (req, res, query, order, done) {
-		var q = buildQuery(['albums.albumname','albums.description'],query);
-		var albumOrder = order === 'name' ? "albums.albumname" : "launchdate desc, albums.albumname";
-		exec(util.format(
-			'select albumname, artist, sku, launchdate, itunes '+
-			'from albums '+
-			'where %s '+
-			'order by %s', q.where, albumOrder), q.args, done);
-	},
-	'artists-albums': function (req, res, query, order, done) {
-		var q = buildQuery(['artists.artist'],query);
-		var artistOrder = order === 'name' ? "artists.artist"   : "latestdate desc, artists.artist";
-		var albumOrder  = order === 'name' ? "albums.albumname" : "launchdate desc, albums.albumname";
+		var albums = fulltextSearch(collection.SortedAlbums, query);
 
-		exec(util.format(
-			'select artists.artist as artist, homepage, artists.description as description, max(launchdate) as latestdate from albums '+
-			'inner join artists on artists.artist = albums.artist '+
-			'where %s '+
-			'group by artists.artist, homepage, artists.description '+
-			'order by %s', q.where, artistOrder), q.args,
-			function (err, artists) {
-				if (err) return done(err);
-				var artistNames = [];
-				var artistsByName = {};
-				for (var i = 0; i < artists.length; ++ i) {
-					var artist = artists[i];
-					artist.albums = [];
-					var artistname = artist.artist;
-					artistNames.push(artistname);
-					artistsByName[artistname] = artist;
-				}
-				exec(util.format(
-					'select artist, albumname, sku, launchdate '+
-					'from albums '+
-					'where artist in (%s) '+
-					'order by %s', nargs(artistNames), albumOrder), artistNames,
-					function (err, artistAlbums) {
-						if (err) return done(err);
-						for (var i = 0; i < artistAlbums.length; ++ i) {
-							var album = artistAlbums[i];
-							artistsByName[album.artist].albums.push(album);
-							delete album.artist;
-						}
-						var q = buildQuery(['albums.albumname','songs.desc'],query);
-						exec(util.format(
-							'select distinct albums.albumname, sku, launchdate, artists.artist as artist, homepage '+
-							'from albums inner join songs on albums.albumname = songs.albumname '+
-							'inner join artists on artists.artist = albums.artist '+
-							'where '+
-							'albums.artist not in (%s) and %s '+
-							'order by %s', nargs(artistNames), q.where, albumOrder), concat(artistNames,q.args),
-							function (err, albums) {
-								if (err) return done(err);
-								done(null, {
-									artists: artists,
-									albums:  albums
-								});
-							});
-					});
-			});
+		if (order === 'date') {
+			albums.sort(albumsDateSorter);
+		}
+
+		done(null, albums.map(function (album) {
+			return {
+				albumname:  album.albumname,
+				artist:     album.artist.artist,
+				sku:        album.sku,
+				launchdate: album.launchdate,
+				itunes:     album.itunes
+			};
+		}));
 	},
 	'albums-songs': function (req, res, query, order, done) {
-		var q = buildQuery(['albums.albumname'],query);
-		var albumOrder = order === 'name' ? "albumname" : "launchdate desc, albumname";
+		var albums = fulltextSearch(collection.SortedAlbums, query);
+		var foundAlbums = {};
+		var hasAlbum = foundAlbums.hasOwnProperty.bind(foundAlbums);
 
-		exec(util.format(
-			'select distinct albumname from albums '+
-			'where %s order by %s', q.where, albumOrder), q.args,
-			function (err, rows) {
-				if (err) return done(err);
-				var albums = collect(rows,'albumname');
-				var q = buildQuery(['songs.desc'],query);
-				if (order === "name") {
-					exec(util.format(
-						'select number, desc, duration, mp3, albumname from songs '+
-						'where albumname not in (%s) and %s '+
-						'order by albumname, number', nargs(albums), q.where),
-						concat(albums,q.args),
-						onsongs);
-				}
-				else {
-					exec(util.format(
-						'select number, desc, duration, mp3, songs.albumname from songs '+
-						'inner join albums on albums.albumname = songs.albumname '+
-						'where songs.albumname not in (%s) and %s '+
-						'order by launchdate desc, songs.albumname, number',nargs(albums),q.where),
-						concat(albums,q.args),
-						onsongs);
-				}
+		for (var i = 0; i < albums.length; ++ i) {
+			foundAlbums[albums[i].albumname] = true;
+		}
 
-				function onsongs (err,rows) {
-					if (err) return done(err);
-					done(null,{
-						albums: albums,
-						songs: songsByAlbum(rows)
-					});
-				}
-			});
+		var songs = fulltextSearch(collection.SortedSongs.filter(function (song) {
+			return !hasAlbum(song.album.albumname);
+		}), query);
+
+		if (order === 'date') {
+			albums.sort(albumsDateSorter);
+			songs.sort(songsDateSorter);
+		}
+
+		done(null, {
+			albums: collect(albums,'albumname'),
+			songs:  songsByAlbum(songs)
+		});
 	},
 	'artists-albums-songs': function (req, res, query, order, done) {
-		var q = buildQuery(['artist'],query);
-		if (order === "name") {
-			exec(util.format(
-				'select distinct artist from albums '+
-				'where %s order by artist', q.where),
-				q.args, onartists);
+		var artists = fulltextSearch(collection.SortedArtists, query);
+		var foundArtists = {};
+		var hasArtist = foundArtists.hasOwnProperty.bind(foundArtists);
+
+		for (var i = 0; i < artists.length; ++ i) {
+			foundArtists[artists[i].artist] = true;
 		}
-		else {
-			exec(util.format(
-				'select artist, max(launchdate) as latestdate from albums '+
-				'where %s '+
-				'group by artist '+
-				'order by latestdate desc, artist', q.where),
-				q.args, onartists);
+		
+		var foundAlbums = {};
+		var albums = fulltextSearch(collection.SortedAlbums.filter(function (album) {
+			if (hasArtist(album.artist.artist)) {
+				foundAlbums[album.albumname] = true;
+				return false;
+			}
+			return true;
+		}), query);
+		var hasAlbum = foundAlbums.hasOwnProperty.bind(foundAlbums);
+
+		for (var i = 0; i < albums.length; ++ i) {
+			foundAlbums[albums[i].albumname] = true;
+		}
+		
+		var songs = fulltextSearch(collection.SortedSongs.filter(function (song) {
+			return !hasAlbum(song.album.albumname);
+		}), query);
+
+		if (order === 'date') {
+			artists.sort(artistsDateSorter);
+			albums.sort(albumsDateSorter);
+			songs.sort(songsDateSorter);
 		}
 
-		function onartists (err, rows) {
-			if (err) return done(err);
-			var artists = collect(rows,'artist');
-			var q = buildQuery(['albumname'],query);
-			if (order === "name") {
-				exec(util.format(
-					'select distinct albumname from albums '+
-					'where artist not in (%s) and %s '+
-					'order by albumname', nargs(artists), q.where),
-					concat(artists,q.args),
-					onalbums);
+		done(null, {
+			artists: collect(artists,'artist'),
+			albums:  collect(albums,'albumname'),
+			songs:   songsByAlbum(songs)
+		});
+	},
+	'genres-albums-songs': function (req, res, query, order, done) {
+		var genres = fulltextSearch(collection.SortedGenres, query);
+		var foundGenres = {};
+		var hasGenre = foundGenres.hasOwnProperty.bind(foundGenres);
+
+		for (var i = 0; i < genres.length; ++ i) {
+			foundGenres[genres[i].genre] = true;
+		}
+
+		var foundAlbums = {};
+		var albums = fulltextSearch(collection.SortedAlbums.filter(function (album) {
+			var genres = album.genres;
+			for (var i = 0; i < genres.length; ++ i) {
+				if (hasGenre(genres[i].genre)) {
+					// album is found via the matching genre
+					foundAlbums[album.albumname] = true;
+					return false;
+				}
 			}
-			else {
-				exec(util.format(
-					'select distinct albumname, launchdate from albums '+
-					'where artist not in (%s) and %s '+
-					'order by launchdate desc, albumname', nargs(artists), q.where),
-					concat(artists,q.args),
-					onalbums);
+			return true;
+		}), query);
+		var hasAlbum = foundAlbums.hasOwnProperty.bind(foundAlbums);
+
+		for (var i = 0; i < albums.length; ++ i) {
+			foundAlbums[albums[i].albumname] = true;
+		}
+
+		var songs = fulltextSearch(collection.SortedSongs.filter(function (song) {
+			return !hasAlbum(song.album.albumname);
+		}), query);
+
+		if (order === 'date') {
+			albums.sort(albumsDateSorter);
+			songs.sort(songsDateSorter);
+		}
+
+		done(null, {
+			genres: collect(genres,'genre'),
+			albums: collect(albums,'albumname'),
+			songs:  songsByAlbum(songs)
+		});
+	},
+	'genres-artists-albums-songs': function (req, res, query, order, done) {
+		var genres = fulltextSearch(collection.SortedGenres, query);
+		var foundGenres = {};
+		var hasGenre = foundGenres.hasOwnProperty.bind(foundGenres);
+
+		for (var i = 0; i < genres.length; ++ i) {
+			foundGenres[genres[i].genre] = true;
+		}
+
+		var artists = fulltextSearch(collection.SortedArtists, query);
+		var foundArtists = {};
+		var hasArtist = foundArtists.hasOwnProperty.bind(foundArtists);
+
+		for (var i = 0; i < artists.length; ++ i) {
+			foundArtists[artists[i].artist] = true;
+		}
+
+		var foundAlbums = {};
+		var albums = fulltextSearch(collection.SortedAlbums.filter(function (album) {
+			if (hasArtist(album.artist.artist)) {
+				foundAlbums[album.albumname] = true;
+				return false;
 			}
 
-			function onalbums (err, rows) {
-				if (err) return done(err);
-				var albums = collect(rows,'albumname');
-				var q = buildQuery(['songs.desc'],query);
-				var songsOrder = order === 'name' ?
-					'songs.albumname, number' :
-					'launchdate desc, songs.albumname, number';
-				
-				exec(util.format(
-					'select number, desc, duration, mp3, songs.albumname from songs '+
-					'inner join albums on songs.albumname = albums.albumname '+
-					'where songs.albumname not in (%s) and '+
-					'albums.artist not in (%s) and %s '+
-					'order by %s', nargs(albums), nargs(artists), q.where, songsOrder),
-					concat(albums,artists,q.args),
-					function (err, rows) {
-						if (err) return done(err);
-						done(null,{
-							artists: artists,
-							albums: albums,
-							songs: songsByAlbum(rows)
-						});
-					});
+			var genres = album.genres;
+			for (var i = 0; i < genres.length; ++ i) {
+				if (hasGenre(genres[i].genre)) {
+					foundAlbums[album.albumname] = true;
+					return false;
+				}
 			}
+			return true;
+		}), query);
+		var hasAlbum = foundAlbums.hasOwnProperty.bind(foundAlbums);
+
+		for (var i = 0; i < albums.length; ++ i) {
+			foundAlbums[albums[i].albumname] = true;
 		}
-	},
-	'genres-albums-songs': function (req, res, query, order, done) {
-		var q = buildQuery(['genre'],query);
-		exec(util.format(
-			'select distinct genre from genres where %s order by genre', q.where),
-			q.args,
-			function (err, rows) {
-				if (err) return done(err);
-				var genres = collect(rows,'genre');
-				var q = buildQuery(['albums.albumname'],query);
-				var albumsOrder = order === 'name' ?
-					"albums.albumname" :
-					"launchdate desc, albums.albumname";
-				exec(util.format(
-					'select distinct albums.albumname from albums '+
-					'inner join genres on genres.albumname = albums.albumname '+
-					'where genre not in (%s) and %s '+
-					'order by %s', nargs(genres), q.where, albumsOrder),
-					concat(genres,q.args),
-					function (err, rows) {
-						if (err) return done(err);
-						var albums = collect(rows,'albumname');
-						var q = buildQuery(['songs.desc'],query);
-						var songsOrder = order === 'name' ?
-							'genre, songs.albumname, number' :
-							'launchdate desc, genre, songs.albumname, number';
-						exec(util.format(
-							'select distinct number, desc, duration, mp3, songs.albumname from songs '+
-							'inner join albums on songs.albumname = albums.albumname '+
-							'inner join genres on albums.albumname = genres.albumname '+
-							'where songs.albumname not in (%s) and '+
-							'genre not in (%s) and %s '+
-							'order by %s', nargs(albums), nargs(genres), q.where, songsOrder),
-							concat(albums,genres,q.args),
-							function (err, rows) {
-								if (err) return done(err);
-								done(null,{
-									genres: genres,
-									albums: albums,
-									songs: songsByAlbum(rows)
-								});
-							});
-					});
-			});
-	},
-	'genres-artists-albums-songs': function (req, res, query, order, done) {
-		var q = buildQuery(['genre'],query);
-		exec(util.format(
-			'select distinct genre from genres where %s order by genre', q.where),
-			q.args,
-			function (err, rows) {
-				if (err) return done(err);
-				var genres = collect(rows,'genre');
-				var q = buildQuery(['artist'],query);
-				if (order === "name") {
-					exec(util.format(
-						'select distinct artist from albums '+
-						'inner join genres on genres.albumname = albums.albumname '+
-						'where genre not in (%s) and %s '+
-						'order by artist', nargs(genres), q.where),
-						concat(genres,q.args),
-						onartists);
-				}
-				else {
-					exec(util.format(
-						'select artist, max(launchdate) as latestdate from albums '+
-						'inner join genres on genres.albumname = albums.albumname '+
-						'where genre not in (%s) and %s '+
-						'group by artist '+
-						'order by latestdate desc, artist', nargs(genres), q.where),
-						concat(genres,q.args),
-						onartists);
-				}
 
-				function onartists (err, rows) {
-					if (err) return done(err);
-					var artists = collect(rows,'artist');
-					var q = buildQuery(['albums.albumname'],query);
-					var albumsOrder = order === "name" ?
-						"albums.albumname" :
-						"launchdate desc, albums.albumname";
-					exec(util.format(
-						'select distinct albums.albumname from albums '+
-						'inner join genres on genres.albumname = albums.albumname '+
-						'where genre not in (%s) and '+
-						'albums.artist not in (%s) and %s '+
-						'order by %s', nargs(genres), nargs(artists), q.where, albumsOrder),
-						concat(genres,artists,q.args),
-						function (err, rows) {
-							if (err) return done(err);
-							var albums = collect(rows,'albumname');
-							var q = buildQuery(['songs.desc'],query);
-							var songsOrder = order === "name" ?
-								'genre, songs.albumname, number' :
-								'launchdate desc, genre, songs.albumname, number';
-							exec(util.format(
-								'select distinct number, desc, duration, mp3, songs.albumname from songs '+
-								'inner join albums on songs.albumname = albums.albumname '+
-								'inner join genres on albums.albumname = genres.albumname '+
-								'where songs.albumname not in (%s) and '+
-								'genre not in (%s) and '+
-								'albums.artist not in (%s) and %s '+
-								'order by %s', nargs(albums), nargs(genres), nargs(artists), q.where, songsOrder),
-								concat(albums,genres,artists,q.args),
-								function (err, rows) {
-									if (err) return done(err);
-									done(null,{
-										genres: genres,
-										artists: artists,
-										albums: albums,
-										songs: songsByAlbum(rows)
-									});
-								});
-						});
-				}
-			});
+		var songs = fulltextSearch(collection.SortedSongs.filter(function (song) {
+			return !hasAlbum(song.album.albumname);
+		}), query);
+
+		if (order === 'date') {
+			// Date order applies to the artist list as well, exactly as in
+			// 'artists-albums-songs'. `artists` is the array returned by
+			// fulltextSearch, so sorting it in place leaves the collection alone.
+			artists.sort(artistsDateSorter);
+			albums.sort(albumsDateSorter);
+			songs.sort(songsDateSorter);
+		}
+
+		done(null, {
+			genres:  collect(genres,'genre'),
+			artists: collect(artists,'artist'),
+			albums:  collect(albums,'albumname'),
+			songs:   songsByAlbum(songs)
+		});
 	}
 };
 
 	res.redirect('http://magnatune.com/favicon.ico');
 });
 
+app.get('/api/stat',          action(ACTIONS.stat));
 app.get('/api/index',         action(ACTIONS.index));
 app.get('/api/album',         action(ACTIONS.album));
 app.get('/api/album/:name',   action(ACTIONS.album));
 		C.Countries   = {};
 		C.Songs       = {};
 
-		var strings = {};
-		var hasString = Object.prototype.hasOwnProperty.bind(strings);
-	
-//		async.parallel([
 		async.series([
 
 			// load artists
 					C.SortedCountries = [];
 
 					for (var i = 0; i < artists.length; ++ i) {
-						var artist = internFields(artists[i],'artist','homepage','city','state','country');
+						var artist = artists[i];
 
 						if (artist.country) {
 							var country;
 						}
 
 						artist.albums = [];
-						addVector(artist, ['artist','bio','description']);
+						addVector(artist, [artist.artist]);
 
 						C.Artists[artist.artist] = artist;
 					}
 
 					C.SortedCountries.sort(function (lhs,rhs) {
-						lhs = lhs.country;
-						rhs = rhs.country;
-						return lhs < rhs ? -1 : rhs < lhs ? 1 : 0;
+						return fuzzyCompare(lhs.country, rhs.country);
+					});
+
+					C.SortedArtists.sort(function (lhs,rhs) {
+						return fuzzyCompare(lhs.artist, rhs.artist);
 					});
 
 					console.timeEnd("loading artists");
 						var also  = (album.also||'').trim();
 
 						album.launchdate = Number(album.launchdate);
-						album.also  = also ? also.split(/\s+/).map(intern) : [];
-						album.songs = [];
+						album.also   = also ? also.split(/\s+/) : [];
+						album.songs  = [];
+						album.genres = [];
 
-						addVector(album, ['albumname', 'artist', 'description']);
+						addVector(album, [album.albumname, album.artist]);
 
-						C.AlbumsBySku[album.sku] = C.Albums[album.albumname] = internFields(album,'artist','albumname','sku');
+						C.AlbumsBySku[album.sku] = C.Albums[album.albumname] = album;
 					}
 
+					C.SortedAlbums.sort(function (lhs,rhs) {
+						return fuzzyCompare(lhs.albumname, rhs.albumname);
+					});
+
 					console.timeEnd("loading albums");
 
 					done(null);
 					console.time("loading genres");
 					var genres = C.SortedGenres = [];
 					for (var i = 0; i < albums_to_genres.length; ++ i) {
-						var album_to_genre = internFields(albums_to_genres[i],'genre','albumname');
+						var album_to_genre = albums_to_genres[i];
 						var genre;
 						if (C.hasGenre(album_to_genre.genre)) {
 							genre = C.Genres[album_to_genre.genre];
 								albums: []
 							};
 							
-							addVector(genre, ['genre']);
+							addVector(genre, [genre.genre]);
 
 							genres.push(genre);
 						}
 						genre.albums.push(album_to_genre.albumname);
 					}
 
+					C.SortedGenres.sort(function (lhs,rhs) {
+						return fuzzyCompare(lhs.genre, rhs.genre);
+					});
+
 					console.timeEnd("loading genres");
 
 					done(null);
 						var song = songs[i];
 						song.number   = Number(song.number);
 						song.duration = Number(song.duration);
-						song = internFields(song,'albumname','mp3','desc');
 						C.Songs[song.mp3] = song;
 					}
 
+					C.SortedSongs.sort(function (lhs,rhs) {
+						var cmp = fuzzyCompare(lhs.albumname, rhs.albumname);
+						return cmp === 0 ? lhs.number - rhs.number : cmp;
+					});
+
 					console.timeEnd("loading songs");
 
 					done(null);
 				var album = C.Albums[song.albumname];
 				album.songs.push(song);
 				song.album = album;
-				addVector(song, ['albumname','desc'], [album.artist]);
+				addVector(song, [song.albumname, song.desc, album.artist]);
 				delete song.albumname;
 			}
 			console.timeEnd("build references of songs");
 				album.relatedAlbums = album.also.map(function (sku) {
 					return C.AlbumsBySku[sku];
 				});
+				// Track the newest launch date per artist. The comparison is negated
+				// so that it also evaluates to true while latestdate is still undefined.
+				if (!(artist.latestdate >= album.launchdate)) {
+					// The value used to be stored under the misspelled key `latestdata`
+					// only, so the test above never saw it. The old key is kept in sync
+					// for code that still reads that spelling.
+					artist.latestdate = artist.latestdata = album.launchdate;
+				}
 				artist.albums.push(album);
 				album.artist = artist;
 			}
 			for (i = 0; i < C.SortedGenres.length; ++ i) {
 				var genre = C.SortedGenres[i];
 				genre.albums = genre.albums.map(function (albumname) {
-					return C.Albums[albumname];
+					var album = C.Albums[albumname];
+					album.genres.push(genre);
+					return album;
 				});
 			}
 			console.timeEnd("build references of genres");
 
 			done(null, C);
 		});
-
-		function addVector (record,fields,values) {
-			var vector = {};
-			var sources = fields.map(function (field) { return record[field]; });
-			if (values) sources.push.apply(sources,values);
-
-			for (var i = 0; i < sources.length; ++ i) {
-				var source = (sources[i]||'').trim();
-				if (source) {
-					source = unorm.nfkc(source).toLowerCase().split(/\s+/g);
-					for (var j = 0; j < source.length; ++ j) {
-						vector[source[j]] = true;
-					}
-				}
-			}
-
-			record.vector = Object.keys(vector).sort().join(' ');
-		}
-
-		function intern (s) {
-			if (hasString(s)) {
-				return strings[s];
-			}
-			else {
-				return (strings[s] = s);
-			}
-		}
-
-		function internFields (obj) {
-			for (var i = 1; i < arguments.length; ++ i) {
-				var key = arguments[i];
-				obj[key] = intern(obj[key]);
-			}
-			return obj;
-		}
 	}
 };