#!/opt/perl/bin/perl -w

# $Id: audiotron-index.pl,v 1.30 2004/04/25 00:14:11 phil Exp $	

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# Originally based on Deadman's AudioTron TOC Generator Version 1.0
# Portions Copyright (c) by Sam Rowe 2002 

use File::Basename;
use File::Find ();
use File::stat;
use MP3::Info;
use Set::Scalar;
use Storable;
use strict;
use vars qw/*name *dir *prune/;
use LWP;

# NOTE - You also need Text::Iconv if you want to do charset conversions
# NOTE - if you're running on something like Linux.  Not req'd for Windows.

#############################################################
# BEGIN USER CONFIGURATION
#############################################################

# The audiotron toc filename is...
# This file will be created in each of your @music dirs (see below).
# (Don't touch this one!)
my $tocfile = "atrontc.vtc";

# I will store the cached music information in...
# This file will be created in each of your @music dirs (see below).
# (NOTE: This code needs read+write access to this file)
my ($cachefile) = "atrontc.cache";

# Directories I keep my MP3 files are...
# (Yes, add/modify/delete these)
# NOTE TO WINDOWS USERS - do not put spaces in your dirnames!
my @music_dirs = qw(
		    /opt/share/mp3s/
		    );

# A valid MP3 file on my systems ends with...
# (This is probably fine)
my @filename_suffixes = qw(
			   mp3
			   mp3s
			   mp3z
			   );

# If you have music files that are getting skipped over by the Audiotron,
# it may be because they have diacritic characters (i.e. not American english)
# in their filenames.  I solved this by doing a character set conversion.
# If you want to do charset conversions, put 1 here (otherwise, put 0);
# NOTE: THIS IS IGNORED WHEN RUNNING ON WINDOWS.
my $do_charset_conversions = 1;
my $from_charset = "850";
my $to_charset   = "ISO-8859-1";

# Do you want to see if new versions of this code are available each time you run it?
# (You may have trouble going through a proxy server if you use one...)
# (1 is yes, 0 is no)
my $check_for_updates = 1;

# Some folks like to drop the "The " from the beginning of a song's Artist's 
# name, so it sorts differently: "The Beatles" becomes either "Beatles" or
# "Beatles, The".
# To drop the leading "The ", set the following variable to 1
# To move the leading "The " top the end of the group's name, set the variable to 2
# Default behaviour is to leave the artist name alone.
my $leading_the = 2;

# Optionally, we can get the Audiotron to reload the TOC file at the end, which 
# saves you from having to do this manually.  What's the IP address of your Audiotron?
# Leave this value blank (i.e. set it to "") if you want to ignore this.
my $audiotron_hostname_or_ip = "192.168.1.46";
my $audiotron_username = "admin";
my $audiotron_password = "password";

#############################################################
# END USER CONFIGURATION
#############################################################

logit("Whirlycott Audiotron TOC Generator");
logit("<http://www.whirlycott.com/phil/software/audiotron/>");
logit("Copyright (C) 2003, 2004 Philip Jacob");
logit("Starting...");

#Do some directory checks before starting.
foreach my $dir ( @music_dirs ) {
    die ("Directory $dir doesn't exist!  Exiting.", 1) unless ( -e $dir );
}

#Some vars that File::Find might want to toy with.
*name   = *File::Find::name;
*dir    = *File::Find::dir;
*prune  = *File::Find::prune;

#Init the var that will hold all the mp3s that we find.
#This has to be package global for File::Find to be happy :(
my @list;

#Foreach music dir we find, we run the program.
foreach my $dir (@music_dirs) {
    handler($dir);
}      

#Get the AT to reload the TOC file.
reload_toc_file( $audiotron_hostname_or_ip, $audiotron_username, $audiotron_password );

#Check for updates?
if ( $check_for_updates ) {
    logit("Checking to see if a newer version of this code is available...");
    check_for_updates();
}

logit("Exiting happily.");

#############################################################
# SUBROUTINES
#############################################################


#This is the main entry point routine that gets called for each @music_dir.
sub handler {
    my ($dir) = @_;

    #What time did we start doing this?
    my $check_time = time;
    logit("Time stamp for this run on $dir will be: " . pretty_format_unixtime($check_time) );
    
    #Re-init this var because otherwise it will contain data from the list loop.
    #It needs to be a package global for the File::Find module.
    @list = ();
    
    #Open up the cache file if it exists.
    my (%cache);
    if ( -e "$dir/$cachefile" ) {
	my $ref_stat = stat("$dir/$cachefile");
	logit("Reading in cachefile of " . int ($$ref_stat[7] / 1024) . " Kb ...");
	%cache = %{ retrieve( "$dir/$cachefile" ) };
    } else {
	logit("Cache file wasn't found... I'll create one later.");
    }
    
    #Go searching for mp3s.
    File::Find::find( {wanted => \&wanted}, 
		      $dir );
    logit("Found " . scalar @list . " MP3 files...");


    #Set::Scalar is just wonderful.  Seriously.
    #What a chore this would be otherwise!
    my $set_cache = Set::Scalar->new( keys %cache );
    my $set_files = Set::Scalar->new( @list );

    #Get the intersection of the two sets.
    #This will be checked using stat().
    my $set_intersection = $set_cache * $set_files;

    #Make a list of mp3s that are gone.
    #We can just delete these altogether.
    #In fact, I can just not do this at all...!
    #my $set_gone = $set_cache - $set_intersection;

    #This is the list of mp3s to be added.
    #All of these need to be examined by MP3::Info.
    my $set_fullcheck = $set_files - $set_intersection;
    
    #For the intersection, we need to see if they're changed or modified.
    #For those files that are different, we need to add them to the fullcheck list.
    my $add_to_fullcheck = partial_check( $set_intersection,
					  $cache{'_last_runtime'} );

    #Now, at this point, we have:
    #  $add_to_fullcheck: needs to be fully scanned because they have changed
    #  $set_fullcheck: needs to be fully scanned because they are new
    #  ( $set_intersection - $set_fullcheck ): get these from the cache because they are the same as before

    #Let's combine the two sets that need to be fully scanned into one set.
    #Once again, I have to say that I love Set::Scalar.  So simple:
    $set_fullcheck = $set_fullcheck + $add_to_fullcheck;

    #We will make a new cache file based on a new var.
    my %newcache;
    $newcache{'_last_runtime'} = $check_time;

    #Let's populate the NEW cache with the "good" data from the old one...
    #if there is any data to reuse.
    if ( (keys %cache) > 0 ) {
	reuse_good_data( \%newcache,
			 \%cache,
			 ( $set_intersection - $set_fullcheck ) );
    }

    #Now, the real work of digging through the rest doing fullchecks.
    fullcheck( \%newcache,
	       $set_fullcheck ) ;

    #Store the cache file.
    logit("Storing cache file with updated timestamp...");
    store( \%newcache,
	   "$dir/$cachefile" );
    logit("\tDone!");

    #Generate the TOC file.
    logit("Generating TOC...");
    write_toc( $dir,
	       \%newcache );
    logit("\tDone!");
    
}


#Make the toc.
sub write_toc {
    my ($dir, $r_newcache) = @_;

    #Message if we're running on Windows....
    logit("Skipping character set conversions because we're running on MS Windows...") if running_on_windows();

    #Just store the whole toc in memory...
    my $toc;
    
    #Delete the meta keys.
    delete $r_newcache->{'_last_runtime'};

    #This kind of sucks, but I'm going to make a quick loop here
    #in order to reset the FILE field to something useful for the next
    #step.
    while ( my ($k, $v) = each %$r_newcache ) {
	
	if ( !$v->{FILE} ) {
	    logit("Problem: $k");
	    next;
	}

	#logit("Base: $v->{FILE}");
	$v->{FILE} = basename( $v->{FILE} );
    }

    #Loop through the newcache.
    #According to the Audiotron API docs, it helps if the files are alpha-sorted.
    my @sorted = sort {
	( $r_newcache->{$a}->{FILE} || "" )
	    cmp 
	    ( $r_newcache->{$b}->{FILE} || "" );
    }
    keys %$r_newcache;

    #Log a message once as opposed to on each iteration.
    #Also, require the iconv module and set it up.
    my $convertor;
    if ( !running_on_windows() && $do_charset_conversions ) {
	logit ("\tWill do character set conversions from $from_charset to $to_charset.");
	require Text::Iconv;
	$convertor = Text::Iconv->new( $from_charset, $to_charset);
    }
    
    foreach my $key ( @sorted ) {
	
	my $info = $r_newcache->{$key};
	
	#Do some manips on the dirname value.
	my $dirname = dirname( $key );
	$dirname =~ s/$dir//;
  	$dirname =~ s/\//\\/g;
	$dirname .= "\\";
	
	my $filename = basename $key;
	
	#Fixup the character set, but skip it if we're running on Windows.
	if ( !running_on_windows() && $do_charset_conversions ) {
	    $filename = $convertor->convert( $filename ) || logit( "Problem converting string: $key" );
	    $dirname  = $convertor->convert( $dirname  ) || logit( "Problem converting string: $dirname" );
	}
	
	#Process the leading "The "treatment.
	my $artist = process_leading_the( $info->{ARTIST} );

	$toc .= "SONG\n";
	$toc .= "FILE=" . $filename . "\n";
	$toc .= "DIR =$dirname\n";
	$toc .= "TCON=" . ( $info->{GENRE} ? $info->{GENRE} : "Unknown" ) . "\n";
	$toc .= "TLEN=" . ( $info->{SECS} ? int $info->{SECS} : "" ) . "\n";
	$toc .= "TRCK=" . ( $info->{TRACKNUM} ? $info->{TRACKNUM} : 0 ) . "\n";
	$toc .= "TALB=" . ( $info->{ALBUM} ? $info->{ALBUM} : "" ) . "\n";
	$toc .= "TPE1=" . ( $artist ? $artist : $dirname ) . "\n";
	$toc .= "TIT2=" . ( $info->{TITLE} ? $info->{TITLE} : $filename ) . "\n";
	$toc .= "END \n";
	
    }
    
    #Open up the toc file and write into it.
    open (F, ">$dir/$tocfile");
    print F $toc;
    close F;

    #Set the file permissions to something that the AT can see, probably.
    chmod 0644, "$dir/$tocfile";
}



#Handles moving the leading "The " substring around, if necessary.
sub process_leading_the {
    my ($artist) = @_;
    my $the = "The ";

    if ( $artist && $leading_the == 1 && substr($artist, 0, 4) eq $the ) {
	#Remove the leading "the"
	$artist = substr($artist, 4) . "\n";
	
    } elsif ( $artist && $leading_the == 2 && substr($artist, 0, 4) eq $the ) {
	#Move the leading "the" to the end
	$artist = substr($artist, 4) . ", $the\n";
	
    }

    return $artist;

}


#Do fullchecks on all of these files and store the data into the new cache.
sub fullcheck {
    my ($r_newcache, $set_fullcheck) = @_;
    my $total = $set_fullcheck->size;

    logit("Doing full checks on " . $total . " files...");
    
    my $counter = 0;

    my $divisor = 10;
    if ( $total > 5000 ) {
	$divisor = 100;
    } elsif ($total > 1000) {
	$divisor = 50;
    }
    
    while ( defined( my $key = $set_fullcheck->each) ) {
	$r_newcache->{$key} = get_mp3_info( $key );
	$counter++;
	logit("\tProcessed $counter/$total") unless ( $counter % $divisor );
    }
    
    logit("\tDone!");
}


#Get the MP3 info from a file.
#Return a MP3::Info object.
sub get_mp3_info {
    my ($file) = @_;
    
    my $mp3 = MP3::Info->new( $file );
    
    return $mp3;
}


#Reuse "good" cache data.
sub reuse_good_data {
    my ( $r_newcache, $r_cache, $set_good ) = @_;
    
    logit("Going to reuse old cache data...");
    
    #Iterate through the set of "good" data.
    while ( defined( my $key = $set_good->each ) ) {
	
	#Add the corresponding entry from the cache.
	$r_newcache->{$key} = $r_cache->{$key};
	
    } 
    
}


#Determine if these files have changed since our last runtime.
sub partial_check {
    my ($set, $time) = @_;
    $time = $time ? $time : 0;
    my ($fullcheck) = Set::Scalar->new;

    logit("Checking cached files for modifications since: " . pretty_format_unixtime($time) . " ...");

    foreach my $file ( $set->members ) {
	my $sb = stat( $file );
	
	#logit("\tmtime is " . $sb->mtime);
	
	if ( $sb->mtime >= $time ) {
	    $fullcheck->insert( $file );
	}
	
    }

    logit("\tFound " . $fullcheck->size . " modified or changed files.");
    logit("\tDone!");
    return $fullcheck;
}


#See if the file ends with one of our allowed suffixes, as well as 
#meeting other criteria.
sub wanted {
    my $filename = $File::Find::name;

    #These .AppleDouble dirs just seem like trouble to me...
    if ( $filename  =~ /\.AppleDouble|\.Parent/ ) {
	#logit("Skipping: $File::Find::name");
	return;

    } else {
	foreach my $suffix (@filename_suffixes) {
	    if ( $_ =~ /^\w.*?\.$suffix$/i ) {
		#logit("Keeping $name");
		push (@list, $File::Find::name);
	    }
	}
    }
}


#Log msgs.
sub logit {
    my ($msg, $fatal) = @_;
    
    if ( $msg ) {
	print $msg, "\n";
    }
    
    die if $fatal;
}


#Check for updates to this code.
sub check_for_updates {
    my $ua = LWP::UserAgent->new(env_proxy  => 1,
				 keep_alive => 0,
				 timeout    => 5,
				 );
    
    my $current_version = '$Revision: 1.30 $';
    my $response = $ua->get("http://www.whirlycott.com/phil/software/audiotron/tocgenerator-current-version.txt");

    logit("\tThe version you are using is: $current_version"      );
    logit("\tThe latest version is:        " . $response->content );

}

#Are we running on MS Windows?
sub running_on_windows {
    return ($ENV{'OS'} && ($ENV{'OS'} =~ /Win/i));
}


#Get the AT to reload the TOC.
sub reload_toc_file {
    logit("Contacting the Audiotron to reload new music files...");

    my ($hostname, $username, $password) = @_;
    my $url = "http://" . $hostname . "/goform/CheckNewFilesForm";
    #logit("\t" . $url);
    my $ua = LWP::UserAgent->new(env_proxy  => 1,
				 keep_alive => 0,
				 timeout    => 30,
				 );
    push @{ $ua->requests_redirectable }, 'POST';
    push @{ $ua->requests_redirectable }, 'GET';

    #You need read-access to this code in order to run it, which will give you
    #access to the username and password anyway, so there's no real security problem
    #here.  If you disagree, just comment out the next line.
    logit("\tLogging in to $hostname:80 with $username / $password ...");

    $ua->credentials("$hostname:80", "GoAhead", 
		     $username => $password);

    my %formdata = (
		    "Hosts" => "Press to Check"
		    );
    my $response = $ua->post($url, \%formdata);

    #If you are debugging, you can uncomment this.
    #logit($response->content);

    if ($response->is_error) {
	logit("\tERROR - There was a problem reloading the TOC on $hostname");
	logit("\tERROR - Verify the username and password provided.");
    } else {
	logit("\tDone!");
    }
}


#Pretty format an Epoch time string.
sub pretty_format_unixtime {
    return scalar localtime shift;
}
