#!/usr/bin/perl -w
#
# Converts a diff between two Audible.Com catalogue .tab files
# such as are found at
#   http://www.audible.com/catalog/catalog-audiobooks.tab
# into a human-readable format.
#
# Suggested to be used with the 'audible_newbooks' shell script.
#
# Input (files named on the command line, or STDIN) is scanned for lines
# starting with "< " (removed) and "> " (added); every other line is ignored.
# Each such line is a tab-separated record whose fields appear in the order
# given by @categories.  A record whose additions outnumber its removals is
# printed as a new book, one "Category: value" line per field, followed by a
# blank line.
#
# Last updated by rb on Wed Feb 25 17:25:12 EST 2004
#

use strict;
use warnings;

# Package globals (kept global rather than lexical, as in the original):
#   %ALL_DATA   - record text => (times added) - (times removed)
#   %TITLES     - title => times seen on an added line; only consumed by the
#                 disabled duplicate-title code further down
#   $DEBUG      - set to 1 for diagnostic output
#   @categories - field names, in the order they occur in a record
our (%TITLES, %ALL_DATA, $DEBUG, @categories);

$DEBUG = 0;
#$DEBUG = 1;

# Order of categories in diff file (fixed)
@categories = (
    'Title',
    'Author',
    'Author Surname',
    'Narrator',
    'Running Time',
    'Price',
    'Abridgedness',
    'Category',
    'Formats',
);

#
# Main code begins here
#

while (my $line = <>) {
    # Strip the line terminator (LF or CRLF) before the text is used as a
    # hash key.  Otherwise the newline ends up inside the last field that
    # gets printed, and a record that differs only in its line ending (or a
    # final line without a newline) would not cancel against its twin.
    $line =~ s/\r?\n\z//;

    if ($line =~ s/^< //) {
        # Record removed from the catalogue.
        $ALL_DATA{$line}--;
    }
    elsif ($line =~ s/^> //) {
        # Record added to the catalogue.
        $ALL_DATA{$line}++;

        # For new books we check the title isn't replicated.
        my ($title) = split(/\t/, $line);
        $title = '' unless defined $title;    # empty record: no fields at all
        $TITLES{$title}++;
        # $DEBUG && print "Score up to $ALL_DATA{$line}/$TITLES{$title} for $title\n";
    }
}

# The duplicate-title detection below is unfinished and disabled by wrapping
# it in POD; it is kept verbatim for reference.
# NOTE(review): if it is ever re-enabled, the "Match number" debug statement
# lacks a 'print', and @matchlines probably ought to collect the matching
# record rather than the title -- confirm before use.

=pod

foreach (keys %TITLES) {
    my $title = $_;
    my $score = $TITLES{$_};
    next unless $score > 1; # Only >1 titles are doubles
    $DEBUG && print "Double Title Detected: $title\n";
    my @matchlines;
    foreach (sort keys %ALL_DATA) {
        my @data = split(/\t/);
        my $test_title = shift(@data);
        if ($test_title eq $title) {
            push @matchlines, $title;
            $DEBUG && "Match number $#matchlines for $title!\n";
        }
    }
    # XXX TODO
    # deal with @matchlines here - find differences and point out.
    # put into printable form in an array then just join the array
    # on "\n" and print it!
}

=cut

foreach my $record (sort keys %ALL_DATA) {
    my $score = $ALL_DATA{$record};
    next unless $score > 0;    # Only +ve scores are new books
    $DEBUG && print "Score for this book: $score\n";

    my @data = split(/\t/, $record);

    foreach my $category (@categories) {
        if ($category eq 'Formats') {
            # handle specially - at end of line so contains all the rest
            print "$category: " . join(" ", @data) . "\n";
            next;
        }

        # Short records run out of fields early; treat a missing field as
        # empty instead of interpolating undef (which warns under -w).
        my $value = shift(@data);
        $value = '' unless defined $value;

        # The surname is consumed to keep the fields aligned but not printed.
        next if $category eq 'Author Surname';

        if ($category eq 'Title') {
            # take off (Unabridged)/(Abridged) - double info annoys
            $value =~ s/\s+\((un)?abridged\)//i;
        }
        elsif ($category eq 'Abridgedness') {
            # sometimes not specified
            $value ||= "Unknown";
        }

        print "$category: $value\n";
    }

    # Blank line between books.
    print "\n";
}

#
# Ende.
#