#!/usr/bin/perl
#
# Update the database of episodes with airdate info extracted from the
# Doctor Who wikipedia page being piped into us.
#
# The database is an INI-like text file: a "[basename]" line starts a
# record and every following "key=value" line adds a field to that record.
# Fields already present in the database are never overwritten; this script
# only fills in an 'airdate' field where a record does not have one yet.
#
use strict;
use warnings;
use File::Basename qw(dirname);

my $topdir = "/huge/vids";
my $whodir = "$topdir/DoctorWho";
my $dbdir  = "$whodir/.data";
my $dbfile = "$dbdir/allinfo.txt";
my %db;    # basename => { field => value, ... }

# Get PATH set to include this script's directory and other useful bits.
# The helper "echo-path" lives next to this script and prints the PATH to use.
# The directory is shell-quoted so a path containing spaces or shell
# metacharacters cannot break (or inject into) the command line.
my $script_dir = dirname($0);
my $newpath    = `\Q$script_dir\E/echo-path`;
$newpath = '' unless defined($newpath);
chomp($newpath);
if ($newpath ne '') {
    $ENV{'PATH'} = $newpath;
}
else {
    # Keep the inherited PATH rather than blanking it when the helper fails.
    warn "$0: $script_dir/echo-path produced no output; PATH left unchanged\n";
}

# Read in the existing database (if any) to start with known data
# so this update won't discard any information.
if (open(my $fh, '<', $dbfile)) {
    my $record;
    while (my $line = <$fh>) {
        chomp($line);
        if ($line =~ /^\[(.+)\]$/) {
            $record = {};
            $db{$1} = $record;
        }
        elsif (defined($record) && $line =~ /^([A-Za-z0-9_]+)=(.+)$/) {
            $record->{$1} = $2;
        }
    }
    close($fh);
}

# Parser state for the HTML table being read from the input.
my $in_row   = 0;     # true while inside a table row
my $in_col   = 0;     # true while inside a table cell
my @cols;             # cell contents collected for the current row
my $col_data = '';    # text of the cell currently being collected

# Month name (full or three-letter, lower case) => month number.
my %months = (
    january   => 1,  february => 2,  march    => 3,  april    => 4,
    may       => 5,  june     => 6,  july     => 7,  august   => 8,
    september => 9,  october  => 10, november => 11, december => 12,
    jan       => 1,  feb      => 2,  mar      => 3,  apr      => 4,
    jun       => 6,  jul      => 7,  aug      => 8,  sep      => 9,
    oct       => 10, nov      => 11, dec      => 12,
);

# process_cols(\@cols)
#
# Takes a reference to the cells of one table row.  Cell 0 must contain at
# least one title="..." attribute and cell 5 at least one "DD Month YYYY"
# date.  Both cells are split into the same number of parts, and each
# title/date pair adds an 'airdate' (YYYY-MM-DD) to the matching record in
# %db, creating the record if needed.  An existing 'airdate' is left alone.
# Returns nothing.
sub process_cols {
    my $ar = shift;

    my $title_cell = $ar->[0];
    my $date_cell  = $ar->[5];
    return unless defined($title_cell) && defined($date_cell);
    return unless $title_cell =~ /title=/;
    return unless $date_cell =~ /\d+\s+[A-Za-z]+\s+\d+/;

    # NOTE(review): the separator pattern of these two split() calls was
    # lost from the copy of the script this revision was made from; only
    # the operands ($ar->[0] and $ar->[5]) survived.  Splitting on <br>
    # tags is a reconstruction -- confirm against the original.
    my @titles = split(/<br\b[^>]*>/i, $title_cell);
    my @dates  = split(/<br\b[^>]*>/i, $date_cell);

    # Titles and dates are paired by position, so the counts must agree.
    return unless scalar(@titles) == scalar(@dates);

    while (scalar(@titles) > 0) {
        my $t = shift(@titles);
        my $d = shift(@dates);

        next unless $t =~ /title=\"([^\"]+)\"/;
        $t = $1;

        # Reduce the link title to the CamelCase basename used as the
        # database key.
        $t =~ s/\(Doctor Who\)//g;
        $t =~ s/Doctor Who\://g;
        $t =~ s/^\s+//;
        $t =~ s/\s+$//;
        $t =~ s/\&[a-zA-Z0-9_]+\;//g;
        $t =~ s/([\w']+)/\u\L$1/g;
        $t =~ s/[^A-Za-z_0-9]//g;
        $t =~ s/DoctorWhoEpisode$//;

        # An empty key would be written as "[]", which the loader above
        # does not recognise as a record header; its fields would then be
        # attached to the preceding record on the next run.
        next if $t eq '';

        next unless $d =~ /(\d+)\s+([A-Za-z]+)\s+(\d+)/;
        my ($day, $month_name, $year) = ($1, $2, $3);

        # Skip unknown month names instead of recording month "00": a
        # stored airdate is never overwritten, so a bad one would stick.
        my $m = $months{ lc($month_name) };
        next unless defined($m);

        $d = sprintf("%04d-%02d-%02d", $year, $m, $day);

        my $r = $db{$t};
        if (!defined($r)) {
            $r = {};
            $db{$t} = $r;
        }
        if (!exists($r->{'airdate'})) {
            $r->{'airdate'} = $d;
        }
    }
    return;
}

# NOTE(review): most of this loop's body was lost from the copy of the
# script this revision was made from.  What survived is a first test
# anchored at the start of the line on a tag ("/^\<...") and the final
# step: once six or more cells have been collected, hand them to
# process_cols() and clear the list.  The cell collection between those two
# points is a reconstruction (rows open on <tr>, only <td> cells are
# counted, multi-line cells are joined with a space) -- confirm against the
# original before relying on it.
while (my $line = <>) {
    chomp($line);

    while ($line ne '') {
        if ($in_col) {
            if ($line =~ s/^(.*?)<\/td\b[^>]*>//i) {
                # End of the current cell.
                push(@cols, $col_data . $1);
                $col_data = '';
                $in_col   = 0;
                if (scalar(@cols) >= 6) {
                    process_cols(\@cols);
                    @cols = ();
                }
            }
            else {
                # The cell continues on the next input line.
                $col_data .= "$line ";
                $line = '';
            }
        }
        elsif ($line =~ s/^.*?<(\/?tr|td)\b[^>]*>//i) {
            my $tag = lc($1);
            if ($tag eq 'tr') {
                $in_row   = 1;
                @cols     = ();
                $col_data = '';
            }
            elsif ($tag eq '/tr') {
                $in_row = 0;
                @cols   = ();
            }
            elsif ($in_row) {
                $in_col = 1;
            }
        }
        else {
            # No further row or cell tags on this line.
            $line = '';
        }
    }
}

# Sort comparator for database keys: order by airdate (records without one
# sort first, as the empty string), breaking ties by key name.
sub compare_airdate {
    my $ada = $db{$a}->{'airdate'};
    my $adb = $db{$b}->{'airdate'};
    $ada = '' unless defined($ada);
    $adb = '' unless defined($adb);
    my $rval = $ada cmp $adb;
    if ($rval == 0) {
        $rval = $a cmp $b;
    }
    return $rval;
}

# Save new db (keeping backup).
# The new contents go to a temporary file in the same directory and are
# moved into place with rename() only after every write succeeded, so a
# failure at any step leaves the current database untouched.
my $dbtemp = "$dbfile.$$";
open(my $dbh, '>', $dbtemp)
    or die "$0: cannot create $dbtemp: $!\n";

my $write_ok = 1;
foreach my $bn (sort compare_airdate keys %db) {
    my $r = $db{$bn};
    print {$dbh} "\n[$bn]\n" or $write_ok = 0;
    foreach my $key (sort(keys(%{$r}))) {
        my $val = $r->{$key};
        print {$dbh} "$key=$val\n" or $write_ok = 0;
    }
}
# Buffered write errors only surface at close time.
close($dbh) or $write_ok = 0;

if (!$write_ok) {
    my $err = "$!";
    unlink($dbtemp);
    die "$0: error writing $dbtemp: $err\n";
}

if (-e $dbfile) {
    # The hard link keeps the old contents reachable as the backup once
    # rename() replaces the directory entry for $dbfile.
    unlink("$dbfile.bak");
    link($dbfile, "$dbfile.bak")
        or warn "$0: cannot create backup $dbfile.bak: $!\n";
}

rename($dbtemp, $dbfile)
    or die "$0: cannot replace $dbfile with $dbtemp: $!\n";