#!/usr/bin/perl
use CGI;
# Enforce declarations and let runtime problems show up in the server log.
use strict;
use warnings;

# NOTE(review): $cgi is never read in this script; the object is still
# constructed in case its request parsing is relied upon - confirm before
# removing.
my $cgi = CGI->new();
print "Content-Type: text/xml\n\n";

# Read the whole homepage into $html.  The path is relative, so it is
# resolved against the working directory the script is started in.
my $html = "";
if (open(my $homepageFh, '<', 'index.html')) {
    local $/;    # slurp mode: one read returns the complete file
    my $content = <$homepageFh>;
    $html = $content if defined $content;
    close($homepageFh);
}
else {
    # Best effort: carry on with an empty document so the rest of the
    # script still runs, but record why the feed has no items.
    warn "rss: cannot open index.html: $!\n";
}

# Keep only what sits between <div id="news"> and the closing
# </div><!-- /news--> marker.  When the markers are absent the
# substitution does not match and $html is left as it is.
$html =~ s/^.+?<div id="news">(.+?)<\/div><!-- \/news-->.+$/$1/sm;

# Strip HTML comments.  There is no /s modifier, so only comments that
# fit on a single line are matched.
$html =~ s/<!-- .+?-->//g;

# Remove the first <div id="rss">...</div> (single line only, no /s).
$html =~ s/<div id=\"rss\">.+?<\/div>//;

# Remove every remaining opening and closing div tag, keeping the content.
$html =~ s/<\/?div.*?>//g;

# Remove all tab characters.
$html =~ tr/\t//d;


# Convert a date into the pubDate layout used by the feed:
# "Day, DD Mon YYYY 00:00:01 EST", e.g. "2007-01-01" becomes
# "Mon, 01 Jan 2007 00:00:01 EST".  The time of day and the zone are fixed
# text; they are not derived from the input.
#
# Parameters:
#   $_[0] - the date to convert; one trailing newline is ignored.
# Returns:
#   The formatted date, or an empty string when the date cannot be
#   interpreted.
#
# Plain YYYY-MM-DD dates are converted in Perl, without starting a process
# and with English day/month names regardless of locale.  Any other input
# is still handed to date(1), but as a single argument and without a shell,
# so quotes or semicolons in the input cannot run additional commands.
sub formatDate {
    my ($date) = @_;
    $date = '' unless defined $date;
    chomp($date);

    my $isoResult = _formatIsoDate($date);
    return $isoResult if defined $isoResult;

    # List-form pipe open: the arguments go straight to date(1), no shell.
    my $okDate = '';
    if (open(my $dateCmd, '-|', 'date', "--date=$date", '+%a, %d %b %Y 00:00:01 EST')) {
        my $output = do { local $/; <$dateCmd> };
        close($dateCmd);
        $okDate = $output if defined $output;
        chomp($okDate);
    }
    return $okDate;
}

# Format a valid YYYY-MM-DD calendar date; returns undef for anything else
# so that the caller can fall back to date(1).
sub _formatIsoDate {
    my ($date) = @_;
    return unless $date =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})\z/;
    my ($year, $month, $day) = ($1 + 0, $2 + 0, $3 + 0);

    # Years below 1000 are left to date(1) so its own parsing rules apply.
    return if $year < 1000 || $month < 1 || $month > 12 || $day < 1;

    my $isLeap = ($year % 4 == 0 && $year % 100 != 0) || $year % 400 == 0;
    my @daysInMonth = (31, ($isLeap ? 29 : 28), 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
    return if $day > $daysInMonth[$month - 1];

    # Gregorian day-of-week formula; the result 0 stands for Sunday.
    # January and February are counted as part of the previous year.
    my @monthOffset = (0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4);
    my $y = $month < 3 ? $year - 1 : $year;
    my $weekday = ($y + int($y / 4) - int($y / 100) + int($y / 400)
                   + $monthOffset[$month - 1] + $day) % 7;

    my @dayNames   = qw(Sun Mon Tue Wed Thu Fri Sat);
    my @monthNames = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

    return sprintf('%s, %02d %s %04d 00:00:01 EST',
                   $dayNames[$weekday], $day, $monthNames[$month - 1], $year);
}

############################
# Turn every news paragraph of the form
#   <p>YYYY-MM-DD&nbsp;-&nbsp;text<a href="URL">link text</a>text</p>
# into an RSS <item>: the text around and inside the link becomes the
# title, the URL becomes the link, and the leading date becomes the pubDate
# in the "Sat, 07 Sep 2002 00:00:01 EST" layout produced by formatDate().
#
# Only the captured publication date is reformatted.  Date-like strings
# inside a URL or a title are left alone, so links such as
# ".../2007-01-01.html" stay valid.
#
# NOTE(review): title and link text are copied verbatim from the HTML, so
# any HTML-only entities in them end up in the XML unchanged.
$html =~ s{<p>(\d{4}-\d{2}-\d{2})&nbsp;-&nbsp;(.*?)<a href="(.+?)">(.+?)</a>(.*?)</p>}{
    my ($isoDate, $before, $url, $linkText, $after) = ($1, $2, $3, $4, $5);
    "<item>\n<title>$before$linkText$after</title>\n<link>$url</link>\n<pubDate>"
        . formatDate($isoDate)
        . "</pubDate>\n</item>";
}ge;

# Channel header, the converted items, then the closing tags.
print <<'END_OF_HEADER';
<?xml version="1.0" encoding="iso-8859-1" ?>
<rss version="2.0" xmlns:discovery="http://www.lexum.com/e-discovery/">
<channel>
 <title>e-Discovery News</title>
 <description>e-Discovery News</description>
 <link>http://www.lexum.com/e-discovery/rss.pl</link>
END_OF_HEADER

print $html;

print "\n</channel>\n</rss>\n";

1;
