<%
#!/usr/bin/perl
#
# file:    rss.asp
# author:  erik warendorph [ew]
# date(s): 2005-10-04..2006-02-06..2006-08-01
#
# description:
#
# - transform rss feed into html
#
# Reads the feed address from the "url" query-string parameter, fetches it
# with LWP::Simple, and prints one HTML list entry per distinct article link.
# Without a "url" parameter a short help page is printed instead.
#
# NOTE(review): the copy of this file that was reviewed had every <...> span
# stripped and every entity decoded (heredocs, element regexes, "&amp;").
# The HTML markup and the XML element names below are therefore
# RECONSTRUCTED; only the visible text, the "(?![^\s>])[^>]*>(.*?)" regex
# shape and the control flow come from the original.  Confirm them against a
# pristine copy before deploying.
#
# NOTE(review): the URL is fetched as given (any scheme LWP::Simple accepts,
# presumably including file:).  Consider restricting it to http/https.
#
#------------------------------------------------------------------------------

use strict;
use warnings;

use LWP::Simple;

# Autoflush STDOUT without permanently changing the selected handle.
select((select(STDOUT), $| = 1)[0]);

# Helpers are anonymous subs on purpose: ASP/registry-style wrappers compile
# the script inside another sub, where named subs closing over lexicals
# misbehave.

# Escape text for safe inclusion in HTML text or a double-quoted attribute.
my $html_quote = sub {
    my ($text) = @_;
    $text = "" unless defined($text);
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
};

# Return the content of the first <$name ...>...</$name> element in $xml,
# or "" when there is none.
my $element_text = sub {
    my ($xml, $name) = @_;
    my ($text) = ($xml =~ m{<\Q$name\E(?![^\s>])[^>]*>(.*?)</\Q$name\E>}s);
    return defined($text) ? $text : "";
};

# Return the first non-empty element content among @names, or "".
my $first_text = sub {
    my ($xml, @names) = @_;
    foreach my $name (@names) {
        my $text = $element_text->($xml, $name);
        return $text if length($text);
    }
    return "";
};

# Feed layouts, tried in order; the first whose root element is present wins.
# NOTE(review): element names are reconstructed (see header).  The original
# had two hand-copied branches; the second one looked for "another url" when
# <link> had no content and wrote debug output to STDERR.
my @feed_formats = (
    {   # RSS 1 / RSS 2 (both contain a <channel> element)
        root  => 'channel',
        item  => 'item',
        link  => [ 'link' ],
        date  => [ 'pubDate', 'dc:date' ],   # rss 2 date, then rss 1 date
        debug => 0,
    },
    {   # presumably Atom -- TODO confirm
        root  => 'feed',
        item  => 'entry',
        link  => [ 'link', 'id' ],
        date  => [ 'updated', 'dc:date' ],
        debug => 1,
    },
);

#print "Content-type: text/html\n\n";
#print "<pre>\n";
#foreach (sort keys %ENV) {
#  print $html_quote->($_), ": ", $html_quote->($ENV{$_}), "\n";
#}
#print "</pre>\n";

# QUERY_STRING may be absent (e.g. when run from a shell).
my $query = defined($ENV{QUERY_STRING}) ? $ENV{QUERY_STRING} : "";
my ($url) = ($query =~ /(?:^|[?&;])url=(.*)$/);
$url = "" unless defined($url);

if (length($url)) {
    # The URL is attacker-controlled; never echo it unescaped.
    my $safe_url = $html_quote->($url);

    print <<EOF;
<html>
<head>
<title>$safe_url</title>
</head>
<body>

<h1>$safe_url</h1>

EOF

    # get() returns undef on any failure; treat that as an empty document.
    my $content = get("$url");
    if (not defined($content)) {
        print STDERR "rss.asp: could not fetch $url\n";
        $content = "";
    }
    #print "$content";

    my %seen = ();

  FORMAT:
    foreach my $format (@feed_formats) {
        my $root = $format->{root};
        next FORMAT
            unless $content =~ m{<\Q$root\E(?![^\s>])[^>]*>.*?</\Q$root\E>}s;

        # hack: unwrap <![CDATA[...]]> sections so their text is matched
        # like ordinary element content
        $content =~ s{ < \s* ! \s* \[ \s* CDATA \s* \[ (.*?) \] \s* \] \s* > }{$1}gsx;

        print <<EOF;
<ul>
EOF

        my $num       = 0;
        my $item_name = $format->{item};

        while ($content =~ m{<\Q$item_name\E(?![^\s>])[^>]*>(.*?)</\Q$item_name\E>}sg) {
            my $item = $1;
            #print "$item";

            # All per-item values are fresh lexicals, so nothing leaks from
            # the previous item (the original second branch kept a stale
            # $date).
            my $title = $element_text->($item, 'title');
            $title = "(unknown title)" unless length($title);

            my @link_names = @{ $format->{link} };
            my $aurl = $element_text->($item, shift @link_names);
            if (not $aurl and @link_names) {
                # Message is Norwegian for "looking for another url".
                print STDERR "ser etter annen url\n" if $format->{debug};
                $aurl = $first_text->($item, @link_names);
            }

            my $date = $first_text->($item, @{ $format->{date} });

            # special transformation of article urls for thisis rss feeds
            # (these are longer than 250 characters which is a problem for
            # our system)
            if ($url =~ m{^http://rss\.thisis\.co\.uk/}
                and $aurl =~ /\bdestination=([^&]+)/) {
                ($aurl = $1) =~ s/\%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg;
            }

            print STDERR "$title, $aurl, $date\n" if $format->{debug};

            if (length($aurl) and not $seen{$aurl}) {
                $num++;

                # Escape bare ampersands only (existing entities are kept),
                # plus the characters that could break out of the attribute.
                my $esc_aurl = $aurl;
                $esc_aurl =~ s/&(?![A-Za-z0-9]+;)/&amp;/g;
                $esc_aurl =~ s/</&lt;/g;
                $esc_aurl =~ s/>/&gt;/g;
                $esc_aurl =~ s/"/&quot;/g;

                # NOTE(review): $title and $date are printed as found in the
                # feed, as in the original; they may contain markup.
                print <<EOF;
<li>$date <a href="$esc_aurl">$title</a><br>
&lt;$esc_aurl&gt;</li>
EOF
            }
            $seen{$aurl}++;
        }

        print <<EOF;
</ul>

<p>$num articles</p>

EOF

        last FORMAT;
    }

    # list of feeds (disabled in the original):
    #   while ($content =~ m{<a\b[^>]*\bhref="([^\"]*)"[^>]*>(.*?)</a>}sg) {
    #     $title = $2;
    #     $aurl  = $1;
    #     ... print one list entry linking to $aurl with text $title ...
    #   }

    print <<EOF;
</body>
</html>
EOF
}

# no url, so show help
else {
    print <<EOF;
<html>
<head>
<title>rss.asp</title>
</head>
<body>

<h1>rss.asp</h1>

<p>Parameters:</p>

<dl>
<dt>url</dt>
<dd>URL to RSS feed (RSS 1 or RSS 2)</dd>
</dl>
</body>
</html>
EOF
}
%>