%
#!/usr/bin/perl
#
# file: rss.asp
# author: erik warendorph [ew]
# date(s): 2005-10-04..2006-02-06..2006-08-01
#
# description:
#
# - transform rss feed into html
#
#------------------------------------------------------------------------------
use strict;
use warnings;
use LWP::Simple;
# Package globals used throughout the script.  They are declared with
# "use vars" (package variables, not lexicals) and explicitly reset below so
# that every run starts from a known state.
#
#   $url       feed address taken from the query string
#   $content   body fetched for $url
#   $num       running count of articles printed
#   $item      text of the feed entry currently being processed
#   $title     title extracted from $item
#   $aurl      article link extracted from $item
#   $esc_aurl  copy of $aurl prepared for printing
#   $date      date extracted from $item
#   %seen      article links already handled (used to skip duplicates)
use vars qw($url $content $num $item $title $aurl $esc_aurl $date %seen);

# All string-valued globals start out empty.
($url, $content, $item, $title, $aurl, $esc_aurl, $date) = ("") x 7;

# Counter and duplicate-tracking table start out empty as well.
$num  = 0;
%seen = ();

# Switch STDOUT to unbuffered (autoflush) mode.  $| applies to the currently
# selected handle, so select STDOUT, set the flag, then restore whichever
# handle was selected before.
{
    my $previous_handle = select(STDOUT);
    $| = 1;
    select($previous_handle);
}
#print "Content-type: text/html\n\n";
#print "<pre>\n";
#foreach (sort keys %ENV) {
# print &htmlquote($_), ": ", &htmlquote($ENV{$_}), "\n";
#}
#print "</pre>\n";
# Extract the feed address from the CGI query string.
#
# The pattern captures everything after "url=" up to the end of the string,
# so an unescaped "&" inside the feed address is kept as part of it; no
# percent-decoding is applied to the captured value here.
#
# QUERY_STRING may be absent (for example when the script is started without
# a web server).  Fall back to an empty query in that case rather than
# matching against undef, which would raise an "uninitialized value" warning
# under "use warnings".
{
    my $query = defined($ENV{QUERY_STRING}) ? $ENV{QUERY_STRING} : "";
    ($url) = ($query =~ /(?:^|[?&;])url=(.*)$/);
}

# A failed match leaves $url undefined; normalise that to the empty string so
# later code can rely on length($url).
$url = "" unless defined($url);
if (defined($url) and length($url)) {
print <
$url
$url
EOF
$content = get("$url");
#print "$content";
# one xml feed
if ($content =~ m{- ])[^>]*>.*?
}s) {
# hack to get fix
$content =~
s{ < \s* ! \s* \[ \s* CDATA \s* \[ (.*?) \] \s* \] \s* > }{$1}gsx;
print <
EOF
$num = 0;
while ($content =~ m{- ])[^>]*>(.*?)
}sg) {
$item = $1;
#print "$item";
($title) = ($item =~ m{])[^>]*>(.*?)}sg);
($aurl) = ($item =~ m{])[^>]*>(.*?)}sg);
($date) = ($item =~ m{])[^>]*>(.*?)}sg);
# if not rss 2 date, try rss 1 date
if (not defined($date) or not length($date)) {
($date) = ($item =~ m{])[^>]*>(.*?)}sg);
}
foreach ($title, $aurl, $date) {
$_ = "" unless defined($_);
}
$title = "(unknown title)" unless length($title);
# special transformation of article urls for thisis rss feeds (these are
# longer than 250 characters which is a problem for our system)
if ($url =~ m{^http://rss\.thisis\.co\.uk/} and
$aurl =~ /\bdestination=([^&]+)/) {
($aurl = $1) =~ s/\%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg;
}
if (length($aurl) and not $seen{$aurl}) {
$num++;
$esc_aurl = $aurl;
$esc_aurl =~ s/&(?![A-Za-z0-9]+;)/&/g;
print <
$date $title
<$esc_aurl>
EOF
}
$seen{$aurl}++;
}
print <
$num articles
EOF
}
# one xml feed
elsif ($content =~ m{])[^>]*>.*?}s) {
# hack to get fix
$content =~
s{ < \s* ! \s* \[ \s* CDATA \s* \[ (.*?) \] \s* \] \s* > }{$1}gsx;
print <
EOF
$num = 0;
while ($content =~ m{])[^>]*>(.*?)}sg) {
$item = $1;
#print "$item\n---------------------------------\n";
($title) = ($item =~ m{])[^>]*>(.*?)}sg);
($aurl) = ($item =~ m{])[^>]*>(.*?)}sg);
unless ( $aurl ) {
print STDERR "ser etter annen url\n";
($aurl) = ($item =~ m{])[^>]*>(.*?)}sg);
# if not rss 2 date, try rss 1 date
if (not defined($date) or not length($date)) {
($date) = ($item =~ m{])[^>]*>(.*?)}sg);
}
foreach ($title, $aurl, $date) {
$_ = "" unless defined($_);
}
$title = "(unknown title)" unless length($title);
# special transformation of article urls for thisis rss feeds (these are
# longer than 250 characters which is a problem for our system)
if ($url =~ m{^http://rss\.thisis\.co\.uk/} and
$aurl =~ /\bdestination=([^&]+)/) {
($aurl = $1) =~ s/\%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg;
}
print STDERR "$title, $aurl, $date\n";
if (length($aurl) and not $seen{$aurl}) {
$num++;
$esc_aurl = $aurl;
$esc_aurl =~ s/&(?![A-Za-z0-9]+;)/&/g;
print <
$date $title
<$esc_aurl>
EOF
}
$seen{$aurl}++;
}
print <
$num articles
EOF
}
# list of feeds (disabled)
else {
# while ($content =~ m{]*\bhref="([^\"]*)"[^>]*>(.*?)}sg) {
# $title = $2;
# $aurl = $1;
# print <$title
#EOF
# }
}
print <
EOF
}
# no url, so show help
else {
print <
rss.asp
rss.asp
Parameters:
- url
- URL to RSS feed (RSS 1 or RSS 2)