<%
# Cached-article viewer page.
#
# Request parameters (read from positional arguments first, then from the
# merged query string / form data):
#   d - numeric document id                      -> $nid
#   p - read but not used anywhere on this page  -> $pid
#   s - source id                                -> $sid
#   c - mode selector; "tkrobot" serves the copy stored in the repository
#                                                -> $country
#
# Behaviour:
#   * If d is not a plain non-negative integer, nothing is printed.
#   * If s equals the SCMP source id, the page is assembled from the SCMP
#     article mapped to the document.
#   * Otherwise, if c is "tkrobot", the HTML stored for the document is used.
#   * Any other combination dies.
# In both rendering cases a short banner mentioning the document URL is
# inserted right after the opening <body> tag before the page is printed.
use strict;
use warnings;

use Compress::Zlib ();
use lib "/etc/magentanews";
use magenta_conf;
#use lib "/home/magenta/bin/tkrobot";
#use html;
use tkhtml;
use lib $magenta_conf::bin_root."external_search_source/scmp";
use SCMP;
use utilsDb;
#use utilswww;
use MagentaV0::Global;
$MagentaV0::Global::v0_global->Script_OnStart();
use MagentaV0::Data::Factory;
use MagentaRepository;
# Package globals are kept as in the original page.
# NOTE(review): presumably these persist between requests in a persistent
# interpreter, which would explain the explicit %query reset below - confirm.
use vars qw($nid $pid $sid $db $SCMPsourceid %query $Factory $country);
use POSIX qw(uname);

# Start from an empty parameter table, then merge the query string and the
# form data; on duplicate keys the form value wins because it is merged last.
%query = ();
foreach my $params ($Request->QueryString, $Request->Form) {
    @query{ keys %{$params} } = values %{$params};
}

# Positional arguments take precedence over request parameters.
# NOTE(review): a false positional value (0 or '') also falls through to the
# request parameter because of the || operator.
$nid     = shift || $query{d};
$pid     = shift || $query{p};
$sid     = shift || $query{s};
$country = shift || $query{c};

$Factory = MagentaV0::Data::Factory->new();

# The SCMP source id differs between the development and live configuration.
$SCMPsourceid = $magenta_conf::develop ? 7771 : 40540;

# Only digits are accepted: \A...\z (unlike ^...$) also rejects a trailing
# newline, and the defined() test avoids a warning when no id was supplied.
# The id is interpolated into SQL below, so this check must stay strict.
if (defined $nid && $nid =~ /\A\d+\z/) {
    my $url = $Factory->get_document_factory()->get_by_id( $nid )->get_url();
    my $html;
    my $banner;

    # Guard the numeric comparison so a missing or non-numeric source id does
    # not emit warnings on every request.
    my $is_scmp = defined $sid
        && $sid =~ /\A\d+\z/
        && $sid == $SCMPsourceid;

    if ($is_scmp) {
        # Special hack for SCMP (South China Morning Post)
        $db = utilsDb->new( 'slave_readonly' ) or die "Unable to utilsDb()";

        my ($articleid) = $db->getonerow("SELECT articleid FROM scmp_article_2_document WHERE doc_id = $nid");
        defined $articleid
            or die "No SCMP article is mapped to document $nid";

        my $article = SCMP->new($db)->get_article($articleid);

        # NOTE(review): the literal text below looks like it has lost its HTML
        # markup in this copy of the file; it is reproduced verbatim.
        $html = '';
        $html .= "
SCMP.comSCMP.comHome
 
".$article->get_createdate()."
".$article->get_title()."
".$article->get_author()."

\n";
        $html .= $article->get_content()."

";
        $html .= '

Copyright © 2007. South China Morning Post Publishers Ltd. All rights reserved.
Contact information, privacy policy and publication dates.

';

        $banner = "
Follow this link to go to the original article at SCMP $url

";
    }
    elsif ( defined $country && $country eq "tkrobot" ) {
        # NOTE(review): $db is not assigned in this branch, so it holds
        # whatever value the package global currently has (possibly undef) -
        # verify that MagentaRepository->new() copes with that.
        $html = MagentaRepository->new(undef, $db)->extract_doc_db($nid)->{html};

        $banner = "
This is a cached copy of $url

";
    }
    else {
        die "Cannot render document $nid: source id is not SCMP and c is not 'tkrobot'";
    }

    # Pattern fragment supplied by tkhtml for the text between a tag name and
    # the closing '>'.
    my $itr = tkhtml->new()->insidetag_regexp();

    # Insert a single space after the opening <head> tag and the banner after
    # the opening <body> tag (first match only, case-insensitive).
    $html =~ s{(<\s*head$itr>)}{$1 }si;
    $html =~ s{(<\s*body$itr>)}{$1$banner}si;

    print $html;
}
%>