HTML::LinkExtor or me ?

S

Saya

Hi,

This is the code:

sub Escape{
$item = shift;

use HTML::LinkExtor;

$p = HTML::LinkExtor->new(\&replaceURL, "");
$p->parse($item);

return $item;
}


sub replaceURL {

my(@links) = @_;


my $makeSubstitution = false;
my $newLink;

foreach my $link (@links) {
#$link =~ s/\/$//i;
$makeSubstitution = compareValues($link);

if ($makeSubstitution eq true) {
if($link =~ /http|www/) {
if ($link !~ /http/) {
$newLink = "http://" . $link;
}
else {
$newLink = $link;
}
$item =~ s/href=\"$link/href=\"\/redirect.asp?forwardURL=$newLink/i;
}
}
else {
if($link =~ /http|www/) {
if ($link !~ /http/) {
$item=~ s/href=\"$link/href=\"http:\/\/$link/i;
}
}
}
}
}

sub compareValues {
my $link = shift;

my @safeLinkArr;
@safeLinkArr = getSafeSites();
my $sizeOfArray = @safeLinkArr;
my $result = true;

if($sizeOfArray eq 0) {
return $result;
}

foreach my $safeLink (@safeLinkArr) {

if ( (0 <= (index($link, $safeLink))) or (0 <= (index($safeLink,
$link))) ) {
$result = false;
last;
}
else {
$result = true;
}
}

return $result;
}



sub getSafeSites {
use XML::DOM;

my $count;
my $WAPath;
my @linkArr;


foreach $arg (@ARGV)
{
if ($ARGV[$count] eq '-iw_include-location')
{
$WAPath = $ARGV[$count + 1];
}
$count++;
}

my $nonRedirectList = $WAPath . "/include/nonRedirectList.xml";

# --- Parsing the XML file ---
my $parser = XML::DOM::parser->new();
my $doc = $parser->parsefile($nonRedirectList);

# --- get all tags ---
my $links = $doc->getElementsByTagName('Link');
my $link;

for my $i (0..$links->getLength()-1) {
$link = $links->item($i);

if ($link->getFirstChild->getNodeValue) {
@linkArr[$i] = $link->getFirstChild->getNodeValue;
}
$i++;
}

$doc->dispose;

return @linkArr;
}

Escape($item);

$item = is real scenario is text + <a> + text <a> etc.

For some reason that I do not understand some links are not parsed
correctly. Does anyone have a reason for why this might be happening ?

I have looked at this problem for 2 days now, and can not find the
problem, so any help will be greatly appreciated :)

/Saya
 
G

Gisle Aas

This is the code:

sub Escape{
$item = shift;

use HTML::LinkExtor;

$p = HTML::LinkExtor->new(\&replaceURL, "");
$p->parse($item);

return $item;
}

What are you actually trying to do? Please describe that and remove
unrelated details from your example program before you post.

If you want to do substitutions on links in an HTML document, then
this example program might be a good start.

http://search.cpan.org/src/GAAS/HTML-Parser-3.36/eg/hrefsub
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,767
Messages
2,569,570
Members
45,045
Latest member
DRCM

Latest Threads

Top