Welcome to HBH! If you had an account on hellboundhacker.org, you will need to reset your password using the Lost Password system before you can log in.

Link Seeker - Perl Code Bank


Link Seeker
Seeks and prints all URLs from a website.
                #!/usr/bin/perl

# Coded by Trizen
# http://trizen.go.ro

use LWP::UserAgent;

# Shared HTTP client used by Parse(): browser-like User-Agent, 10 second
# timeout, and proxy settings picked up from the environment.
$lwp = 'LWP::UserAgent'->new(
    agent   => 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20',
    timeout => 10,
);
$lwp->env_proxy;

# First pass over the arguments: pick up option flags only. Any number of
# leading dashes is accepted and only the first letter after them matters,
# so "-m" and "--main" are equivalent.
for my $arg (@ARGV) {
    $mainurl = 1 if $arg =~ /^[-]+m/;    # print each page URL before its links
    help()       if $arg =~ /^[-]+h/;    # prints usage and terminates
    $intern  = 1 if $arg =~ /^[-]+i/;    # also collect relative (internal) links
}
# help()
# Terminates the program via die() with the usage text. The message ends in
# a newline, so Perl does not append the "at FILE line N" suffix.
sub help {
    # One entry per output line; the leading empty entry yields the blank
    # line before "Usage" and the two trailing ones yield the final "\n\n".
    my @usage_lines = (
        "",
        " Usage: $0 <URL_list.txt> | <URLS>",
        "",
        " Options:",
        "\t  -m, --main   : prints main URL",
        "\t  -i, --intern : seek for intern URLs ",
        "\t  -h, --help   : prints this message",
        "",
        "",
        "\t  ",
        " Example: $0 google.com -i > google_urls.txt",
        "",
        "",
    );
    die join("\n", @usage_lines);
}
# Second pass over the arguments: everything that is not an option is either
# a URL (contains a dot and does not end in .txt/.lst/.list) or the name of a
# file holding URLs. Parse() reads the package variable $url, so the loop
# variable is deliberately the global $url rather than a lexical.
foreach $url (@ARGV) {
    next if $url =~ /^-/;    # option flags were handled by the first pass

    if ($url =~ /\./ and not $url =~ /\.(txt|lst|list)$/) {
        # Require a real scheme prefix; a bare /^http/ test would treat a
        # host such as "httpbin.org" as if it already had one.
        $url = 'http://' . $url unless $url =~ m{^https?://};
        Parse();
    }
    else {
        # Open exactly this list file. Reading the magic <ARGV> handle here
        # would shift @ARGV while it is being iterated and would try to open
        # every remaining argument (options and URLs included) as a file.
        my $list_file = $url;
        my $list_fh;
        unless (open $list_fh, '<', $list_file) {
            warn "$0: cannot open URL list '$list_file': $!\n";
            next;
        }
        my @candidates = <$list_fh>;
        close $list_fh;

        foreach $url (@candidates) {
            next if $url =~ /^-/;
            # Take the first thing on the line that looks like a URL; the
            # character class also excludes the trailing newline.
            next unless $url =~ /((?:www|http)[^"'\n<>]+)/;
            $url = $1;
            $url = 'http://' . $url unless $url =~ m{^https?://};
            Parse();
        }
    }
}
# Parse()
# Fetches the page named by the package variable $url with the shared $lwp
# client and prints every URL found in it, sorted and without duplicates.
#
# Reads:  $url, $lwp, $mainurl (print the page URL first), $intern (also
#         resolve non-http href targets against the page's host).
# Writes: $found_url and @urls, which is how it hands candidates to
#         Push_URL(); @urls is emptied again before returning.
# A page that cannot be fetched is reported on STDERR and skipped.
sub Parse {
    if ($mainurl) {
        print "\n\n=>> Mainurl: $url\n";
    }

    # Do not scan the body of a failed request as if it were the page.
    my $response = $lwp->get($url);
    unless ($response->is_success) {
        warn "Skipping $url: " . $response->status_line . "\n";
        return;
    }

    # Base for relative links: scheme and host of the page plus a trailing
    # slash. Computed once on a copy so the caller's $url stays untouched.
    my $base;
    if ($intern) {
        ($base = $url) =~ s[(https?://[^/]+).*][$1/];
    }

    # The page is scanned one whitespace-separated token at a time.
    foreach my $token (split ' ', $response->content) {
        if ($token =~ /(http:([^"'\n<>]+))/) {
            my ($match, $tail) = ($1, $2);
            # Drop obviously truncated matches: those ending in a dot, in
            # "//www" or in a second "http://".
            unless ($tail =~ m[\.$|//www$|http://$]) {
                $found_url = $match;
                Push_URL();
            }
        }
        if ($intern and $token =~ /href=['"\s]*([^'"\s<>]+)/) {
            my $target = $1;
            # Targets starting with "http" are left to the branch above.
            unless ($target =~ /^http/) {
                $found_url = "$base$target";
                Push_URL();
            }
        }
    }

    # De-duplicate per page. Remembering the last printed URL across calls
    # would suppress the first URL of this page whenever it equals the last
    # URL of the previous one.
    my %seen;
    foreach my $link (sort grep { !$seen{$_}++ } @urls) {
        print "$link\n";
    }

    @urls = ();
    return;
}
# Push_URL()
# Normalizes the candidate in the package variable $found_url and appends it
# to the package array @urls. Takes no arguments and modifies $found_url in
# place.
#
# Normalization steps: drop one trailing ")", turn JavaScript-escaped "\/"
# into "/", drop one trailing backslash, collapse "//" inside the host/path
# part, and make sure the result carries a scheme. An existing "http://" or
# "https://" prefix is kept as is; anything else is prefixed with "http://".
sub Push_URL {
    $found_url =~ s/\)$//;
    $found_url =~ s[\\/][/]g;
    $found_url =~ s/\\$//;

    # Detach the scheme before collapsing slashes so that its own "//" is
    # not touched. Remembering which scheme was present keeps https URLs
    # intact instead of turning them into "http://https:/...".
    my $scheme = 'http';
    if ($found_url =~ s[^(https?)://][]) {
        $scheme = $1;
    }

    $found_url =~ s[//][/]g;
    $found_url = $scheme . '://' . $found_url;

    push @urls, $found_url;
}

            
Comments
Sorry but there are no comments to display