Welcome to HBH! If you have tried to register and didn't get a verification email, please use the following link to resend the verification email.
Find Links In HTML - Java Code Bank
Find Links In HTML
A little example of how to search a website for links. I am writing the program mostly to check whether my website shows up on Google when I search for specific key terms (use this however you will). This is just a very basic example, but it will be very easy to improve by reading the website from args[] or standard input (if you have any trouble with that, post a comment). Let me know if you use this to make anything cool (;
import java.util.*;
import java.io.*;
import java.net.*;
import org.htmlparser.Parser;
import org.htmlparser.util.NodeList;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
/**
 * Downloads a web page and prints every hyperlink found in it.
 *
 * <p>The page is fetched over a plain {@link URLConnection}, read fully into
 * memory, and then handed to the HTML Parser library, which extracts all
 * {@code LinkTag} nodes. Each extracted link is printed to standard output
 * as {@code Link: <url>}.
 */
public class getLinksTester
{
    /** Page that is searched when no URL is supplied on the command line. */
    private static final String DEFAULT_URL =
        "http://www.google.com/search?q=this+is+a+google+search";

    /**
     * Browser-like User-Agent sent with the request. The original author
     * notes that Google answers with an error when no User-Agent is set.
     */
    private static final String USER_AGENT =
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2";

    /**
     * Program entry point.
     *
     * @param args optional; when present, {@code args[0]} is the URL to scan,
     *             otherwise {@link #DEFAULT_URL} is used
     * @throws IOException if the page cannot be opened or read
     */
    public static void main(String [] args)
    throws IOException
    {
        // URL that will be searched for links.
        String urltext = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;

        String htmlString = fetchHtml(urltext);

        // This list holds all the links in the order they were found.
        List<String> links = new ArrayList<String>();

        // Parse the HTML and search for links.
        try {
            Parser parser = new Parser();
            parser.setInputHTML(htmlString);
            NodeList list = parser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
            for (int i = 0; i < list.size(); i++) {
                LinkTag extracted = (LinkTag) list.elementAt(i);
                String extractedLink = extracted.getLink();
                links.add(extractedLink);
                // Print links to stdout.
                System.out.println("Link: " + extractedLink);
            }
        } catch (Exception e) {
            System.out.println("Trouble Finding Links");
            // Report the cause instead of discarding it, so failures can be diagnosed.
            System.err.println("Cause: " + e);
        }
    }

    /**
     * Fetches the document at the given URL and returns its text.
     *
     * @param urltext the URL to download
     * @return the response body, with each line terminated by {@code '\n'}
     * @throws IOException if the URL is malformed or the connection or read fails
     */
    private static String fetchHtml(String urltext)
    throws IOException
    {
        URL site = new URL(urltext);
        URLConnection connection = site.openConnection();
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.setDoInput(true);

        InputStream inStream = connection.getInputStream();
        // NOTE(review): the reader uses the platform default charset, as the
        // original did; the response's declared charset is not consulted.
        BufferedReader input = new BufferedReader(new InputStreamReader(inStream));
        try {
            // Read the HTML one line at a time and join it into one string.
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = input.readLine()) != null) {
                // readLine() strips the terminator; put one back so that markup
                // split across lines (e.g. "<a" / "href=...") is not fused.
                html.append(line).append('\n');
            }
            return html.toString();
        } finally {
            // Always release the connection's stream, even if reading fails.
            input.close();
        }
    }
}
Comments
Sorry but there are no comments to display