Welcome to HBH! If you have tried to register and didn't get a verification email, please use the following link to resend the verification email.

Find Links In HTML - Java Code Bank


Find Links In HTML
A little example of how to search a website for links. I am writing the program mostly to check whether my website shows up on Google when I search for specific key terms (use this however you will). This is just a very basic example, but it will be very easy to improve by reading the website from args[] or standard input (if you have any trouble with that, post a comment). Let me know if you use this to make anything cool (;
                import java.util.*;
import java.io.*;
import java.net.*;
import org.htmlparser.Parser;
import org.htmlparser.util.NodeList;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
/**
 * Small demo that downloads one web page and prints every link found in it.
 *
 * The page is fetched with java.net.URLConnection and parsed with the
 * HTML Parser library (org.htmlparser), collecting every LinkTag node.
 * The URL is hard-coded in main.
 */
public class getLinksTester
{

	/**
	 * Fetches the hard-coded search URL, extracts all link tags from the
	 * returned HTML and prints each one to stdout as "Link: <url>".
	 *
	 * @param args ignored
	 * @throws IOException if the page cannot be downloaded
	 */
	public static void main(String [] args)
	throws IOException
	{

		//Url that you will search for links.
		String urltext = "http://www.google.com/search?q=this+is+a+google+search";

		String htmlString = fetchHtml(urltext);

		//this list will hold all the links
		List<String> links = new ArrayList<String>();

		//parse the html and search for links.
		try {
			Parser parser = new Parser();
			parser.setInputHTML(htmlString);
			NodeList list = parser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));

			for (int ii = 0; ii < list.size(); ii++) {
				LinkTag extracted = (LinkTag) list.elementAt(ii);
				String extractedLink = extracted.getLink();
				links.add(extractedLink);
				//Print links to stdout.
				System.out.println("Link: " + extractedLink);
			}
		} catch (Exception e) {
			System.out.println("Trouble Finding Links");
			//Report the cause on stderr instead of discarding it.
			System.err.println("Cause: " + e);
		}

	}

	/**
	 * Downloads the document at the given URL and returns it as one string.
	 *
	 * @param urlText absolute URL of the page to download
	 * @return the page content, with each line terminated by '\n'
	 * @throws IOException if the URL is malformed or the download fails
	 */
	private static String fetchHtml(String urlText)
	throws IOException
	{
		//open connection
		URL site = new URL(urlText);
		URLConnection connection = site.openConnection();

		//Per the original author: if you do not set a User Agent and try
		//to search Google you will get an error message 400.
		connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2");
		connection.setDoInput(true);

		BufferedReader input = new BufferedReader(new InputStreamReader(connection.getInputStream()));
		try {
			//Read the HTML one line at a time and concatenate into one string.
			StringBuilder html = new StringBuilder();
			String line;
			while ((line = input.readLine()) != null) {
				//readLine() strips the line terminator; put one back so
				//markup split across lines is not fused together.
				html.append(line).append('\n');
			}
			return html.toString();
		} finally {
			//Close the stream even when reading fails part-way.
			input.close();
		}
	}


}

            
Comments
Sorry but there are no comments to display