Welcome to HBH! If you had an account on hellboundhacker.org, you will need to reset your password using the Lost Password system before you will be able to log in.

Find Links In HTML - Java Code Bank


Find Links In HTML
A little example of how to search a website for links. I am writing the program mostly to check whether my website shows up on Google when I search for specific key terms (use this however you will). This is just a very basic example, but it will be very easy to improve by reading the website URL from args[] or standard input (if you have any trouble with that, post a comment). Let me know if you use this to make anything cool (;
                import java.util.*;
import java.io.*;
import java.net.*;
import org.htmlparser.Parser;
import org.htmlparser.util.NodeList;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
public class getLinksTester
{

	public static void main(String [] args)
	throws IOException
	{

               //Url that you will Serch for links.
		String urltext = "http://www.google.com/search?q=this+is+a+google+search";
               
                //open connection
                URL SITE = new URL(urltext);
		URLConnection connection = SITE.openConnection();

                //IF you do not set a User Agent and try to search
               // Google you will get an error message 400
                connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2");
		connection.setDoInput(true);
                InputStream inStream = connection.getInputStream();
		BufferedReader input=new BufferedReader(new InputStreamReader(inStream));

		
                /*Read the HTML One Line at a time and  concatinate into one string*/
 
		String line;
                String htmlString = "";
		int i = 0;
		while ((line = input.readLine()) != null)
                {
                        htmlString += html.get(i);
                        i++;
                }

                //this list will hold all the links
		List<String> links = new ArrayList<String>();

//pars the html and search for links. 

try {

	URI uriLink = new URI(urltext);
	Parser parser = new Parser();
	parser.setInputHTML(htmlString);
	NodeList list = parser.extractAllNodesThatMatch(new NodeClassFilter (LinkTag.class));

	for (int ii = 0; ii < list.size (); ii++){
		LinkTag extracted = (LinkTag)list.elementAt(ii);
		String extractedLink = extracted.getLink();
                boolean add = links.add(extractedLink);
                //Print links to stdout. 
                System.out.println("Link: "+links.get(ii));
		
	}

	} catch (Exception e) {
            System.out.println("Trouble Finding Links");
	}

		input.close();
                 
	}


}

            
Comments
Sorry but there are no comments to display