Welcome to HBH! If you had an account on hellboundhacker.org you will need to reset your password using the Lost Password system before you will be able to login.

Automated Dictionary *props to spyware,root_op,and jjbutler88 - Python Code Bank


Automated Dictionary *props to spyware,root_op,and jjbutler88
An automated dictionary: type in the words and it will rip their definitions from Merriam-Webster's site (note that, depending on the layout of the page, you may still end up with arbitrary HTML). A copy of the code with syntax highlighting and tab support can be found at [url]http://pastebin.com/f7e56f5a3[/url]. If you copy this code, please give proper credit.
                import urllib2,os,os.path,string,re
from time import sleep

def printtext(texttoprint,loop_numb,response):
    """Output one definition to the screen, to a file, or to both.

    texttoprint -- the definition text to output.
    loop_numb   -- zero-based index of the definition; displayed one-based.
    response    -- 1 prints to the screen, 2 appends to a file, 3 does both.
                   Any other value does nothing.

    For modes 2 and 3 the user is prompted for a destination on every call.
    If it names an existing directory, the text is appended to
    "definitions.txt" inside that directory; otherwise it is used as a file
    path and appended to (the file is created if it does not exist yet).
    """
    if response in (2, 3):
        target = raw_input(r"Enter the directory(C:\example or C:\example\file.txt): ")
        # Only a directory gets the default file name added. Testing for mere
        # existence would also match a file written by an earlier call and
        # then try to open "<file>\definitions.txt".
        if os.path.isdir(target):
            target = os.path.join(target, "definitions.txt")
        # The context manager closes the file even if the write fails.
        with open(target, "a") as out:
            out.write(texttoprint)

    if response in (1, 3):
        # Same one-based numbering whether or not a file was written as well.
        print("Definition #%d %s \n" % (loop_numb + 1, texttoprint))
        
def parsetext(string1, iter,answ):
    """Extract the first definition from a dictionary page and output it.

    string1 -- HTML source of the page.
    iter    -- index of the definition; forwarded unchanged to printtext.
    answ    -- output mode chosen by the user; forwarded unchanged to
               printtext.

    The definition is the text between the first '<div class="defs">' and
    the next '</span></div>' after it, with every '<...>' tag removed. If
    either marker is missing, an empty string is forwarded instead.
    """
    start_marker = '<div class="defs">'
    end_marker = "</span></div>"

    definition = ""
    startdef = string1.find(start_marker)
    if startdef != -1:
        begcoords = startdef + len(start_marker)
        # Look for the closing marker only after the opening one, so an
        # earlier '</span></div>' elsewhere on the page cannot be picked up.
        enddef = string1.find(end_marker, begcoords)
        if enddef != -1:
            # find() returns the index where the closing marker starts, so
            # slicing up to it keeps the whole definition and nothing more.
            fragment = string1[begcoords:enddef]
            # Drop every complete <...> tag; text without tags is unchanged.
            definition = re.sub(r"<[^>]*>", "", fragment)

    printtext(definition, iter, answ)

def rip_def(arry,bool):
    """Fetch the Merriam-Webster page for each term and pass it to parsetext.

    arry -- sequence of terms to look up.
    bool -- output mode chosen by the user; forwarded unchanged to parsetext
            (despite the name, callers pass the numeric menu choice).
    """
    # Local import: quote() lives in urllib, which the file-level imports
    # do not bring in.
    import urllib

    for index, term in enumerate(arry):
        # The term becomes part of the URL path, so surrounding whitespace is
        # dropped and unsafe characters (e.g. inner spaces) are percent-encoded.
        url = "http://www.merriam-webster.com/dictionary/%s" % urllib.quote(term.strip())
        handle = urllib2.urlopen(url)
        try:
            data = handle.read()
        finally:
            # Release the connection even if reading the response fails.
            handle.close()
        parsetext(data, index, bool)

def get_definitions():
    """Ask for an output mode and a list of terms, then look the terms up.

    The mode prompt is repeated until the user enters 1, 2 or 3. The terms
    are read as one comma-separated line; surrounding whitespace is removed
    and empty entries are dropped before the list is passed to rip_def
    together with the chosen mode.
    """
    # raw_input() + int() instead of input(): in Python 2, input() evaluates
    # whatever is typed as a Python expression.
    while True:
        choice = raw_input("1 to print to screen only\n2 to print to file only\n3 to do both: ")
        try:
            answer = int(choice)
        except ValueError:
            continue
        if answer in (1, 2, 3):
            break

    definitions = raw_input("Enter definition terms (seperate with a comma without spaces): ")
    # Split at the comma delimiter; tolerate stray spaces and empty entries.
    def_arry = [term.strip() for term in definitions.split(",") if term.strip()]
    rip_def(def_arry, answer)
        
    

            
Comments
ghost's avatar
ghost 15 years ago

I'm going to forgive the fact that you used string splits (instead of regex) and the "props" in the title… because you have decent code. There, I said it.

ghost's avatar
ghost 15 years ago

:D:D:D:D:D:D:D:D:D:D

fuser's avatar
fuser 14 years ago

man, that's a whole lot of code. but quite awesome imo.

fuser's avatar
fuser 14 years ago

man, that's a whole lot of code. but quite awesome imo.