#### klinks v0.2beta
# coded by viper - viper@kuht.it - www.kuht.it
# released under the GNU GPL License - www.gpl.org
# This software scans a given web page for links that point to files of a given type (by extension)
# and writes the resulting links to an HTML file, laid out in a table.
####
## initialization
from HTMLParser import HTMLParser
import urlparse,urllib,sys,os,os.path
# Absolute URLs collected while parsing; filled by makeabslink/makerellink.
links = list()
## program core
class GetLinks(HTMLParser):
    """HTML parser that collects links to files with the wanted extension.

    For every ``<a>`` start tag whose ``href`` value contains
    ``'.' + extension``, the link is recorded through ``makeabslink`` (when
    the value contains ``http://``) or ``makerellink`` (otherwise, resolved
    against ``inpage``).  ``extension`` and ``inpage`` are module-level names
    that must be set before ``feed()`` is called.
    """

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` of an anchor tag if it matches ``extension``.

        tag   -- name of the start tag being handled
        attrs -- list of (name, value) pairs for the tag's attributes
        """
        if tag != 'a':
            return
        # Look for href among all attributes: it is not necessarily the
        # first one, and an anchor may carry no attributes at all.
        for name, value in attrs:
            if name != 'href':
                continue
            # An attribute written without a value (<a href>) arrives as
            # None, so guard before searching inside it.
            if value and '.' + extension in value:
                if 'http://' in value:
                    makeabslink(value)
                else:
                    makerellink(inpage, value)
            # Only the first href of a tag is considered.
            break
def makeabslink(url):
    """Store ``url`` unchanged at the end of the module-level ``links`` list."""
    links.extend([url])
def makerellink(baseurl, url):
    """Resolve ``url`` against ``baseurl`` and store the result in ``links``.

    baseurl -- URL the link was found on
    url     -- link target as written in the page
    """
    resolved = urlparse.urljoin(baseurl, url)
    links.append(resolved)
def writeheader(htmlfile):
    """Write the opening part of the HTML report to ``htmlfile``.

    Emits the ``<html>`` and ``<head>`` elements (with a title), opens
    ``<body>`` and writes a heading.  Title and heading both name the
    module-level ``extension`` and ``inpage``.

    htmlfile -- object with a ``write`` method, opened for writing text
    """
    # Function-scope import so the block does not depend on the file header.
    from xml.sax.saxutils import escape

    # extension and inpage are typed in by the user; escape them so that
    # characters such as '<' or '&' cannot break the generated markup.
    caption = escape("Links to *." + extension + " Files found in " + inpage)
    htmlfile.write("<html>\n")
    htmlfile.write("<head><title>%s</title></head>\n" % caption)
    htmlfile.write("<body>\n")
    htmlfile.write("<h2>%s</h2>\n" % caption)
def writefooter(htmlfile):
    """Write the closing part of the HTML report to ``htmlfile``.

    Closes the ``<body>`` and ``<html>`` elements so the report is a
    complete document.

    htmlfile -- object with a ``write`` method, opened for writing text
    """
    htmlfile.write("</body>\n")
    htmlfile.write("</html>\n")
def buildtable(array,htmlfile):
    """Write an HTML table with one numbered row per link to ``htmlfile``.

    Each row holds the 1-based position of the link in a narrow cell and the
    link itself, as a clickable anchor, in a wide cell.  An empty ``array``
    produces an empty table.

    array    -- sequence of URL strings
    htmlfile -- object with a ``write`` method, opened for writing text
    """
    # Function-scope import so the block does not depend on the file header.
    from xml.sax.saxutils import escape, quoteattr

    htmlfile.write("<table>\n")
    for number, link in enumerate(array, 1):
        htmlfile.write("<tr>\n")
        # '%%' is a literal percent sign in the width attribute.
        htmlfile.write('<td width="5%%">%d</td>\n' % number)
        htmlfile.write('<td width="95%">\n')
        # The URLs are scraped from a remote page: quote the attribute value
        # and escape the visible text so they cannot inject markup.
        htmlfile.write("<a href=%s>%s</a>\n" % (quoteattr(link), escape(link)))
        htmlfile.write("</td>\n")
        htmlfile.write("</tr>\n")
    htmlfile.write("</table>\n")
### runtime stuff
# Interactive driver: asks for a URL, an output file name and an extension,
# scans the page and writes the HTML report next to this script.
# print(...) with a single argument behaves exactly like the print statement.
print("# Klinks Link Finder by viper (viperkuhtit)")
print("# released under GNU GPL License - www.gnu.org ")

inpage = raw_input("Insert a valid web URL to scan: ")
# Add a scheme only when the input does not already start with one; a plain
# substring test would turn "https://host" into "http://https://host".
if not inpage.startswith(('http://', 'https://')):
    inpage = 'http://' + inpage

outfile = raw_input("Insert a name for the HTML output file: ")
extension = raw_input("Insert a file extension to seek (without the dot, i.e.: \"jpg\"): ")
# The report goes in the script's own directory.  abspath() matters: when the
# script is started by bare name, dirname(sys.argv[0]) is '' and appending
# '/' + outfile would point at the filesystem root.
outpage = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), outfile)

print(" - Fetching " + inpage + "...")
try:
    page = urllib.urlopen(inpage)
    try:
        pagedata = page.read()
    finally:
        page.close()
except IOError:
    print("Cannot Get URL. Maybe you misspelled it ? Exiting...")
    # Non-zero status: the scan did not happen.
    sys.exit(1)
GetLinks().feed(pagedata)

if not links:
    print("No links to *." + extension + " Files found in " + inpage)
    sys.exit(0)

print(" - Opening " + outpage + " output file...")
report = open(outpage, 'w')
try:
    print(" - Building output page and links table...")
    writeheader(report)
    buildtable(links, report)
    writefooter(report)
finally:
    # Close the file even if one of the writers fails part-way through.
    report.close()
print(" - Done... check out " + outpage + " and press Enter to exit. ")
raw_input("")