Facebook
Banner
XMPP JavaScript Library READ MORE

Python class to search for links in a web page.

Python, Sachin Puri, 2012-04-01 23:32:23

This Python class searches a web page for links without using BeautifulSoup.

import urllib
import re

class Crawl:
    """Fetch a web page and print the targets of its ``<a href=...>`` links.

    Links are located with a regular expression rather than an HTML parser,
    so unusual markup (for example a ``>`` inside an attribute value) may be
    missed.

    Attributes:
        url: Address of the page to scan.
    """

    # Class-level default, kept so ``Crawl.url`` remains a valid attribute;
    # each instance stores its own address in ``__init__``.
    url = ""

    # Captures the href value of an anchor tag whether it is double-quoted,
    # single-quoted or unquoted.  ``[^>]*?`` keeps the search inside a single
    # tag, so several anchors on the same line are each reported.
    _HREF_RE = re.compile(
        r"""<a\b[^>]*?\shref\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))""",
        re.IGNORECASE,
    )

    def __init__(self, url):
        """Remember the address of the page to scan.

        Args:
            url: Address passed unchanged to ``urlopen`` by :meth:`links`.
        """
        # Stored on the instance: assigning to ``Crawl.url`` would make every
        # crawler share the most recently supplied address.
        self.url = url

    def getURL(self):
        """Print the address this crawler was created with."""
        print(self.url)

    def links(self):
        """Download the page at ``self.url`` and print each link target.

        One target is printed per line, in document order.  Errors raised by
        ``urlopen`` while retrieving the page are not caught here.
        """
        # Imported locally so the method runs on Python 3 (urllib.request)
        # as well as Python 2 (urllib) without changing the module imports.
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib import urlopen

        response = urlopen(self.url)
        try:
            cont = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        for href in self._find_hrefs(cont):
            print(href)

    def _find_hrefs(self, html):
        """Return the href value of every anchor tag in *html*, in order."""
        if not isinstance(html, str):
            # On Python 3 the response body is bytes; undecodable bytes are
            # replaced rather than aborting the scan.
            html = html.decode("utf-8", "replace")
        # Exactly one of the three alternative groups is non-empty per match,
        # so joining them yields the href value with its quotes removed.
        return ["".join(groups) for groups in self._HREF_RE.findall(html)]

# Demo run: list every link found on the author's home page.
crawl = Crawl(url="http://www.sachinpuri.com")
crawl.links()

 

Add Your Comment
   
    Yes! I want to receive all comments by email

No comments posted yet. Be the first to post a comment.