分类: 2007-07-26 18:56 698人阅读 (1)
下面的脚本可以扩充成一个简单的抓取工具，用于定时抓取指定的 URL。
#!/usr/bin/env python
# Poll URL with a conditional GET every POLL_INTERVAL seconds and print the
# response body whenever the server reports new content.
#
# The code runs unchanged on Python 2 (urllib2) and Python 3 (urllib.request).
import time
from email.utils import formatdate

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 exposes the same Request/opener/handler names in this module.
    import urllib.request as urllib2

URL = 'http://www.ibm.com/developerworks/js/ajax1.js'
POLL_INTERVAL = 10  # seconds to wait between two requests

NOT_MODIFIED_BANNER = '''
==============================
NOT MODIFIED
==============================
'''


class ErrorHandler(urllib2.HTTPDefaultErrorHandler):
    """Return HTTP error responses to the caller instead of raising them.

    The stock handler raises ``HTTPError`` for every status it does not
    handle itself, which includes 304.  Returning the error object lets the
    polling loop inspect ``code`` like it does for a normal response.
    """

    def http_error_default(self, req, fp, code, msg, headers):
        """Build an ``HTTPError`` for the response and return it.

        Args:
            req: the request that produced the error response.
            fp: file-like object holding the response body.
            code: numeric HTTP status code.
            msg: HTTP reason phrase.
            headers: response headers.

        Returns:
            An ``HTTPError`` whose ``code`` and ``status`` equal ``code``.
        """
        result = urllib2.HTTPError(
            req.get_full_url(), code, msg, headers, fp)
        # Newer Python 3 releases already expose ``status`` as a read-only
        # property mirroring ``code``; only assign it where it is missing.
        if getattr(result, 'status', None) != code:
            result.status = code
        return result


def http_date(timestamp=None):
    """Format a POSIX timestamp as an HTTP-date (current time if omitted).

    ``If-Modified-Since`` must carry an HTTP-date such as
    ``Thu, 01 Jan 1970 00:00:00 GMT``; a raw float is not a valid value.
    """
    return formatdate(timestamp, usegmt=True)


def main():
    """Poll ``URL`` forever, printing the body each time it has changed."""
    request = urllib2.Request(URL)
    opener = urllib2.build_opener(ErrorHandler())
    modified = None  # validator sent as If-Modified-Since on the next poll

    while True:
        response = opener.open(request)
        try:
            code = response.code
            last_modified = response.headers.get('last-modified')
            # Only a 200 response carries content worth reading.
            body = response.read() if code == 200 else None
        finally:
            # The loop never ends, so release the connection every time.
            response.close()

        if last_modified is not None:
            modified = last_modified

        if code == 304:
            print(NOT_MODIFIED_BANNER)
        elif code == 200:
            if not isinstance(body, str):
                # Python 3 returns bytes; assumes UTF-8 text -- TODO confirm
                # against the Content-Type charset if other URLs are polled.
                body = body.decode('utf-8', 'replace')
            print(body)
        else:
            print('there is an error')

        if modified is None:
            # The server sent no Last-Modified header: fall back to the time
            # of this first poll, formatted as a proper HTTP-date.
            modified = http_date()
        request.add_header('If-Modified-Since', modified)
        time.sleep(POLL_INTERVAL)


if __name__ == '__main__':
    main()