#!/usr/bin/python2 ''' sget.py 1.0.1 (c) 2003-2006 Jan ONDREJ (SAL) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. ''' import sys,os,re,time,urllib,string,traceback import htmllib,formatter,urlparse,socket BLOCK_SIZE=1024*80 sf_net_servers=['ovh','osdn','citkit','switch','unc','aleron','unm', 'twtelecom','heanet','easynews','voxel'] exclude="/(GDL|CVS|CVSROOT)/" include_patt="." server_path="." base='XTTP://' errors=[] reg_url=re.compile('^(https?://)([^/]+)(.*)$',re.I) def totime(t): return "%d:%02d:%02d" % (t/60/60,(t/60)%60,t%60) def fnum(n,e='B',d=1024.0,max=9999.0): mb,me='',' ' if n>max: n=n/d mb,me='K','' if n>max: n=n/d mb='M' return "%4.1f %s%s%s" % (n,mb,e,me) def dequote(s): return urllib.unquote(re.sub("&","&",s)) class timer: def __init__(self): self.t0=time.time() self.t1=self.t0-2 self.startat=0L def reporthook(self,a,b,cp): bs=self.startat+a*b c=self.startat+cp self.c=c if ((self.t1+0.5)0: est=totime((cp-a*b)/bps) else: est="?:??:??" os="%11s / %11s, %11s, %3d%%, EST: %5s\r" \ % (fnum(bs,'B'),fnum(c,'B'),fnum(bps,'B/s'),percent,est) sys.stdout.write(os) sys.stdout.flush() class rparser(htmllib.HTMLParser): def anchor_bgn(self, href, name, type): if not re.search('^\?[NMSDC]=[DAMNS](;O=[AD])*$',href): if href and (href[0]=='/'): t=self.purl[0]+'://'+self.purl[1]+href elif re.search('^http://',href): t=href else: t=os.path.join(self.url,href) if re.search('^'+self.url,t): self.files.append(t) def seturl(self,url): self.url=url self.purl=urlparse.urlparse(url) self.files=[] return self def rmkdir(dir): f='' for i in re.split("/",dir): f=os.path.join(f,i) if f: try: os.mkdir(f) except OSError,(ec,es): if ec!=17: raise class myURLopenerBase(urllib.URLopener): with_redir=0 def http_error_206(self, url, fp, errcode, errmsg, headers): return urllib.addinfourl(fp, headers, "http:" + url) class myURLopener(myURLopenerBase): with_redir=1 def http_error_302(self, url, fp, errcode, errmsg, headers): self.redir_url=re.search("^Location: ([^\r\n]*)[\r\n]",str(headers), re.MULTILINE|re.DOTALL).group(1) def sget(url,path='',fd=0,uo=myURLopener): socket.setdefaulttimeout(30) while 1: fn=os.path.join(path,os.path.basename(dequote(url))) t=timer() f=uo() if not fd: fd=os.open(fn,os.O_WRONLY|os.O_APPEND|os.O_CREAT,0644) t.startat=os.stat(fn)[6] if t.startat>0: print ' Range: bytes='+str(t.startat)+'-' f.addheader('Range','bytes='+str(t.startat)+'-') #os.lseek(fd,t.startat,0) try: fp=f.open(re.sub("&","%26",url)) except IOError,err: try: _,ec,es,_=err except ValueError: ec,es=err if ec==416: print " Already complete." return t.startat elif ec==302 and f.with_redir>0: print " Redirect:",f.redir_url reg_new=reg_url.search(f.redir_url) reg_old=reg_url.search(url) if reg_new: return sget(f.redir_url,path,fd) elif f.redir_url[0]=='/': newurl=os.path.join(reg_old.group(1)+reg_old.group(2),f.redir_url) print "New URL:",newurl return sget(newurl,path,fd) else: newurl=os.path.join(os.path.dirname(url.split('?')[0]),f.redir_url) print "New URL:",newurl return sget(newurl,path,fd) elif str(es)=="timed out": print " Timeout." os.close(fd) fd=0 continue else: print " HTTP ERROR: "+str(ec)+": "+str(es) return -1 headers = fp.info() bs,blocknum,size=BLOCK_SIZE,1,-1 if headers.has_key("content-length"): size = long(headers["Content-Length"]) t.reporthook(0, bs, size) try: block = fp.read(bs) t.reporthook(1, bs, size) while block: os.write(fd,block) block = fp.read(bs) blocknum = blocknum + 1 t.reporthook(blocknum, bs, size) fp.close() os.close(fd) f.close() break except socket.timeout: print "\nTimeout, restarting ..." os.close(fd) fd=0 t.reporthook(size,1,size) print "" return t.c def rget(url,path=''): global errors print dequote(re.sub(os.path.dirname(base),"...",url)) if url[len(url)-1]=='/': shortpath=dequote(re.sub(os.path.dirname(base),"",url)) print include_patt,exclude,url if (not re.search(include_patt,url)) and (shortpath!="/"): print " Skipping (not included) ..." elif re.search(exclude,url): print " Skipping (excluded) ..." else: buf=urllib.urlopen(url).read(102400) p=rparser(formatter.NullFormatter()).seturl(url) p.feed(buf) for i in p.files: dir=dequote(os.path.dirname(re.sub(base,"",url))) rmkdir(dir) rget(i,dir) p.close() else: if re.search(exclude,url): print " Skipping (excluded) ..." elif re.search('/\.+$',url): print " Skipping (parent dir) ..." else: if sget(url,path)<0: errors.append(url) def isdir(path): try: if os.lstat(path+'/')[0] & 0x4000: # is_dir return True else: return False except: return False def start_server(root='.',host="0.0.0.0",port=7890): s=socket.socket(socket.AF_INET,socket.SOCK_STREAM) s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1) try: s.bind((host,port)) s.listen(2) except socket.gaierror,(ec,es): print "BIND ERROR:",es sys.exit(2) while 1: try: conn,addr = s.accept() r=conn.recv(1024) reg1=re.search("^GET (.*) HTTP/1.[01]",r,re.IGNORECASE) dir=re.sub("/(\.\./|\./)*","/",root+dequote(reg1.group(1))) if isdir(dir): print "List:",dir hdr="HTTP/1.1 200 OK\r\nConnection: close\r\n"\ +"Content-Type: text/html\r\n\r\n" conn.send(hdr) for d in os.listdir(dir): fdir=os.path.join(dir,d) qd=urllib.quote(d) if isdir(fdir): conn.send(""+d+"/
\n") else: conn.send(""+d+"
\n") else: # is file print "Send:",dir reg2=re.search("^Range: bytes=([0-9]*)-",r, re.IGNORECASE|re.MULTILINE) if reg2: range_from=int(reg2.group(1)) print " Range from:",range_from else: range_from=0 flength=os.lstat(dir)[6] hdr="HTTP/1.1 200 OK\r\nConnection: close\r\n"\ +"Accept-Ranges: bytes\r\nContent-Length: "+str(flength)\ +"\r\nContent-Type: text/plain\r\n\r\n" conn.send(hdr) fd=open(dir) fd.seek(range_from) while 1: buf=fd.read(BLOCK_SIZE*10) if buf: conn.send(buf) else: break fd.close() conn.shutdown(2) conn.close() except socket.error,(ec,es): if ec not in [32,104]: raise except KeyboardInterrupt: print "\nInterrupted!" sys.exit(0) # check parameters n=1 durl=[] while n=0: break except KeyboardInterrupt: pass else: reg=re.search base=arg rget(arg) if errors: print "!!! ERRORS !!!" for i in errors: print i except KeyboardInterrupt: print "\nInterrupted!" sys.exit(0) except: print string.join(traceback.format_exception_only(sys.exc_type, sys.exc_value),', ') raise