#! /usr/bin/python
# '''
# File      : getRFC.py
# Author    : Mike
# E-Mail    : Mike_Zhang@live.com
# '''
"""Download a numeric range of RFC text files into a local ``RFC`` directory.

The script prompts for a start and an end RFC number, fetches
``<RFC_BASE_URL>/rfc<N>.txt`` for every N in that inclusive range, moves each
downloaded file into ``RFC_DIR`` and records one line per RFC in
``RFC_DIR/log.txt``.
"""
import os
import shutil
import time
import urllib

try:  # Python 2
    from urllib import urlretrieve
except ImportError:  # Python 3
    from urllib.request import urlretrieve

try:  # Python 2
    readLine = raw_input
except NameError:  # Python 3
    readLine = input

# Example URLs:
#   http://www.networksorcery.com/enp/rfc/rfc1000.txt
#   http://www.networksorcery.com/enp/rfc/rfc6409.txt
RFC_BASE_URL = 'http://www.networksorcery.com/enp/rfc'
RFC_DIR = 'RFC'


def downloadHtmlPage(url, tmpf='', delay=0.2):
    """Download *url* to a local file and return the local file name.

    url   -- address to fetch.
    tmpf  -- destination path; when empty, the text after the last '/' of
             *url* is used, i.e. the file is saved in the current directory.
    delay -- seconds to sleep after the download so that consecutive calls
             do not hit the server back to back (0 or None disables it).

    Raises ValueError when no destination name can be derived (``tmpf`` is
    empty and *url* ends with '/').  Errors raised by ``urlretrieve`` are
    passed through to the caller.
    """
    fileName = tmpf if tmpf else url[url.rfind('/') + 1:]
    if not fileName:
        # urlretrieve would silently write to an anonymous temp file here.
        raise ValueError('cannot derive a file name from %r' % (url,))
    print('%s -> %s' % (url, fileName))
    urlretrieve(url, fileName)
    print('Downloaded %s' % fileName)
    if delay and delay > 0:
        time.sleep(delay)
    return fileName


def main():
    """Prompt for an RFC number range and download every RFC in it."""
    try:
        startIndex = int(readLine('start : '))
        endIndex = int(readLine('end : '))
    except ValueError:
        print('Input error!')
        return
    if startIndex > endIndex:
        # Stop here: nothing to download, so do not create the directory/log.
        print('Input error!')
        return

    if not os.path.exists(RFC_DIR):
        os.makedirs(RFC_DIR)

    # 'with' guarantees the log is flushed and closed even on Ctrl-C.
    with open(os.path.join(RFC_DIR, 'log.txt'), 'w') as logFile:
        for number in range(startIndex, endIndex + 1):
            rfcUrl = '%s/rfc%d.txt' % (RFC_BASE_URL, number)
            try:
                fileName = downloadHtmlPage(rfcUrl)
                newName = os.path.join(RFC_DIR, fileName)
                if os.path.exists(fileName):
                    shutil.move(fileName, newName)
                    print('Moved %s to %s' % (fileName, newName))
                msgLog = rfcUrl + ' is ok'
            except Exception as err:
                # Best effort: one failed RFC must not abort the whole run,
                # but KeyboardInterrupt/SystemExit are allowed to propagate.
                msgLog = 'get %s failed! (%s)' % (number, err)
                print(msgLog)
            logFile.write(msgLog + '\n')


if __name__ == '__main__':
    main()