- 在线时间
- 472 小时
- 最后登录
- 2025-9-5
- 注册时间
- 2023-7-11
- 听众数
- 4
- 收听数
- 0
- 能力
- 0 分
- 体力
- 7679 点
- 威望
- 0 点
- 阅读权限
- 255
- 积分
- 2884
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 1161
- 主题
- 1176
- 精华
- 0
- 分享
- 0
- 好友
- 1
该用户从未签到
 |
网上有不少好看的动漫图集，如果手工一张张下载太费时了。简单分析一下网页地址的规律，写个多线程脚本就能搞定。
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- # filename: paxel.py
-
- '''It is a multi-thread downloading tool
-
- It was developed following axel.
- Author: volans
- E-mail: volansw [at] gmail.com
- '''
- 4 ]8 m3 t: q& c% A
- import sys) b0 s& q6 u. E+ h\" m' g
- import os x6 S' r7 ^( {) ]* `, ?7 a5 |
- import time/ C0 H+ J9 Y. @! }3 m3 ]
- import urllib
+ P. O# u* P1 _1 R7 T - from threading import Thread1 N9 `% `0 |8 `% j
- 5 U& `3 D2 F& X
# Proxy mapping (URL scheme -> proxy URL) that paxel() uses when the caller
# does not pass its own ``proxies`` argument.
local_proxies = {'http': 'http://131.139.58.200:8080'}
- - T\" o+ X0 V+ @. P7 A6 I
class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a part file.

    run() is the method Thread invokes once start() has been called.

    Attributes set by the constructor:
        name       -- thread name, also used in the progress messages
        url        -- address to download from
        filename   -- part file the received bytes are appended to
        ranges     -- (first_byte, last_byte) pair, both ends inclusive
        downloaded -- number of bytes of this part already on disk
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Store the job description; nothing is downloaded until run().

        ``ranges`` must be an indexable (first_byte, last_byte) pair by the
        time run() executes; the default of 0 is only a placeholder.
        ``proxies`` is handed to FancyURLopener unchanged and is never
        mutated here.
        '''
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Download the bytes of this part that are not on disk yet.'''
        # Resume support: whatever is already in the part file counts as done.
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0

        first_byte, last_byte = self.ranges[0], self.ranges[1]

        # Rebuild the start point from what is already on disk.
        self.startpoint = first_byte + self.downloaded

        # last_byte is inclusive (it is sent verbatim in the Range header), so
        # the part is complete only once the start point has moved past it.
        # Comparing with >= would skip the final byte of an almost-finished part.
        if self.startpoint > last_byte:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # 16 KiB per read
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, last_byte))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, last_byte))

        self.urlhandle = self.open(self.url)
        try:
            filehandle = open(self.filename, 'ab+')
            try:
                data = self.urlhandle.read(self.oneTimeSize)
                while data:
                    filehandle.write(data)
                    # Flush after every chunk so the on-disk size stays usable
                    # as a resume point if the download is interrupted.
                    filehandle.flush()
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.oneTimeSize)
            finally:
                filehandle.close()
        finally:
            self.urlhandle.close()
- 2 _# y1 \0 g$ e+ M) P4 T
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for ``url``.

    The value is taken from the Content-Length response header.

    url     -- address to probe
    proxies -- proxy mapping passed straight to urllib.urlopen(); it is not
               mutated here

    Returns 0 when the response carries no Content-Length header.
    Raises ValueError if the header value is not an integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # Look the header up by name (case-insensitive) instead of scanning
        # every raw header line for the substring 'Length'.
        length = urlHandler.info().get('Content-Length')
    finally:
        urlHandler.close()

    if length is None:
        return 0
    return int(length.strip())
-
def SpliteBlocks(totalsize, blocknumber):
    '''Split ``totalsize`` bytes into ``blocknumber`` consecutive byte ranges.

    totalsize   -- total number of bytes
    blocknumber -- number of ranges to produce

    Returns a list of (first_byte, last_byte) tuples with inclusive ends.
    Every range except the last spans totalsize // blocknumber bytes; the
    last one also takes the remainder and ends at totalsize - 1.

    Raises ZeroDivisionError when ``blocknumber`` is 0.
    '''
    # Floor division keeps the bounds integral whether or not true division
    # is in effect.
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in ``tasks`` is still running.

    tasks -- iterable of threading.Thread objects

    Returns False for an empty iterable.
    '''
    # is_alive() is the spelling available on both old and current Thread
    # objects; the camelCase isAlive() alias no longer exists in newer Pythons.
    return any(task.is_alive() for task in tasks)
- ! O- }4 a8 Z0 _; ~
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download ``url`` into ``output`` using ``blocks`` parallel threads.

    url     -- address of the file to fetch
    output  -- path of the final file
    blocks  -- number of byte ranges, one worker thread each
    proxies -- proxy mapping used for the size probe and for every worker

    Each worker appends its range to "tmpfile_<i>" in the current directory.
    Once all workers have stopped, the part files are concatenated in order
    into ``output`` and then removed.
    '''
    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(0, blocks)]
    filename = ["tmpfile_%d" % i for i in range(0, blocks)]

    tasks = []
    for i in range(0, blocks):
        # Hand the proxy mapping to the worker so the transfer takes the same
        # route as the size probe above.
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        task.daemon = True  # do not keep the process alive if the main thread exits
        task.start()
        tasks.append(task)

    # Give the workers a moment to start before polling them.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum([task.downloaded for task in tasks])
        # Guard against a reported size of 0 (no Content-Length header).
        process = downloaded / float(size) * 100 if size else 0.0
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # Stitch the parts together in range order.
    with open(output, 'wb+') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                filehandle.write(f.read())
            # Removing the part file is best-effort: a leftover temp file must
            # not abort the merge, but only filesystem errors are ignored.
            try:
                os.remove(part)
            except OSError:
                pass
- 0 w4 p\" D% ~- S\" Q
# Script entry point: fetch one sample image with four threads and no proxy.
if __name__ == '__main__':
    url = "http://xz1.mm667.com/xz84/images/001.jpg"
    output = '001.jpg'
    paxel(url, output, blocks=4, proxies={})
复制代码
2 B7 f/ r- y' k |
zan
|