- 在线时间
- 479 小时
- 最后登录
- 2026-4-17
- 注册时间
- 2023-7-11
- 听众数
- 4
- 收听数
- 0
- 能力
- 0 分
- 体力
- 7790 点
- 威望
- 0 点
- 阅读权限
- 255
- 积分
- 2923
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 1171
- 主题
- 1186
- 精华
- 0
- 分享
- 0
- 好友
- 1
该用户从未签到
 |
网上有很多好看的动漫图集,但手工逐张下载太费时。简单分析一下网页地址的规律,写个多线程下载脚本就能搞定:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''It is a multi-thread downloading tool.

    It was developed following axel.
        Author: volans
        E-mail: volansw [at] gmail.com
'''

import sys
import os
import time
import urllib
from threading import Thread

# Proxy mapping used as the default `proxies` argument of paxel().
# Keys are URL schemes, values are proxy URLs.
local_proxies = {'http': 'http://131.139.58.200:8080'}

class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a part file.

    The part file is opened in append mode, so a part that already exists
    on disk is resumed from its current size instead of being restarted.

    Attributes:
        url:        address of the remote file.
        filename:   path of the local part file written by this worker.
        ranges:     (first_byte, last_byte) pair; both ends are inclusive.
        downloaded: number of bytes of this part present on disk so far.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Set up the thread and the URL opener.

        threadname -- name given to the thread.
        url        -- address to download from.
        filename   -- local part file to append to.
        ranges     -- (first_byte, last_byte) pair; run() indexes it, so the
                      default of 0 is only a placeholder.
        proxies    -- proxy mapping forwarded unchanged to FancyURLopener.
        '''
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Thread entry point: fetch the missing tail of this part.'''
        # Resume support: whatever is already on disk counts as downloaded.
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0

        # First byte that still has to be fetched.
        self.startpoint = self.ranges[0] + self.downloaded

        # ranges[1] is the LAST byte of the part (inclusive), so the part is
        # complete only once the start point has moved past it.  Using ">="
        # here would silently drop the final byte of a resumed part.
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # 16 KiB per read
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, self.ranges[1]))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))

        self.urlhandle = self.open(self.url)
        try:
            # Open the part file once instead of once per chunk.
            with open(self.filename, 'ab') as filehandle:
                data = self.urlhandle.read(self.oneTimeSize)
                while data:
                    filehandle.write(data)
                    # Flush per chunk so an interrupted run can be resumed
                    # from what is actually on disk.
                    filehandle.flush()
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.oneTimeSize)
        finally:
            self.urlhandle.close()
-
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for `url`.

    url     -- address of the remote file.
    proxies -- proxy mapping passed to urllib.urlopen (an empty mapping
               means no proxy).

    Returns the integer value of the Content-Length response header, or 0
    when the server does not send one.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # Header lookup is case-insensitive and exact, unlike a substring
        # search for 'Length' over the raw header lines.
        length = urlHandler.info().getheader('Content-Length')
    finally:
        urlHandler.close()

    if length is None:
        return 0
    return int(length.strip())

def SpliteBlocks(totalsize, blocknumber):
    '''Split `totalsize` bytes into `blocknumber` contiguous byte ranges.

    Returns a list of (first_byte, last_byte) tuples with inclusive ends.
    Every block but the last holds totalsize // blocknumber bytes; the last
    block also takes the remainder and always ends at totalsize - 1.
    '''
    # Floor division keeps the offsets integral under true division too.
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in `tasks` is still running.

    tasks -- iterable of thread objects; an empty iterable yields False.
    '''
    # is_alive() is the spelling available on both old and new interpreters;
    # the camelCase isAlive() alias no longer exists in recent Python 3.
    return any(task.is_alive() for task in tasks)
-
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download `url` to `output` using `blocks` parallel worker threads.

    url     -- address of the remote file.
    output  -- path of the final merged file.
    blocks  -- number of byte ranges / worker threads.
    proxies -- proxy mapping used for the size probe and for every worker.

    Each worker writes its range to "tmpfile_<i>" in the current directory.
    Once all workers have stopped, the part files are concatenated into
    `output` in order and then removed.
    '''
    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        # Forward the proxy settings so the workers use the same route as
        # the size probe above.
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        task.daemon = True
        task.start()
        tasks.append(task)

    # Give the workers a moment to start before polling their progress.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        process = downloaded / float(size) * 100
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # NOTE(review): a worker that died early leaves a short part file and
    # the merge below does not detect that -- consider verifying the sizes.
    with open(output, 'wb') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                # Copy in chunks rather than loading a whole part in memory.
                chunk = f.read(65536)
                while chunk:
                    filehandle.write(chunk)
                    chunk = f.read(65536)
            try:
                os.remove(part)
            except OSError:
                # Best effort: a leftover temporary file is harmless.
                pass

if __name__ == '__main__':
    # Demo run: fetch one sample image with four worker threads, no proxy.
    paxel("http://xz1.mm667.com/xz84/images/001.jpg", '001.jpg', blocks=4, proxies={})
复制代码
3 t& {9 {% \: y; ~! o% H |
zan
|