- 在线时间
- 480 小时
- 最后登录
- 2026-6-1
- 注册时间
- 2023-7-11
- 听众数
- 4
- 收听数
- 0
- 能力
- 0 分
- 体力
- 7823 点
- 威望
- 0 点
- 阅读权限
- 255
- 积分
- 2934
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 1174
- 主题
- 1189
- 精华
- 0
- 分享
- 0
- 好友
- 1
该用户从未签到
 |
网上有很多好看的动漫图集,手工逐张下载太费时。简单分析一下网页地址的规律,写个多线程脚本即可搞定。
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- # filename: paxel.py
-
- '''It is a multi-thread downloading tool.
-
- It was developed following axel.
- Author: volans
- E-mail: volansw [at] gmail.com
- '''
-
' I+ w& j o6 o6 ?+ R - import sys
# K, B! @# X- K% S8 W% c - import os. ~6 I# F: y- F8 y; ?: @7 w
- import time: t$ F: M2 d! p: v9 Q; ^- V
- import urllib9 g' }\" d; Q' p1 s\" n: B7 E
- from threading import Thread% @/ O; [) i% l2 l/ v3 \5 [! _
-
# Proxy mapping (URL scheme -> proxy URL) that paxel() uses as the default
# value of its `proxies` argument.
local_proxies = dict(http='http://131.139.58.200:8080')
-
class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a part file.

    The class is both a Thread (run() is the thread body) and a
    urllib.FancyURLopener (used to add the Range header and open the URL).

    Attributes set by __init__:
        name       -- thread name, also used in progress messages.
        url        -- URL to download from.
        filename   -- path of the part file that received bytes are appended to.
        ranges     -- (first_byte, last_byte) pair, both offsets inclusive.
        downloaded -- number of bytes of this part present on disk so far;
                      read by other threads to report progress.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Store the download parameters; nothing is fetched until start().

        ranges must be an indexable (first_byte, last_byte) pair because
        run() reads ranges[0] and ranges[1]; the default of 0 is kept only
        for signature compatibility and cannot be used by run().
        proxies is handed unchanged to urllib.FancyURLopener.__init__.
        '''
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Thread body: fetch the remaining bytes of this part, resuming if possible.'''
        # Whatever is already in the part file counts as downloaded, so an
        # interrupted run continues instead of starting over.
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0

        # Absolute offset of the first byte that is still missing.
        self.startpoint = self.ranges[0] + self.downloaded

        # ranges[1] is an inclusive offset (it is sent as the end of the
        # "bytes=start-end" header below), so the part is complete only once
        # the start point has moved past it.  Comparing with >= would skip
        # the final byte whenever exactly one byte is left.
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # bytes requested per read() call
        print('task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1]))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
        self.urlhandle = self.open(self.url)
        try:
            # Open the part file once for the whole transfer instead of once
            # per chunk; flush after every chunk so the size on disk keeps
            # tracking self.downloaded for a later resume.
            filehandle = open(self.filename, 'ab')
            try:
                data = self.urlhandle.read(self.oneTimeSize)
                while data:
                    filehandle.write(data)
                    filehandle.flush()
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.oneTimeSize)
            finally:
                filehandle.close()
        finally:
            self.urlhandle.close()
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for url.

    Opens url with urllib.urlopen (passing proxies through unchanged) and
    reads the Content-Length response header.

    Returns the header value as an int, or 0 when the header is absent.
    Raises ValueError if the header value is not a valid integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # Look the header up by its exact name (the lookup is
        # case-insensitive) rather than scanning for any header line that
        # merely contains the substring 'Length'.
        length = urlHandler.info().getheader('Content-Length', 0)
    finally:
        # Only the headers are needed, so release the connection right away.
        urlHandler.close()
    return int(length)
def SpliteBlocks(totalsize, blocknumber):
    '''Split totalsize bytes into blocknumber consecutive inclusive ranges.

    Returns a list of (first_byte, last_byte) tuples.  Every range except
    the last spans totalsize // blocknumber bytes; the last range runs to
    totalsize - 1 and therefore absorbs the remainder.

    Raises ValueError if blocknumber is smaller than 1.
    '''
    if blocknumber < 1:
        raise ValueError('blocknumber must be at least 1, got %r' % (blocknumber,))

    # Floor division keeps the offsets integral even where "/" means true
    # division; float offsets would be useless as byte positions.
    blocksize = totalsize // blocknumber

    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    # The final block takes whatever is left over after the even split.
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in tasks is still running.

    tasks is an iterable of threading.Thread objects; an empty iterable
    yields False.
    '''
    # is_alive() is the spelling available on both Python 2.6+ and Python 3;
    # the camelCase isAlive() alias no longer exists on current Python 3.
    return any(task.is_alive() for task in tasks)
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download url into output using `blocks` parallel range requests.

    Steps:
      1. Ask the server for the file size (GetUrlFileSize).
      2. Split that size into `blocks` byte ranges (SpliteBlocks).
      3. Start one AxelPython thread per range; each appends to its own
         part file named "tmpfile_<index>" in the current directory.
      4. Print a progress line until every thread has finished.
      5. Concatenate the part files, in order, into output and delete them.

    proxies is used both for the size probe and for every worker thread.

    Note: the part file names are fixed and workers resume from whatever a
    part file already contains, so leftover "tmpfile_<index>" files are
    treated as already-downloaded data.
    '''
    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(0, blocks)]
    filename = ["tmpfile_%d" % i for i in range(0, blocks)]

    tasks = []
    for i in range(0, blocks):
        # Pass the proxy settings on so the workers go through the same
        # proxy as the size probe above.
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        task.setDaemon(True)
        task.start()
        tasks.append(task)

    # Give the workers a moment to start before the first progress report.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        # A reported size of 0 would otherwise raise ZeroDivisionError here.
        process = downloaded / float(size) * 100 if size else 0.0
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # Merge the parts in block order, closing every handle even on error.
    filehandle = open(output, 'wb+')
    try:
        for i in filename:
            f = open(i, 'rb')
            try:
                filehandle.write(f.read())
            finally:
                f.close()
            try:
                os.remove(i)
            except OSError:
                # Best effort: a part file that cannot be deleted must not
                # abort the merge of the remaining parts.
                pass
    finally:
        filehandle.close()
if __name__ == '__main__':
    # Example run: fetch one image in four parallel parts without any proxy.
    paxel(url="http://xz1.mm667.com/xz84/images/001.jpg",
          output='001.jpg',
          blocks=4,
          proxies={})
复制代码
|
zan
|