- 在线时间
- 479 小时
- 最后登录
- 2026-4-17
- 注册时间
- 2023-7-11
- 听众数
- 4
- 收听数
- 0
- 能力
- 0 分
- 体力
- 7790 点
- 威望
- 0 点
- 阅读权限
- 255
- 积分
- 2923
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 1171
- 主题
- 1186
- 精华
- 0
- 分享
- 0
- 好友
- 1
该用户从未签到
 |
网上好看的动漫图集，如果手工下载太费时了。简单分析下网页地址规律，写个多线程脚本搞定。

#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''It is a multi-thread downloading tool

It was developed following axel.
Author: volans
E-mail: volansw [at] gmail.com
'''
-
import sys
import os
import time
import urllib
from threading import Thread

# Proxy mapping (URL scheme -> proxy URL) used as the default ``proxies``
# argument of paxel().
local_proxies = {'http': 'http://131.139.58.200:8080'}

class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a part file.

    The object is both a ``Thread`` and a ``urllib.FancyURLopener``: run()
    is the thread body and uses the opener half of the object to issue a
    request carrying a ``Range`` header.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Record the download parameters; nothing is fetched until start().

        threadname -- name of the thread (also stored in ``self.name``).
        url        -- address of the resource to fetch.
        filename   -- part file that run() appends the received bytes to.
        ranges     -- ``(first_byte, last_byte)`` pair.  Both ends are sent
                      verbatim in the ``Range: bytes=first-last`` header, so
                      the last byte is inclusive.  The default of 0 is kept
                      for compatibility, but run() indexes this value, so a
                      pair has to be supplied in practice.
        proxies    -- proxy mapping handed to ``FancyURLopener``.
        '''
        Thread.__init__(self, name=threadname)
        if proxies is not None:
            # Private copy, so the opener never aliases the shared default
            # dict (or a dict the caller keeps using).
            proxies = dict(proxies)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Thread body: fetch the still-missing bytes of this range.

        The current size of the part file is taken as the number of bytes
        already downloaded, so an interrupted download resumes where it
        stopped.  ``self.downloaded`` is updated after every chunk.
        '''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # The part file does not exist yet: start from scratch.
            self.downloaded = 0

        # Rebuild the start point from what is already on disk.
        self.startpoint = self.ranges[0] + self.downloaded

        # The end of the range is inclusive, so the part is complete only
        # once the start point has moved *past* it.  Comparing with ``>=``
        # would drop the last byte and skip one-byte ranges entirely.
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # bytes requested per read() call
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, self.ranges[1]))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))

        self.urlhandle = self.open(self.url)
        try:
            with open(self.filename, 'ab') as filehandle:
                data = self.urlhandle.read(self.oneTimeSize)
                while data:
                    filehandle.write(data)
                    # Flush per chunk so the on-disk size, which a later
                    # run uses as its resume point, tracks self.downloaded.
                    filehandle.flush()
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.oneTimeSize)
        finally:
            self.urlhandle.close()


def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for ``url``.

    url     -- address to query.
    proxies -- proxy mapping passed straight to ``urllib.urlopen``.

    The value comes from the ``Content-Length`` response header; 0 is
    returned when the server does not send one.  Raises ``ValueError`` if
    the header is present but is not an integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # Look the header up by name instead of scanning for the substring
        # 'Length', which was case-sensitive and could match other headers.
        length = urlHandler.info().getheader('Content-Length')
    finally:
        # Only the headers are needed, so release the connection right away.
        urlHandler.close()

    if length is None:
        return 0
    return int(length.strip())


def SpliteBlocks(totalsize, blocknumber):
    '''Split ``totalsize`` bytes into ``blocknumber`` consecutive ranges.

    Returns a list of ``(first_byte, last_byte)`` tuples with both ends
    inclusive.  Every block holds ``totalsize // blocknumber`` bytes except
    the last one, which also takes the remainder and always ends at
    ``totalsize - 1``.

    Raises ``ZeroDivisionError`` when ``blocknumber`` is 0.
    '''
    # Floor division keeps the offsets integral even where ``/`` means true
    # division (Python 3, or ``from __future__ import division``).
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges


def islive(tasks):
    '''Return True if at least one thread in ``tasks`` is still running.

    tasks -- iterable of ``threading.Thread`` objects; an empty iterable
             yields False.
    '''
    # is_alive() is the spelling available on both Python 2.6+ and
    # Python 3; the camelCase isAlive() alias no longer exists in 3.9+.
    return any(task.is_alive() for task in tasks)


def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download ``url`` into ``output`` using ``blocks`` parallel threads.

    url     -- address of the file to download.
    output  -- path of the final, merged file.
    blocks  -- number of byte ranges, and therefore worker threads.
    proxies -- proxy mapping used for the size query and for every worker.

    Each worker writes its range to ``tmpfile_<i>`` in the current
    directory.  While workers run, a progress line is rewritten on stdout
    twice a second.  Afterwards the part files are concatenated in order
    into ``output`` and removed.
    '''
    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(0, blocks)]
    filename = ["tmpfile_%d" % i for i in range(0, blocks)]

    tasks = []
    for i in range(0, blocks):
        # Hand the proxies to the worker as well; otherwise only the size
        # query above would honour them.
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        task.daemon = True
        task.start()
        tasks.append(task)

    # Give the workers a moment to start before the first progress report.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        # A reported size of 0 must not crash the progress display.
        process = downloaded / float(size) * 100 if size else 0.0
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # Stitch the parts together in block order.
    with open(output, 'wb') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                filehandle.write(f.read())
            try:
                os.remove(part)
            except OSError:
                # Best effort: a part file that cannot be deleted is left
                # behind rather than failing the finished download.
                pass


if __name__ == '__main__':
    # Example run: fetch one image with four threads and no proxy.
    url = "http://xz1.mm667.com/xz84/images/001.jpg"
    output = '001.jpg'
    paxel(url, output, blocks=4, proxies={})
复制代码
|
zan
|