- 在线时间
- 471 小时
- 最后登录
- 2025-8-11
- 注册时间
- 2023-7-11
- 听众数
- 4
- 收听数
- 0
- 能力
- 0 分
- 体力
- 7639 点
- 威望
- 0 点
- 阅读权限
- 255
- 积分
- 2872
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 1160
- 主题
- 1175
- 精华
- 0
- 分享
- 0
- 好友
- 1
该用户从未签到
 |
网上有不少好看的动漫图集,手工逐张下载太费时。简单分析一下网页地址的规律,写个多线程脚本就能搞定。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''It is a multi-thread downloading tool.

It was developed following axel.
    Author: volans
    E-mail: volansw [at] gmail.com
'''
import os
import shutil
import sys
import time
import urllib
from threading import Thread
) w. M) J+ W\" p* X/ ~) W m - ) [- u\" l' A h. O; u
# Default proxy mapping (URL scheme -> proxy address); paxel() uses it when
# the caller does not pass its own ``proxies`` argument.
local_proxies = {'http': 'http://131.139.58.200:8080'}
- . T& g) L; I- y. H/ v& E
class AxelPython(Thread, urllib.FancyURLopener):
    '''Thread that downloads one byte range of a URL into a part file.

    The thread requests the inclusive byte range ``ranges`` of ``url`` with
    an HTTP ``Range`` header and appends what it receives to ``filename``.
    Bytes already present in ``filename`` count as downloaded, so a part
    that was interrupted is resumed instead of restarted.

    run() is the method that Thread.start() executes in the new thread.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Prepare the thread and the URL opener.

        threadname -- name given to the thread
        url        -- address of the resource to download
        filename   -- part file the received bytes are appended to
        ranges     -- (first_byte, last_byte) pair, both ends inclusive;
                      the default 0 is kept for interface compatibility,
                      but run() indexes this value and needs a real pair
        proxies    -- proxy mapping passed straight to FancyURLopener; it
                      is only handed on, never modified here
        '''
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Download the part of the range that is not on disk yet.'''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet, so nothing has been downloaded.
            self.downloaded = 0

        # Resume right behind the bytes that are already stored.
        self.startpoint = self.ranges[0] + self.downloaded

        # Both ends of the range are inclusive: the part is complete only
        # once the start point has moved past the last byte.  (Testing with
        # ">=" would skip the final byte and never fetch a one-byte range.)
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # 16 KB per read
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, self.ranges[1]))

        self.addheader("Range",
                       "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
        self.urlhandle = self.open(self.url)
        try:
            # Open the part file once for the whole transfer rather than
            # once per chunk.
            with open(self.filename, 'ab') as filehandle:
                data = self.urlhandle.read(self.oneTimeSize)
                while data:
                    filehandle.write(data)
                    # Read by the progress display in the main thread.
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.oneTimeSize)
        finally:
            self.urlhandle.close()

def GetUrlFileSize(url, proxies={}):
    '''Return the Content-Length announced for ``url``, or 0 if absent.

    url     -- address of the resource
    proxies -- proxy mapping passed unchanged to urllib.urlopen(); it is
               never modified here

    Raises ValueError if the header value is not an integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        headers = urlHandler.info().headers
    finally:
        # Only the headers are needed; release the connection right away.
        urlHandler.close()

    length = 0
    for header in headers:
        name, separator, value = header.partition(':')
        # Match the exact header name, ignoring case, instead of any header
        # line that merely contains the text "Length".
        if separator and name.strip().lower() == 'content-length':
            length = int(value.strip())
    return length
+ }+ l; j9 J& k g4 S - , P) g- y+ m0 `7 V' P
def SpliteBlocks(totalsize, blocknumber):
    '''Split ``totalsize`` bytes into ``blocknumber`` consecutive ranges.

    Returns a list of (first_byte, last_byte) tuples with inclusive ends.
    Every block except the last covers ``totalsize // blocknumber`` bytes;
    the last block additionally takes the remainder.

    Raises ValueError if ``blocknumber`` is smaller than 1.
    '''
    if blocknumber < 1:
        raise ValueError('blocknumber must be at least 1, got %r'
                         % (blocknumber,))

    # Floor division keeps the offsets integral even where "/" is true
    # division.
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    # The last block runs to the end of the file and absorbs the remainder.
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in ``tasks`` is still running.

    tasks -- iterable of thread objects; an empty iterable yields False.
    '''
    # is_alive() is the spelling available on both old and current Thread
    # objects; the camelCase isAlive() alias no longer exists on newer ones.
    return any(task.is_alive() for task in tasks)
0 Y1 ^8 [2 a2 V# i, h - \" L. }7 X4 X. G: h& E
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download ``url`` to ``output`` with several threads.

    The file size is looked up first, the byte range is split into
    ``blocks`` parts, and one AxelPython thread downloads each part into
    ``tmpfile_<n>`` in the current directory.  While the threads run, a
    progress line is written to stdout.  Once they finish, the parts are
    concatenated into ``output`` and removed.

    url     -- address of the resource to download
    output  -- path of the final file
    blocks  -- number of parts/threads; reduced when the file has fewer
               bytes than that
    proxies -- proxy mapping used for the size lookup and for every
               download thread

    Raises IOError if the size cannot be determined or a part file does not
    have the expected size after the threads have stopped.
    '''
    size = GetUrlFileSize(url, proxies)
    if size <= 0:
        # Without a size there is nothing to split into ranges.
        raise IOError('cannot determine the size of %s' % url)

    # One byte per block is the minimum; empty ranges would never get a
    # part file.
    blocks = max(1, min(blocks, size))
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        # Hand the proxies on so the workers use the same route as the
        # size lookup.
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        task.daemon = True
        task.start()
        tasks.append(task)

    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        process = downloaded / float(size) * 100
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # A worker that failed leaves a short (or missing) part, and a stale
    # part file from an earlier run can be too long.  Check before merging
    # so a damaged output file is never produced; the parts are kept so a
    # rerun can resume.
    for part, (first, last) in zip(filename, ranges):
        expected = last - first + 1
        try:
            actual = os.path.getsize(part)
        except OSError:
            actual = 0
        if actual != expected:
            raise IOError('part %s holds %d bytes, expected %d'
                          % (part, actual, expected))

    with open(output, 'wb') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                # Stream the part instead of loading it into memory at once.
                shutil.copyfileobj(f, filehandle)
            try:
                os.remove(part)
            except OSError:
                # Best effort: a leftover part file does not harm the output.
                pass

-
if __name__ == '__main__':
    # Usage: paxel.py [URL [OUTPUT]]
    # Without arguments the sample picture below is downloaded.
    url = "http://xz1.mm667.com/xz84/images/001.jpg"
    output = '001.jpg'
    if len(sys.argv) > 1:
        url = sys.argv[1]
        if len(sys.argv) > 2:
            output = sys.argv[2]
        else:
            # Name the file after the last path component of the URL,
            # ignoring any query string; keep the default if that is empty.
            output = os.path.basename(url.split('?', 1)[0]) or output
    paxel(url, output, blocks=4, proxies={})
复制代码
) C2 p/ [) Q' r4 \ |
zan
|