- 在线时间
- 471 小时
- 最后登录
- 2025-8-11
- 注册时间
- 2023-7-11
- 听众数
- 4
- 收听数
- 0
- 能力
- 0 分
- 体力
- 7623 点
- 威望
- 0 点
- 阅读权限
- 255
- 积分
- 2867
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 1160
- 主题
- 1175
- 精华
- 0
- 分享
- 0
- 好友
- 1
该用户从未签到
 |
网上好看的动漫图集，如果手工下载太费时了。简单分析下网页地址规律，写个多线程脚本搞定。
#!/usr/bin/python
- # -*- coding: utf-8 -*-
4 Y( t' T3 [2 s' o) f - # filename: paxel.py; |, J: [9 K2 l4 d7 a4 |/ a
- m5 Q% _6 F1 V4 D- L8 s' c
'''It is a multi-thread downloading tool.

It was developed following axel.
Author: volans
E-mail: volansw [at] gmail.com
'''
' g h/ l! ~. \1 s* G -
! Z) k! n5 D\" _ - import sys\" O/ l. y; ^4 z. a7 k8 B
- import os
7 u) s- ?5 U9 O# [4 L1 D, [ - import time
5 U. h- q. \! K B) P - import urllib9 [+ K% t% q, e3 ~
- from threading import Thread, Q. g* M/ G$ Z2 c, H6 j
-
# Proxy mapping used as the default value of paxel()'s `proxies` parameter.
local_proxies = {'http': 'http://131.139.58.200:8080'}
3 `\" \0 a7 `6 y. V/ L: C& p - , m2 b# x\" S$ z9 Z* l& U
class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a part file.

    The class is both a Thread (run() is the thread body) and a
    urllib.FancyURLopener (it opens the URL itself and adds a Range header).
    Data is appended to ``filename``; if that file already exists its size is
    taken as the number of bytes already fetched, so the download resumes
    from there.

    Attributes:
        url: address to download from.
        filename: path of the part file that receives this range.
        ranges: (first_byte, last_byte) pair, both ends inclusive.
        downloaded: number of bytes of this range present in the part file;
            read by the caller to report progress.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Set up the thread and the URL opener.

        threadname -- name given to the thread
        url        -- address to download
        filename   -- part file to append to
        ranges     -- (first_byte, last_byte) inclusive pair; run() indexes
                      it, so the default 0 is only a placeholder
        proxies    -- proxy mapping handed to FancyURLopener; the default
                      empty dict is never mutated here
        '''
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Thread body: fetch the not-yet-downloaded rest of the range.'''
        # Bytes already on disk from an earlier attempt count as downloaded.
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0

        # Rebuild the start point so that a resumed download continues
        # right after the bytes that are already in the part file.
        self.startpoint = self.ranges[0] + self.downloaded

        # ranges[1] is the inclusive last byte, so the part is complete only
        # once the start point has moved *past* it. Using >= here would skip
        # the final byte when exactly one byte is still missing.
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # bytes requested per read() call
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, self.ranges[1]))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))

        self.urlhandle = self.open(self.url)
        try:
            # Open the part file once instead of once per chunk; flush after
            # every write so the on-disk size keeps tracking progress.
            with open(self.filename, 'ab') as filehandle:
                data = self.urlhandle.read(self.oneTimeSize)
                while data:
                    filehandle.write(data)
                    filehandle.flush()
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.oneTimeSize)
        finally:
            # Always release the connection, even if a read or write failed.
            self.urlhandle.close()
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server announces for ``url``.

    url     -- address to query
    proxies -- proxy mapping passed to urllib.urlopen (not mutated)

    Returns the integer value of the Content-Length response header, or 0
    when the response carries no such header. Raises ValueError if the
    header value is not an integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # Look the header up by its exact name instead of scanning the raw
        # header lines for the substring 'Length', which could also match an
        # unrelated header.
        length = urlHandler.info().getheader('Content-Length')
    finally:
        # Only the headers are needed; do not keep the connection open.
        urlHandler.close()

    if length is None:
        return 0
    return int(length.strip())
- . P+ t) _( z2 \$ Z+ j
def SpliteBlocks(totalsize, blocknumber):
    '''Split ``totalsize`` bytes into ``blocknumber`` contiguous byte ranges.

    totalsize   -- total number of bytes
    blocknumber -- number of ranges to produce

    Returns a list of (first_byte, last_byte) tuples with inclusive ends.
    All ranges except the last hold totalsize // blocknumber bytes; the last
    one also takes the remainder and ends at totalsize - 1.
    '''
    # Floor division keeps the offsets integral even when true division is
    # in effect (Python 3 or ``from __future__ import division``).
    blocksize = totalsize // blocknumber

    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    # The final block absorbs whatever is left after the even split.
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in ``tasks`` is still running.

    tasks -- iterable of thread objects

    Returns False for an empty iterable.
    '''
    # is_alive() is the spelling available in both Python 2.6+ and Python 3;
    # the older isAlive() alias was removed in Python 3.9.
    return any(task.is_alive() for task in tasks)
5 E7 s. ^$ R* `1 ^& Q\" J -
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download ``url`` into ``output`` using ``blocks`` parallel threads.

    url     -- address of the file to download
    output  -- path of the final, merged file
    blocks  -- number of byte ranges / worker threads
    proxies -- proxy mapping used for the size query and for every worker

    The file size is queried first and split into ``blocks`` ranges. Each
    range is fetched by its own AxelPython thread into a part file named
    tmpfile_<index> in the current directory. Progress is written to stdout
    while any worker is alive; afterwards the parts are concatenated in
    order into ``output`` and removed.
    '''
    import shutil

    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        # Hand the proxy settings to the workers too; otherwise only the
        # size query would honour ``proxies``.
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        # Daemon threads do not keep the interpreter alive on exit.
        task.daemon = True
        task.start()
        tasks.append(task)

    # Give the workers a moment to start before polling them.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        # Guard against a reported size of 0 to avoid dividing by zero.
        process = downloaded / float(size) * 100 if size else 0.0
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # Merge the parts in order; copyfileobj streams them instead of reading
    # each whole part into memory.
    with open(output, 'wb') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                shutil.copyfileobj(f, filehandle)
            # Removing the part is best effort: a leftover temp file must
            # not abort the merge, but only file-system errors are ignored.
            try:
                os.remove(part)
            except OSError:
                pass
- / M4 P1 _& o! g% J; a
if __name__ == '__main__':
    # Demo run: download one sample image with four blocks and no proxy.
    paxel(
        url="http://xz1.mm667.com/xz84/images/001.jpg",
        output='001.jpg',
        blocks=4,
        proxies={},
    )
复制代码 * q# w3 p: \ t3 V, U" A
|
zan
|