- 在线时间
- 479 小时
- 最后登录
- 2026-4-17
- 注册时间
- 2023-7-11
- 听众数
- 4
- 收听数
- 0
- 能力
- 0 分
- 体力
- 7790 点
- 威望
- 0 点
- 阅读权限
- 255
- 积分
- 2923
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 1171
- 主题
- 1186
- 精华
- 0
- 分享
- 0
- 好友
- 1
该用户从未签到
 |
网上好看的动漫图集,如果手工下载太费时了。简单分析下网页地址规律,写个多线程脚本搞定。

#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''It is a multi-thread downloading tool

It was developed following axel.
Author: volans
E-mail: volansw [at] gmail.com
'''
-
import os
import shutil
import sys
import time
import urllib
from threading import Thread
-
# Default proxy map: paxel() uses it when the caller does not pass ``proxies``.
# Keys are URL schemes, values are proxy URLs.
local_proxies = {'http': 'http://131.139.58.200:8080'}
class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a part file.

    The class is both a ``Thread`` (``run()`` is the thread body) and a
    Python 2 ``urllib.FancyURLopener``, so each worker opens the URL itself.

    Attributes:
        name: thread name; also used in the progress message.
        url: address to download from.
        filename: part file that received bytes are appended to.
        ranges: ``(first_byte, last_byte)`` pair, both ends inclusive.
            ``run()`` indexes it, so the constructor default ``0`` is not
            usable for an actual download.
        downloaded: bytes of this part present on disk; updated while the
            thread runs so that another thread can poll the progress.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        # NOTE: ``proxies`` is only forwarded to the opener, never modified
        # in this class; the default is kept as-is for compatibility.
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Thread body: fetch the bytes of this part that are still missing.'''
        # Resume support: bytes already in the part file count as downloaded.
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet, so nothing has been downloaded.
            self.downloaded = 0

        # Move the start point past what is already on disk.
        self.startpoint = self.ranges[0] + self.downloaded

        # ranges[1] is the LAST byte of the part (inclusive, exactly as it is
        # sent in the Range header below), so the part is complete only once
        # the start point has moved past it.  Using ">=" here would skip the
        # final byte of a part that was interrupted one byte short.
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # bytes requested per read() call
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, self.ranges[1]))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))

        self.urlhandle = self.open(self.url)
        try:
            data = self.urlhandle.read(self.oneTimeSize)
            while data:
                # Re-open in append mode for every chunk so each chunk is
                # on disk (and resumable) before the next one is requested.
                with open(self.filename, 'ab+') as filehandle:
                    filehandle.write(data)

                self.downloaded += len(data)
                data = self.urlhandle.read(self.oneTimeSize)
        finally:
            # Release the connection even when a read or write fails.
            self.urlhandle.close()

def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for ``url``.

    Args:
        url: address to query.
        proxies: proxy mapping forwarded to ``urllib.urlopen``; it is not
            modified here.

    Returns:
        The ``Content-Length`` value as an int, or 0 when the response
        carries no such header.

    Raises:
        ValueError: if the header value is not an integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # Look the header up by name (case-insensitive) instead of scanning
        # every raw header line for the substring 'Length', which misses
        # lower-case spellings and can match unrelated headers.
        reported = urlHandler.info().getheader('Content-Length')
    finally:
        urlHandler.close()

    if reported is None:
        return 0
    return int(reported.strip())

def SpliteBlocks(totalsize, blocknumber):
    '''Split ``totalsize`` bytes into ``blocknumber`` consecutive byte ranges.

    Args:
        totalsize: total number of bytes.
        blocknumber: number of ranges to produce.

    Returns:
        A list of ``(first_byte, last_byte)`` tuples with both ends
        inclusive.  Every range but the last spans
        ``totalsize // blocknumber`` bytes; the last one runs up to
        ``totalsize - 1`` and so absorbs the remainder.

    Raises:
        ZeroDivisionError: if ``blocknumber`` is 0.
    '''
    # Floor division keeps the offsets integral even under true division
    # (Python 3, or ``from __future__ import division``).
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges

def islive(tasks):
    '''Return True if at least one thread in ``tasks`` is still running.

    Args:
        tasks: iterable of thread objects exposing ``is_alive()``.

    Returns:
        True when any thread reports itself alive, False otherwise
        (including for an empty iterable).
    '''
    # is_alive() is the supported spelling; the camelCase isAlive() alias
    # no longer exists on current Thread objects.
    return any(task.is_alive() for task in tasks)

def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download ``url`` into ``output`` using ``blocks`` parallel threads.

    The reported file size is split into ``blocks`` byte ranges, one
    AxelPython thread downloads each range into ``tmpfile_<i>`` in the
    current directory, progress is printed to stdout while the threads
    run, and finally the part files are concatenated into ``output`` and
    removed.

    Args:
        url: address of the file to download.
        output: path of the merged result file.
        blocks: number of download threads / part files.
        proxies: proxy mapping used both for the size query and for every
            download thread.
    '''
    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for name, part, span in zip(threadname, filename, ranges):
        # Forward ``proxies`` so the workers use the same route as the
        # size query above instead of silently falling back to no proxy.
        task = AxelPython(name, url, part, span, proxies)
        task.daemon = True
        task.start()
        tasks.append(task)

    # Give the workers a moment to start before polling their progress.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        # A size of 0 means the server reported no length; avoid dividing
        # by zero in that case.
        process = downloaded / float(size) * 100 if size else 0.0
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # NOTE(review): nothing here verifies that every worker finished its
    # range; a worker that died early leaves a short part file.
    with open(output, 'wb') as merged:
        for part in filename:
            with open(part, 'rb') as piece:
                # Stream the part instead of loading it fully into memory.
                shutil.copyfileobj(piece, merged)
            try:
                os.remove(part)
            except OSError:
                # Best effort: a leftover part file is not fatal.
                pass

if __name__ == '__main__':
    # Example run: fetch one image with four threads and no proxy.
    url = "http://xz1.mm667.com/xz84/images/001.jpg"
    output = '001.jpg'
    paxel(url, output, blocks=4, proxies={})
复制代码 1 v; ^/ q2 y4 B
|
zan
|