- 在线时间
- 480 小时
- 最后登录
- 2026-6-1
- 注册时间
- 2023-7-11
- 听众数
- 4
- 收听数
- 0
- 能力
- 0 分
- 体力
- 7823 点
- 威望
- 0 点
- 阅读权限
- 255
- 积分
- 2934
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 1174
- 主题
- 1189
- 精华
- 0
- 分享
- 0
- 好友
- 1
该用户从未签到
 |
网上有不少好看的动漫图集,手工一张张下载太费时。简单分析一下网页地址的规律,写个多线程脚本就能搞定。
- #!/usr/bin/python
- # -*- coding: utf-8 -*-) K3 [4 W$ j* D# G! [% g6 a$ \
- # filename: paxel.py
1 ~; j' n8 n/ G% }( g# U& F - / s7 X8 g9 |6 H0 v$ R( p% j
- '''It is a multi-thread downloading tool.
-
- It was developed following axel.
- Author: volans
- E-mail: volansw [at] gmail.com
- '''
4 g1 b' z2 m' i2 A( l8 A% ?9 B - 6 v3 b4 x; x2 f; X: g+ H
- import sys& g4 K; Z) y5 J
- import os0 v5 l+ J1 V5 C( x
- import time
3 Y' Q1 N8 Z/ i - import urllib0 A, G* E6 ^, v( c, [: `9 \
- from threading import Thread! `& @1 E( k1 q3 g7 p. R/ X! I
- & b5 M* g+ l# F2 G, l2 ~
# Proxy map used as the default value of paxel()'s `proxies` parameter.
# NOTE(review): the address is hard-coded -- confirm it is still the intended proxy.
local_proxies = {'http': 'http://131.139.58.200:8080'}
! c; P& R5 v1 t2 y -
9 s+ ~6 m+ K4 A5 |' M0 d4 U: w - class AxelPython(Thread, urllib.FancyURLopener):* l0 L9 ?+ p4 q0 z8 x2 y. [
- '''Multi-thread downloading class.5 a; _5 O* E6 `4 b4 i4 U$ I7 B
-
8 I# M( N; ^, @0 F$ x4 g - run() is a vitural method of Thread.
* C& u# n; X ?2 E; Q - '''
3 c& W$ K/ }. z: Z. l& u - def __init__(self, threadname, url, filename, ranges=0, proxies={}):\" d& ~5 p/ [\" D
- Thread.__init__(self, name=threadname)- G8 u3 [' J8 z: i. _9 m: O
- urllib.FancyURLopener.__init__(self, proxies)\" L, l\" |/ w. N8 t: h; ~! B
- self.name = threadname
8 V7 }+ Y2 E1 h\" \) p: W& c - self.url = url
; G- ^6 @: e& E - self.filename = filename0 l# z8 Z: L9 M\" \\" O! I
- self.ranges = ranges
8 H3 C& a\" |% K% [; q! n - self.downloaded = 0\" d0 Q3 }' d1 M) e
-
5 T5 ]* P. ]1 {( E. v4 a - def run(self):& g9 X0 O' n\" `; K9 x0 x- }
- '''vertual function in Thread'''
# v. G m, {4 q1 c& b - try:
1 P+ ?2 ]+ d$ ?* `; v - self.downloaded = os.path.getsize( self.filename )/ T7 a# U$ ?: ?4 C# O2 w+ M
- except OSError:
+ n. f/ O0 [$ U& }1 J- X& a: G - #print 'never downloaded'5 h) N3 E3 {5 f6 J v* C4 ~
- self.downloaded = 0
8 R' h* V: {* T! z -
- N0 @' S* | P6 Y1 t - # rebuild start poind, c0 p0 J J2 R, d6 W; C( h: ^! l0 @
- self.startpoint = self.ranges[0] + self.downloaded+ V- i- t\" R' \ F% R1 L
- 8 _4 ~5 U8 S1 Q9 D' ~\" G8 q' u
- # This part is completed
: L T$ L2 i9 ?5 Q* I$ ^ - if self.startpoint >= self.ranges[1]:! h) X2 U6 A3 E# d3 i& N; j1 q/ u. Q
- print 'Part %s has been downloaded over.' % self.filename
& l, D) J( e+ K! W l$ ? L X - return- P* j. X) d- A3 k- @+ }: m% h
-
' D) b0 `& ]' B1 J) j - self.oneTimeSize = 16384 #16kByte/time
\" p8 ~6 s2 f! \\" b - print 'task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1])' P- P o9 ?2 ]4 I
-
4 w s6 Y5 P$ W. A; D- D9 _ - self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
5 r) I( ?% k1 H g - 4 {* J( [\" @+ {7 Z2 z I4 V
- self.urlhandle = self.open( self.url )\" r& b3 ?9 H$ |/ h2 M/ t2 K& ?
-
+ G' Z# k, [\" @' [ - data = self.urlhandle.read( self.oneTimeSize )
; B0 }7 l) Q/ l% K( l7 `7 f: U X - while data:2 o& v. o' ^# l
- filehandle = open( self.filename, 'ab+' ); b/ w! C0 c+ M4 \5 s9 t
- filehandle.write( data )1 i6 H$ c$ t; Z\" }1 q2 t( r/ z; \( w
- filehandle.close()6 `# z: E, @, h\" o: L
-
' b( g$ G) r' [( C7 I - self.downloaded += len( data )5 k6 r3 `$ j9 C/ X1 r; D1 S5 i\" I8 b
- #print "%s" % (self.name)
0 ~$ M: B! C4 f4 L8 d - #progress = u'\r...', k: _3 S. y. |3 G' a4 u2 f
-
: f6 o0 W/ M. U E - data = self.urlhandle.read( self.oneTimeSize )
4 x, ?5 a5 n* x - 3 j4 q# X: T2 |+ H6 T; P9 }& Z
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for `url`.

    Args:
        url: address to query.
        proxies: proxy mapping forwarded unchanged to urllib.urlopen().
            The {} default is deliberate and is never mutated here.
            NOTE(review): in Python 2 urllib, None would mean "use the
            environment's proxy settings" -- confirm before changing it.

    Returns:
        The value of the Content-Length response header as an int, or 0
        when the response carries no such header.

    Raises:
        ValueError: if the Content-Length value is not an integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # Ask for the header by name instead of substring-matching 'Length'
        # against every raw header line, which could pick up an unrelated
        # header or miss a differently-cased one.
        length = urlHandler.info().getheader('Content-Length')
    finally:
        # Only the headers are needed; do not keep the connection open.
        urlHandler.close()

    if length is None:
        return 0
    return int(length.strip())

def SpliteBlocks(totalsize, blocknumber):
    '''Split `totalsize` bytes into `blocknumber` consecutive byte ranges.

    Args:
        totalsize: total number of bytes to cover.
        blocknumber: number of ranges to produce.

    Returns:
        A list of `blocknumber` (start, end) tuples with inclusive integer
        offsets. Every range but the last is totalsize // blocknumber bytes
        long; the last one also takes the remainder and ends at
        totalsize - 1.

    Raises:
        ZeroDivisionError: if `blocknumber` is 0.
    '''
    # Floor division keeps the offsets integral whether or not true division
    # is in effect (Python 3, or `from __future__ import division`).
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    # The last block absorbs whatever the even split left over.
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges

def islive(tasks):
    '''Return True if at least one thread in `tasks` is still running.

    Args:
        tasks: iterable of thread objects.

    Returns:
        True as soon as one thread reports it is alive, otherwise False
        (including for an empty iterable).
    '''
    # is_alive() is used rather than the legacy isAlive() spelling, which
    # newer Python versions no longer provide.
    return any(task.is_alive() for task in tasks)

def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download `url` into `output` using `blocks` parallel range requests.

    The reported size is split into `blocks` byte ranges, one AxelPython
    thread downloads each range into "tmpfile_<i>" in the current working
    directory, progress is written to stdout while any thread is alive, and
    finally the part files are concatenated in order into `output` and
    removed.

    Args:
        url: address to download.
        output: path of the final file; an existing file is overwritten.
        blocks: number of parts / threads.
        proxies: proxy mapping used for the size query and for every
            download thread.

    Raises:
        IOError: if the server does not report a positive size, or if a
            part file cannot be read while merging.
    '''
    import shutil  # local import: only the merge step below needs it

    size = GetUrlFileSize(url, proxies)
    if size <= 0:
        # Without a size there is nothing to split; fail before any thread
        # is started or the output file is created.
        raise IOError('cannot determine the size of %s' % url)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        # Hand the proxy settings to the workers too, so the downloads take
        # the same route as the size query above.
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        task.daemon = True
        task.start()
        tasks.append(task)

    # Give the workers a moment to start before the first progress report.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        process = downloaded / float(size) * 100
        show = u'\rFilesize:%d  Downloaded:%d  Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # Concatenate the parts in order, streaming each one so that a large
    # part is never held in memory as a whole.
    with open(output, 'wb') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                shutil.copyfileobj(f, filehandle)
            try:
                os.remove(part)
            except OSError:
                # Best effort: a part file that cannot be deleted must not
                # abort the merge.
                pass

if __name__ == '__main__':
    # Demo run: fetch one sample image in four parts, with an empty proxy map.
    paxel(
        "http://xz1.mm667.com/xz84/images/001.jpg",
        '001.jpg',
        blocks=4,
        proxies={},
    )
复制代码 e. { N$ F3 i: L/ G1 t0 ]3 \0 Q" u
|
zan
|