- 在线时间
- 479 小时
- 最后登录
- 2026-4-17
- 注册时间
- 2023-7-11
- 听众数
- 4
- 收听数
- 0
- 能力
- 0 分
- 体力
- 7790 点
- 威望
- 0 点
- 阅读权限
- 255
- 积分
- 2923
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 1171
- 主题
- 1186
- 精华
- 0
- 分享
- 0
- 好友
- 1
该用户从未签到
 |
网上好看的动漫图集,如果手工下载太费时了。简单分析下网页地址规律,写个多线程脚本搞定。- #!/usr/bin/python
& U% _- |; t5 K w - # -*- coding: utf-8 -*-
8 Y/ n6 t s$ w& }9 u$ n) Q) U - # filename: paxel.py, X J/ ]$ N. P o
- ' ~5 V- l! `, f
'''A multi-threaded downloading tool.

It was developed following the design of axel.
Author: volans
E-mail: volansw [at] gmail.com
'''
-
: h& ` h% c9 Q o& E, x - import sys
. d B. b0 Q& W: C; M1 ^2 i - import os4 P' @- n# G' K0 P
- import time
, ?4 L- q$ z; P: u$ s - import urllib
; c: L0 b+ s& P* b/ \ - from threading import Thread) D- |% Q4 z) t% ^, M0 M
- 9 @4 x# G `9 F
# Default proxy mapping (URL scheme -> proxy URL).  paxel() uses it when the
# caller does not pass its own `proxies`; the __main__ block overrides it
# with an empty dict.
local_proxies = {'http': 'http://131.139.58.200:8080'}
-
class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a part file.

    The part file doubles as resume state: its current size is taken as the
    number of bytes of this range that are already on disk, and the download
    continues right after them.

    Attributes:
        url: address of the resource to fetch.
        filename: path of the part file this worker appends to.
        ranges: (first_byte, last_byte) offsets, both inclusive.
        downloaded: bytes of this range currently written to the part file.
        startpoint: absolute offset the request starts at (set by run()).
        oneTimeSize: number of bytes requested per read (set by run()).
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Initialise both base classes and remember the job description.

        Args:
            threadname: name of the thread, also used in the log output.
            url: address of the resource to fetch.
            filename: part file the downloaded bytes are appended to.
            ranges: (first_byte, last_byte) inclusive pair.  The default of 0
                is kept only for signature compatibility; run() indexes this
                value and therefore needs a real pair.
            proxies: proxy mapping handed to urllib.FancyURLopener.  The
                default dict is shared between calls but never modified here.
        '''
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Thread entry point: fetch the bytes of the range still missing.'''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet, so nothing has been downloaded so far.
            self.downloaded = 0

        first_byte = self.ranges[0]
        last_byte = self.ranges[1]

        # Resume right after the bytes that are already on disk.
        self.startpoint = first_byte + self.downloaded

        # last_byte is inclusive, so the part is complete only once the
        # start point has moved *past* it.  (Comparing with >= would skip
        # the final byte when exactly one byte is still missing.)
        if self.startpoint > last_byte:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # bytes per read (16 KiB)
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, last_byte))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, last_byte))
        self.urlhandle = self.open(self.url)
        try:
            # Open the part file once instead of once per chunk.
            with open(self.filename, 'ab') as part:
                while True:
                    data = self.urlhandle.read(self.oneTimeSize)
                    if not data:
                        break
                    part.write(data)
                    self.downloaded += len(data)
        finally:
            # Release the connection even when reading or writing fails.
            self.urlhandle.close()
-
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for `url`.

    Args:
        url: address of the resource to inspect.
        proxies: proxy mapping passed to urllib.urlopen.  The default dict is
            shared between calls but never modified here.

    Returns:
        The value of the Content-Length response header as an int, or 0 when
        the response carries no such header.

    Raises:
        ValueError: if the Content-Length header is not a valid integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # Look the header up by its exact name (the lookup is
        # case-insensitive) instead of scanning for any header line that
        # merely contains the text 'Length'.
        length = urlHandler.info().getheader('Content-Length')
    finally:
        # Only the headers are needed; do not leave the connection open.
        urlHandler.close()

    if length is None:
        return 0
    return int(length.strip())
d. k* }0 }8 X, L$ ? - 5 R( ~: a1 H\" n4 o. t9 R
def SpliteBlocks(totalsize, blocknumber):
    '''Split `totalsize` bytes into `blocknumber` consecutive byte ranges.

    Args:
        totalsize: total number of bytes to cover.
        blocknumber: number of ranges to produce; must be at least 1.

    Returns:
        A list of `blocknumber` (first_byte, last_byte) tuples with inclusive
        integer offsets.  Every range but the last spans
        totalsize // blocknumber bytes; the last one also takes the
        remainder and ends at totalsize - 1.

    Raises:
        ValueError: if `blocknumber` is smaller than 1.
    '''
    if blocknumber < 1:
        raise ValueError('blocknumber must be at least 1, got %r'
                         % (blocknumber,))

    # Floor division keeps the offsets integral under true division too.
    blocksize = totalsize // blocknumber

    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    # The final block absorbs whatever the even split left over.
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in `tasks` is still running.

    Args:
        tasks: iterable of thread objects.

    Returns:
        True when any task reports being alive, False otherwise (including
        for an empty iterable).
    '''
    # is_alive() is the spelling available on both Python 2.6+ and
    # Python 3; the camel-case isAlive() alias was removed in Python 3.9.
    return any(task.is_alive() for task in tasks)
- 2 x- f4 o! z' Q5 X- l+ M5 m
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download `url` into `output` using `blocks` parallel range requests.

    Each block is fetched by its own AxelPython thread into a part file
    named tmpfile_<index> in the current directory.  Existing part files are
    treated as resume data by the workers.  When all workers have finished
    the parts are concatenated into `output` and removed.

    Args:
        url: address of the resource to download.
        output: path of the final file.
        blocks: number of parallel ranges / worker threads.
        proxies: proxy mapping used for the size query and for every worker.

    Raises:
        IOError: if the server does not report a usable size, or if the part
            files do not add up to that size once the workers have stopped.
            In the latter case the part files are kept so that a later run
            can resume.
    '''
    import shutil  # local import: only needed to merge the part files

    size = GetUrlFileSize(url, proxies)
    if size <= 0:
        # Without a size there is nothing to split into ranges.
        raise IOError('Cannot determine the size of %s' % url)

    ranges = SpliteBlocks(size, blocks)
    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        # Forward the proxy settings so the workers use the same route as
        # the size query above.
        task = AxelPython(threadname[i], url, filename[i], ranges[i],
                          proxies=proxies)
        task.daemon = True
        task.start()
        tasks.append(task)

    def report():
        # Print one progress line and return the bytes downloaded so far.
        downloaded = sum(task.downloaded for task in tasks)
        process = downloaded / float(size) * 100
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        return downloaded

    while islive(tasks):
        report()
        time.sleep(0.5)

    # One last report so the final state is shown, then end the \r line.
    downloaded = report()
    sys.stdout.write('\n')

    if downloaded != size:
        # A worker stopped early or a stale part file is present; merging
        # now would silently produce a corrupt file.
        raise IOError('Download incomplete: got %d of %d bytes; part files '
                      'were kept' % (downloaded, size))

    with open(output, 'wb') as merged:
        for part_name in filename:
            with open(part_name, 'rb') as part:
                # Stream the part instead of loading it fully into memory.
                shutil.copyfileobj(part, merged)
            try:
                os.remove(part_name)
            except OSError:
                # Best effort: a leftover part file does not hurt the result.
                pass
1 }6 ^/ ]) Q% H W0 \ -
if __name__ == '__main__':
    # Usage: paxel.py [URL [OUTPUT]]
    # Without arguments the built-in demo download is performed.  When only
    # a URL is given, the output name is the last path component of the URL.
    args = sys.argv[1:]
    url = args[0] if args else "http://xz1.mm667.com/xz84/images/001.jpg"
    if len(args) > 1:
        output = args[1]
    else:
        output = os.path.basename(url) or '001.jpg'
    paxel(url, output, blocks=4, proxies={})
复制代码 2 X+ r, w8 K1 t
|
zan
|