数学建模社区-数学中国
标题:
多线程下载图集
[打印本页]
作者:
2744557306
时间:
2024-3-31 17:18
标题:
多线程下载图集
网上好看的动漫图集,如果手工下载太费时了。简单分析下网页地址规律,写个多线程脚本搞定。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py
'''A multi-threaded downloading tool.

It was developed following axel.

Author: volans
E-mail: volansw [at] gmail.com
'''
$ T" h: q# e2 }+ A6 h7 |5 ]0 h9 \
& G4 {. c9 a" k- H8 [
import sys
$ Z5 n* g" u8 i" H: O8 O
import os
1 Q9 d5 y# [. L# ^+ O# ?8 x5 ?2 R
import time
. ] d8 g3 R4 F* h
import urllib
* }6 l$ T! ]' z
from threading import Thread
x) z& e; g% R1 n4 \+ `
: ], G$ V+ d0 [* a; P
# Default proxy mapping used by paxel() when the caller does not pass its own
# `proxies` argument: URLs with the 'http' scheme go through this hard-coded
# proxy address.
local_proxies = {'http': 'http://131.139.58.200:8080'}
) O. s& \, Z" |3 t
( Q* h. v5 f' r# D6 }6 ~' b4 _
class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a part file.

    Each instance is both a ``Thread`` and a ``urllib.FancyURLopener``.
    ``run()`` (invoked through ``Thread.start()``) sends one request carrying
    a ``Range`` header and appends the response body to ``filename``. When
    ``filename`` already exists, its size is taken as the number of bytes
    fetched earlier and the download resumes right after them.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Record the download parameters; nothing is fetched until start().

        threadname -- name of the thread, also used in progress messages.
        url        -- address of the resource to download.
        filename   -- path of the part file this worker appends to.
        ranges     -- (first_byte, last_byte) pair, both ends inclusive.
                      The default of 0 is kept for backward compatibility,
                      but run() indexes this value, so a pair is required.
        proxies    -- proxy mapping handed to urllib.FancyURLopener.
        '''
        # NOTE: the shared default dict for `proxies` is kept so the signature
        # stays unchanged; this class only passes it through, never mutates it.
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        # Bytes of this part known to be on disk; read by the progress display.
        self.downloaded = 0

    def run(self):
        '''Fetch the assigned byte range, resuming from an existing part file.'''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet: nothing has been downloaded.
            self.downloaded = 0

        first_byte, last_byte = self.ranges[0], self.ranges[1]

        # Rebuild the start point so already-saved bytes are not fetched again.
        self.startpoint = first_byte + self.downloaded

        # last_byte is inclusive (HTTP "bytes=a-b" includes byte b), so the
        # part is complete only once the start point has moved past it.
        # Using ">=" here would silently drop the final byte of the part.
        if self.startpoint > last_byte:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # 16 KiB per read
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, last_byte))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, last_byte))
        self.urlhandle = self.open(self.url)
        try:
            data = self.urlhandle.read(self.oneTimeSize)
            while data:
                # Re-open and close the part file for every chunk so each
                # chunk reaches the file before the counter is advanced; the
                # resume logic above relies on the file size.
                with open(self.filename, 'ab+') as part_file:
                    part_file.write(data)
                self.downloaded += len(data)
                data = self.urlhandle.read(self.oneTimeSize)
        finally:
            # Release the connection even if a read or write fails.
            self.urlhandle.close()
1 `3 k% N9 \! j9 Z0 O
0 a/ p& A# G+ Q4 U2 ]
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for `url`.

    url     -- address of the resource to query.
    proxies -- proxy mapping passed to urllib.urlopen().

    Returns the integer value of the response's Content-Length header, or 0
    when the response carries no such header.
    Raises ValueError if the header value is not an integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # Look the header up by name (case-insensitive) instead of scanning
        # for any header line that merely contains the text "Length".
        length = urlHandler.info().getheader('Content-Length')
    finally:
        # Only the headers are needed; do not leave the connection open.
        urlHandler.close()

    if length is None:
        return 0
    return int(length.strip())
1 C2 j# x" M: N; g
3 U2 F% i8 d+ l2 T$ S6 _
def SpliteBlocks(totalsize, blocknumber):
    '''Split `totalsize` bytes into `blocknumber` contiguous byte ranges.

    totalsize   -- total number of bytes to cover.
    blocknumber -- number of ranges to produce.

    Returns a list of (first_byte, last_byte) tuples with both ends
    inclusive. Every range but the last spans totalsize // blocknumber
    bytes; the last one also absorbs the remainder and ends at
    totalsize - 1.
    Raises ZeroDivisionError when blocknumber is 0.
    '''
    # Floor division keeps the offsets integral regardless of whether "/"
    # means integer or true division in the running interpreter.
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
4 ~% T* c- ]' a/ n: }; z
def islive(tasks):
    '''Return True if at least one thread in `tasks` is still running.

    tasks -- iterable of thread objects exposing is_alive().

    Returns False for an empty iterable.
    '''
    # is_alive() is the supported spelling; the camelCase isAlive() alias
    # has been removed from newer versions of the threading module.
    return any(task.is_alive() for task in tasks)
7 c- {& ~, B+ {
. s% c0 L$ N( M9 L8 r) _: v0 ^+ R
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download `url` into `output` using `blocks` parallel range requests.

    url     -- address of the resource to download.
    output  -- path of the final file; created or truncated.
    blocks  -- number of worker threads / byte ranges.
    proxies -- proxy mapping used for the size query and for every worker.

    The resource size is queried first and split into byte ranges, and one
    AxelPython worker is started per range, each writing to "tmpfile_<i>" in
    the current directory. Progress is printed while any worker is alive.
    Afterwards the part files are concatenated in order into `output` and
    removed.
    '''
    import shutil  # local import: only needed for the merge step below

    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(0, blocks)]
    filename = ["tmpfile_%d" % i for i in range(0, blocks)]

    tasks = []
    for i in range(0, blocks):
        # Forward `proxies` so the range requests use the same proxy settings
        # as the size query above (previously the workers ignored it).
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        task.daemon = True
        task.start()
        tasks.append(task)

    # Initial pause before polling; presumably gives the workers time to
    # open their connections -- TODO confirm.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        process = downloaded / float(size) * 100
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # NOTE(review): a worker that died early leaves a short part file and the
    # merge below does not detect that -- consider checking part sizes.
    with open(output, 'wb+') as filehandle:
        for part_name in filename:
            with open(part_name, 'rb') as part:
                # Stream the part instead of loading it whole into memory.
                shutil.copyfileobj(part, filehandle)
            try:
                os.remove(part_name)
            except OSError:
                # Best-effort cleanup: a leftover part file is not fatal.
                pass
5 a! f$ Q3 y+ ^, F, ? H
7 d2 o0 \3 b _( R5 {/ t
if __name__ == '__main__':
    # Demo run: fetch one sample image with four workers and no proxy.
    url = "http://xz1.mm667.com/xz84/images/001.jpg"
    output = '001.jpg'
    paxel(url=url, output=output, blocks=4, proxies={})
复制代码
/ E: A. I* h* E6 W4 X% m( r
欢迎光临 数学建模社区-数学中国 (http://www.madio.net/)
Powered by Discuz! X2.5