数学建模社区-数学中国
标题:
多线程下载图集
[打印本页]
作者:
2744557306
时间:
2024-3-31 17:18
标题:
多线程下载图集
网上好看的动漫图集,如果手工下载太费时了。简单分析下网页地址规律,写个多线程脚本搞定。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''A multi-threaded downloading tool.

It was developed following the design of axel.
Author: volans
E-mail: volansw [at] gmail.com
'''

import os
import shutil
import sys
import time
import urllib
from threading import Thread

# Default proxy mapping (URL scheme -> proxy URL).  It is the default value of
# the `proxies` parameter of paxel(); pass proxies={} there to bypass it.
local_proxies = {'http': 'http://131.139.58.200:8080'}
9 k# M: o# c5 h8 E. t$ ^: `; _
0 a! g3 d1 c) D1 z4 c* }
class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a part file.

    The class is both a ``Thread`` (``run()`` is what ``start()`` executes)
    and a ``urllib.FancyURLopener`` (used here for ``addheader()`` and
    ``open()``).  Data is appended to ``filename``, so a part file left over
    from an earlier, interrupted run is resumed rather than fetched again.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Store the download parameters.

        threadname -- name of the thread, also kept in ``self.name``.
        url        -- address of the resource to download.
        filename   -- path of the part file this worker appends to.
        ranges     -- ``(first_byte, last_byte)`` pair, both ends inclusive.
                      run() indexes it, so the default of 0 is not usable.
        proxies    -- proxy mapping handed to ``FancyURLopener``.
        '''
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Download the assigned byte range, appending it to the part file.

        ``self.downloaded`` is updated after every chunk so that another
        thread can read it to report progress.
        '''
        # Bytes already on disk from a previous attempt; a missing part file
        # simply means nothing has been downloaded yet.
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0

        # Resume right after the bytes that are already stored.
        self.startpoint = self.ranges[0] + self.downloaded

        # ranges[1] is the LAST byte of the part (inclusive), so the part is
        # complete only once the start point has moved past it.  Using ">="
        # here would skip the final byte of a part that is one byte short.
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # bytes requested per read (16 KiB)
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, self.ranges[1]))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
        self.urlhandle = self.open(self.url)
        try:
            with open(self.filename, 'ab') as filehandle:
                data = self.urlhandle.read(self.oneTimeSize)
                while data:
                    filehandle.write(data)
                    # Flush each chunk so the size of the part file stays in
                    # step with what was received; resuming relies on it.
                    filehandle.flush()
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.oneTimeSize)
        finally:
            # Release the connection even if reading or writing failed.
            self.urlhandle.close()
def GetUrlFileSize(url, proxies={}):
    '''Return the Content-Length announced by the server for ``url``.

    url     -- address of the resource to query.
    proxies -- proxy mapping passed to ``urllib.urlopen``.  The default
               mapping is only passed on, never modified here.

    Returns the length as an int, or 0 when the response carries no
    Content-Length header.  Raises ValueError if the header value is not a
    valid integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        headers = urlHandler.info().headers
    finally:
        # Only the headers are needed; do not keep the connection open.
        urlHandler.close()

    length = 0
    for header in headers:
        # Compare the header NAME exactly (case-insensitively) instead of
        # searching the whole line for "Length", which would also match
        # unrelated headers that merely contain that word.
        name, _, value = header.partition(':')
        if name.strip().lower() == 'content-length':
            length = int(value.strip())
    return length
def SpliteBlocks(totalsize, blocknumber):
    '''Split ``totalsize`` bytes into ``blocknumber`` consecutive byte ranges.

    Returns a list of ``(first_byte, last_byte)`` tuples with both ends
    inclusive.  Every block except the last holds
    ``totalsize // blocknumber`` bytes; the last block also takes the
    remainder and always ends at ``totalsize - 1``.

    Raises ZeroDivisionError when ``blocknumber`` is 0.
    '''
    # Floor division keeps every offset an integer on Python 3 as well as on
    # Python 2; plain "/" would yield float offsets there.
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in ``tasks`` is still running.

    tasks -- iterable of ``threading.Thread`` objects.  An empty iterable
             yields False.
    '''
    # is_alive() exists on Python 2.6+ and Python 3; the camelCase isAlive()
    # alias was removed in Python 3.9.
    return any(task.is_alive() for task in tasks)
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download ``url`` into ``output`` using ``blocks`` parallel workers.

    url     -- address of the resource to download.
    output  -- path of the final file; it is overwritten if it exists.
    blocks  -- number of byte ranges, and therefore worker threads, to use.
    proxies -- proxy mapping used for the size query and for every worker.

    Each worker appends its range to ``tmpfile_<n>`` in the current
    directory.  When all workers have stopped, the part files are
    concatenated in order into ``output`` and removed.  A progress line is
    written to stdout while workers are running.
    '''
    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        # Pass the proxy settings on, so the workers reach the server the
        # same way the size query above did.
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        # Daemon threads do not keep the process alive if the main thread
        # is interrupted.
        task.daemon = True
        task.start()
        tasks.append(task)

    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        # A reported size of 0 must not crash the progress display.
        process = downloaded / float(size) * 100 if size else 0.0
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    with open(output, 'wb') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                # Stream the part instead of loading it wholly into memory.
                shutil.copyfileobj(f, filehandle)
            # Removing the part file is best effort: a leftover temp file
            # must not fail a download that has already been assembled.
            try:
                os.remove(part)
            except OSError:
                pass
if __name__ == '__main__':
    # Script entry point: fetch one sample image with four workers and an
    # empty proxy mapping.
    output = '001.jpg'
    url = "http://xz1.mm667.com/xz84/images/001.jpg"
    paxel(url=url, output=output, blocks=4, proxies={})
复制代码
$ Q" T1 z0 f) f# B0 t0 C
欢迎光临 数学建模社区-数学中国 (http://www.madio.net/)
Powered by Discuz! X2.5