数学建模社区-数学中国

标题: 多线程下载图集 [打印本页]

作者: 2744557306    时间: 2024-3-31 17:18
标题: 多线程下载图集
网上好看的动漫图集,如果手工下载太费时了。简单分析下网页地址规律,写个多线程脚本搞定。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''It is a multi-thread downloading tool

    It was developed following axel.
        Author: volans
        E-mail: volansw [at] gmail.com
'''

import os
import sys
import time
import urllib
from threading import Thread

  18. local_proxies = {'http': 'http://131.139.58.200:8080'}
    9 k# M: o# c5 h8 E. t$ ^: `; _

  19. 0 a! g3 d1 c) D1 z4 c* }
  20. class AxelPython(Thread, urllib.FancyURLopener):
    : z! w# y+ u0 h9 Q: R
  21.     '''Multi-thread downloading class.
    ; T& }2 F$ r% B5 ?5 R2 k

  22. ( f# r4 C9 R$ \" m! H. W/ W9 }7 @  d: j
  23.         run() is a vitural method of Thread.% m1 L2 P5 W) V1 t- t' e0 R
  24.     '''% G' G) d% k; c* a/ c0 f
  25.     def __init__(self, threadname, url, filename, ranges=0, proxies={}):, Y" s: c' m4 g( Q9 _8 ^
  26.         Thread.__init__(self, name=threadname): H1 o9 Z. p+ O' B( \
  27.         urllib.FancyURLopener.__init__(self, proxies)
    9 g# t/ ^/ E3 T, z- r# L' _0 O
  28.         self.name = threadname
    / B& l% U8 ?. i: T5 l5 o8 J
  29.         self.url = url
    % w/ q- M, B% p5 y( B3 R" I5 ?
  30.         self.filename = filename. ~' t4 e. ^: }4 `- L! N+ \
  31.         self.ranges = ranges
    : Y2 `' `3 u/ J. A$ L; t3 x! \
  32.         self.downloaded = 05 U+ A9 y! E0 n# l
  33. / v$ p" x) H6 O  }( {2 b
  34.     def run(self):
    2 s& Y8 R( `4 s% r8 S) @% b' d
  35.         '''vertual function in Thread'''
    6 ]  D  t& J0 c+ [2 U/ a
  36.         try:, @6 d3 p' S2 r$ Z! u
  37.             self.downloaded = os.path.getsize( self.filename )
    # P# C+ g/ |, M5 a
  38.         except OSError:
    + g7 {3 w! ~, q) W8 u
  39.             #print 'never downloaded'9 o( @) j/ S( a( `8 q# Q* n
  40.             self.downloaded = 0
    9 X( X; ]2 O4 D

  41. - j3 q* R! G9 f; g6 o
  42.         # rebuild start poind
    9 R2 j6 g) W/ S* I+ N3 X
  43.         self.startpoint = self.ranges[0] + self.downloaded
    6 d  ?6 @# r/ h1 k
  44.          
    3 H+ C/ D" @- Q: M
  45.         # This part is completed
    " }( a8 V5 i0 f4 Q  O
  46.         if self.startpoint >= self.ranges[1]:
    7 [$ I* ]; y) Q. `+ r: O) Z* D. }0 A
  47.             print 'Part %s has been downloaded over.' % self.filename+ C1 @# j# Y% H) a% A/ A
  48.             return$ l" S) w) J" {
  49.          0 ?* I# n: V9 n/ B  B- K0 x; a
  50.         self.oneTimeSize = 16384 #16kByte/time
    9 i: k; ~% s% i" k' u9 @9 c0 p
  51.         print 'task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1])- p3 ]0 S$ F. ?' a9 Y5 K
  52. ; q6 L8 b% K; R: D$ A5 `# d
  53.         self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
    . \2 ]" ~& E& L* }2 k
  54.              ' L* R; }$ V, V; }0 g( i; @( q
  55.         self.urlhandle = self.open( self.url )
    1 S. Y, d! F  |' ?" Q

  56. - r7 C" A/ w( g: j7 B8 I# @
  57.         data = self.urlhandle.read( self.oneTimeSize )! L; R- y! M$ l! k8 Z) Q) E
  58.         while data:
    - C1 s7 t7 n& z/ Y4 T( g- r
  59.             filehandle = open( self.filename, 'ab+' )7 z. Z+ T0 g; V
  60.             filehandle.write( data )
    % {: [* `+ a. ~* }
  61.             filehandle.close()# I# ~9 Z  K; r$ d

  62. , V  ^7 S: _3 r# t3 k
  63.             self.downloaded += len( data )) }+ I  [5 T7 A) E* U! W
  64.             #print "%s" % (self.name)
    - q' N  N9 g! a
  65.             #progress = u'\r...'
    ; E9 a( H! }3 w: b4 u" K: d
  66. # B( {0 g2 |4 x6 Y) v
  67.             data = self.urlhandle.read( self.oneTimeSize )& O& }7 t1 @+ _5 {% K6 l+ h' F! g
  68.          # S7 l$ @6 j* }0 v
  69. def GetUrlFileSize(url, proxies={}):
    5 l7 I8 z! V. }9 n: G
  70.     urlHandler = urllib.urlopen( url, proxies=proxies )
    9 M* q0 _! S8 v( |: j! T7 j
  71.     headers = urlHandler.info().headers9 |& _7 z. h6 Y! }: p) e$ ^8 N/ x& m
  72.     length = 0
    : |4 l# i$ |: ~4 i% O4 e0 c7 v" X$ H
  73.     for header in headers:7 r- e: c* q; c( A1 `# o; z8 t
  74.         if header.find('Length') != -1:
    0 I/ V  ?+ e5 k2 j  l; M# H$ o
  75.             length = header.split(':')[-1].strip()
    1 ~( p: o/ \4 H5 R, D
  76.             length = int(length)6 f3 A/ d' J( [% E4 a. q  n% D
  77.     return length
    ) X6 f9 z7 ?' w6 b0 i2 q
  78. : X: e+ l- x/ t
  79. def SpliteBlocks(totalsize, blocknumber):
    * C( q9 F6 s) |0 Z# B
  80.     blocksize = totalsize/blocknumber& o6 {5 [$ ~3 _. {% i3 \/ k( ?
  81.     ranges = []
    ( K. H1 B8 l1 t6 J! Z
  82.     for i in range(0, blocknumber-1):2 t- y; y+ |! {! E" b7 ~2 G) K# H' l" r
  83.         ranges.append((i*blocksize, i*blocksize +blocksize - 1))
    . N; P, C3 r. C2 s
  84.     ranges.append(( blocksize*(blocknumber-1), totalsize -1 ))& }$ H/ @' d; k; o! W6 i/ O: e- G8 U1 ?
  85. , H: e+ u# B+ ^
  86.     return ranges6 S9 K  v4 ^, @
  87. def islive(tasks):
    ' I6 J# s4 x, y
  88.     for task in tasks:
    9 A5 k! r5 S- `
  89.         if task.isAlive():( r* B  W1 m' Q; ~
  90.             return True
    / F4 m: L3 v3 z  `( Z5 `0 H( s
  91.     return False
    & V! b9 O7 `$ y- V) m

  92.   D9 \6 h7 \9 M5 t) C  ?& ]" v% z
  93. def paxel(url, output, blocks=6, proxies=local_proxies):, |* u; P  l% l: T! d: ?) c
  94.     ''' paxel( p( p8 ~6 E- ~& F8 }1 e! {0 D, l
  95.     '''! Q+ r* [- }- Z5 V
  96.     size = GetUrlFileSize( url, proxies )# J5 q% [( j# W1 `4 k
  97.     ranges = SpliteBlocks( size, blocks )
    , q) t( A. p- q  t' @

  98. 9 t- p* \- h! e6 l6 w
  99.     threadname = [ "thread_%d" % i for i in range(0, blocks) ]2 h1 z. G$ r1 c( h; T' }" m; M
  100.     filename = [ "tmpfile_%d" % i for i in range(0, blocks) ]5 T+ |5 j6 ^7 d' F6 b
  101.    
    8 o+ i. }% S% k; k0 O7 ^( H
  102.     tasks = []/ X! b) F& R  Y$ v
  103.     for i in range(0,blocks):* }1 [  n8 K% i- U
  104.         task = AxelPython( threadname[i], url, filename[i], ranges[i] )! X* O4 y8 j, V
  105.         task.setDaemon( True ), D1 P! V8 _, e* V9 C3 A6 L3 z
  106.         task.start()  ^7 i) c2 J. d1 I. a' {" U- I2 a6 c
  107.         tasks.append( task )- ~# T4 j. R  r% u* B
  108.          
    ( t" ?0 g: i' \
  109.     time.sleep( 2 )& Y6 N* |# q& _# C
  110.     while islive(tasks):: P5 F2 E+ e# P
  111.         downloaded = sum( [task.downloaded for task in tasks] )
    ! j1 Z' _. K9 y# m, {9 m
  112.         process = downloaded/float(size)*100
    ( Y# K) r& V) t2 O' L
  113.         show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)0 s, N: }$ F4 T  Y5 Y
  114.         sys.stdout.write(show)6 |2 w  L4 B) J. }% h. x( a4 F  O
  115.         sys.stdout.flush()4 b2 @% F: l# a
  116.         time.sleep( 0.5 ). L! O; Q3 G- i( o& E0 D  ]6 X. N% T6 b
  117.             
    ' }! G3 E. X: A, C8 J4 }5 F
  118.     filehandle = open( output, 'wb+' )
    ; Y7 F8 l. |8 T' x0 Q
  119.     for i in filename:3 W9 @- G: ^$ @4 x
  120.         f = open( i, 'rb' )
    $ T+ |  u4 s* B% s3 r4 y! E! \
  121.         filehandle.write( f.read() )
    ' Y9 i7 |9 O7 E8 ]4 s4 ~1 d
  122.         f.close()
    ' v" `  @4 X' o$ H+ a
  123.         try:+ U$ j. ~: U) ]9 a6 l5 T
  124.             os.remove(i)
    ! A) a, \# J7 p+ H' g! f, ^
  125.             pass, g. U( F: g6 ]4 K1 l
  126.         except:
    7 F0 K- u$ e/ Q! \" _7 p
  127.             pass+ U1 K) \  m9 i- i

  128. & n" t- z- f0 Z* m9 H
  129.     filehandle.close()- W2 P, Y6 f- j; b1 t9 L
  130. / X7 i  c7 b7 @$ d  E
  131. if __name__ == '__main__':
    $ o6 n2 S, _: h* {
  132.     url = "http://xz1.mm667.com/xz84/images/001.jpg"- a) K; F9 M  \
  133.     output = '001.jpg'
    ; ?5 v7 e$ b+ J. Z& o
  134.     paxel( url, output, blocks=4, proxies={} )
复制代码
$ Q" T1 z0 f) f# B0 t0 C





欢迎光临 数学建模社区-数学中国 (http://www.madio.net/) Powered by Discuz! X2.5