数学建模社区-数学中国

标题: 多线程下载图集 [打印本页]

作者: 2744557306    时间: 2024-3-31 17:18
标题: 多线程下载图集
网上好看的动漫图集很多,但手工逐张下载太费时。简单分析一下网页地址的规律,写个多线程脚本就能搞定。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''It is a multi-thread downloading tool.

    It was developed following axel.
        Author: volans
        E-mail: volansw [at] gmail.com
'''

import os
import shutil
import sys
import time
import urllib
from threading import Thread


  18. local_proxies = {'http': 'http://131.139.58.200:8080'}
    ) O. s& \, Z" |3 t
  19. ( Q* h. v5 f' r# D6 }6 ~' b4 _
  20. class AxelPython(Thread, urllib.FancyURLopener):
    ' q2 {) h* ]& {: c0 a' F
  21.     '''Multi-thread downloading class.+ Z1 G; {8 N) j
  22. 9 g/ q* o/ ?4 D5 f& y' o7 f
  23.         run() is a vitural method of Thread., Z2 m1 i6 S2 T6 P- S
  24.     '''
    * {- @! i# Z* l
  25.     def __init__(self, threadname, url, filename, ranges=0, proxies={}):, c9 K) l+ _: B1 |2 J: H
  26.         Thread.__init__(self, name=threadname)5 A8 b7 E+ c) g3 ]# ~
  27.         urllib.FancyURLopener.__init__(self, proxies)& R) S* C4 ]/ Z! d+ v
  28.         self.name = threadname
    " I2 a( K' o" ~2 @. v
  29.         self.url = url
    ) D) E, n6 x: Q- w3 r% k
  30.         self.filename = filename
    ' `" u2 a# g; r/ v9 G
  31.         self.ranges = ranges% O4 y' ^6 r  D  g
  32.         self.downloaded = 0
      O: K0 C( z# C! g8 x9 a. f1 q% }6 D
  33. 2 G3 U! n" z7 ?7 h
  34.     def run(self):/ Z" U5 G+ e( ^) v! t/ a$ L
  35.         '''vertual function in Thread'''
    / C) D0 L  ~2 z3 a9 s! Y" L% _
  36.         try:
    * a4 P  z8 X  N
  37.             self.downloaded = os.path.getsize( self.filename )5 k! [, k. M- L: D$ Q5 B
  38.         except OSError:/ d, \+ `* o  P
  39.             #print 'never downloaded'
    9 C9 ]6 Y0 v. Z
  40.             self.downloaded = 01 e3 f# i- i" c6 ?1 d
  41. 9 [/ q# P% Q0 T& I, f' H
  42.         # rebuild start poind
    2 W. `/ ^4 Z4 l9 @
  43.         self.startpoint = self.ranges[0] + self.downloaded
    % |- U9 j/ U+ u- i% K. l
  44.          & f4 @3 h( q! Q5 L
  45.         # This part is completed
    ' Y+ `1 Z7 v& b, w' y; f! k: {& n
  46.         if self.startpoint >= self.ranges[1]:
    4 _: m* p) J: P4 n7 C6 \; F
  47.             print 'Part %s has been downloaded over.' % self.filename( Z2 Q$ U& \. Z$ S
  48.             return! R! e- j! C$ e7 X4 A, p+ S3 B' G
  49.          
    9 S1 s' j, u1 [! ~5 K# }3 W. O
  50.         self.oneTimeSize = 16384 #16kByte/time0 Q* ?+ j) D% I! ?7 d+ e# j
  51.         print 'task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1])$ S: A% r4 Q6 g& ]# h7 d

  52. # c3 d; l1 y( C, ~5 q: M
  53.         self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
    6 b$ [1 u( L/ ]9 O4 ]: s
  54.             
    ; o6 a4 R( e6 P: G6 l. _8 F
  55.         self.urlhandle = self.open( self.url )
    ( s1 }" y& R- N6 M$ J' t

  56. 8 B& ^; S5 ?5 h( x/ F% _' t
  57.         data = self.urlhandle.read( self.oneTimeSize ). T# q! m4 S4 E7 k0 b
  58.         while data:1 @- h  E5 ^7 |; g5 a! N2 W
  59.             filehandle = open( self.filename, 'ab+' )
    6 N! U# w! Q$ c1 A, _2 h' I: r. @
  60.             filehandle.write( data ), n) v6 d2 m5 t9 ?# i* g5 h
  61.             filehandle.close()
    & d8 G6 {1 _  C" @, g  n  u

  62. 5 Q- u9 L  R) C3 U( `
  63.             self.downloaded += len( data )
    - D$ X! m- a7 i* H3 d
  64.             #print "%s" % (self.name)
    8 W) [& @/ a. W# e: t/ N) K+ m
  65.             #progress = u'\r...'
    & Y% L% j8 y# G' _
  66. 5 p2 ~7 M' \/ L. {* w
  67.             data = self.urlhandle.read( self.oneTimeSize )1 `3 k% N9 \! j9 Z0 O
  68.          0 a/ p& A# G+ Q4 U2 ]
  69. def GetUrlFileSize(url, proxies={}):
    ' i! c' |% e; a1 w; O; ]3 I$ ]
  70.     urlHandler = urllib.urlopen( url, proxies=proxies )
    $ }; Q+ S0 q- z8 C  I0 w6 y
  71.     headers = urlHandler.info().headers
    , t) m. `& r# k* U3 e+ N
  72.     length = 0
    ( i) {) r  R- W! ^4 g, M6 k& j- E* x& B
  73.     for header in headers:: e. h9 ~% U* m8 j8 f
  74.         if header.find('Length') != -1:6 S  j/ j3 h1 ^5 h$ I( e+ g0 Z  t
  75.             length = header.split(':')[-1].strip()
    + l" `* H9 s. i0 u% r
  76.             length = int(length)
    ; i/ }" Y% Y) L0 S, X* D
  77.     return length
    1 C2 j# x" M: N; g

  78. 3 U2 F% i8 d+ l2 T$ S6 _
  79. def SpliteBlocks(totalsize, blocknumber):3 R) I. y& j3 t% o. v. @
  80.     blocksize = totalsize/blocknumber
    # c' j) ]( H1 g6 Y  m& E. T0 s
  81.     ranges = []; ]0 y- q, j8 a0 V9 _; D
  82.     for i in range(0, blocknumber-1):- r  q+ n$ T# L2 l" Q5 U
  83.         ranges.append((i*blocksize, i*blocksize +blocksize - 1))
    : c% X, i/ T6 n4 _5 c
  84.     ranges.append(( blocksize*(blocknumber-1), totalsize -1 ))
    3 f3 ?1 i* ~7 e+ F7 w

  85. ! W. O) A) ?" W! r$ D
  86.     return ranges
    4 ~% T* c- ]' a/ n: }; z
  87. def islive(tasks):
    ' L4 y$ R/ T  z7 k( T( T
  88.     for task in tasks:
    6 e9 x- X0 y" Z; G; ~/ q
  89.         if task.isAlive():
    0 V  U6 v/ Y) Y6 @% ~% i1 B% _
  90.             return True
    # l: N- D1 D; E. v9 p# H- H
  91.     return False
    7 c- {& ~, B+ {
  92. . s% c0 L$ N( M9 L8 r) _: v0 ^+ R
  93. def paxel(url, output, blocks=6, proxies=local_proxies):4 N) q/ v* \1 D( B& M) V2 k
  94.     ''' paxel
    5 V% v" K) B: v7 [# ]" l( a7 d
  95.     '''& U" v! T6 q  ^; {
  96.     size = GetUrlFileSize( url, proxies )
    ( B2 T- K( x7 D( N' S1 N( J
  97.     ranges = SpliteBlocks( size, blocks )
    - W! ?4 n3 w! \- }: Q, H6 o

  98. & e2 J( k& c6 d; g5 \9 T* A
  99.     threadname = [ "thread_%d" % i for i in range(0, blocks) ]/ v  ]0 I2 \( ?( Z, z
  100.     filename = [ "tmpfile_%d" % i for i in range(0, blocks) ]
    9 C+ Y# q% l7 f. \
  101.    
    * b2 Q5 i( V" l! y
  102.     tasks = []
    % S3 E" m8 ]% B4 }5 S7 Y) z2 |
  103.     for i in range(0,blocks):
    0 I) G  S. F7 k: v4 U! w: B
  104.         task = AxelPython( threadname[i], url, filename[i], ranges[i] )
    ( T( n8 s" I& d2 n/ f5 s. K- T& \5 E
  105.         task.setDaemon( True )
    3 F% S+ i, _! N" |. s5 B
  106.         task.start()
    6 L% i! [8 i- d
  107.         tasks.append( task )1 B5 J0 j& c# d8 n' ^; n
  108.          # l5 \& ^$ t# i% i: L% u
  109.     time.sleep( 2 )3 k* E% R$ u2 o
  110.     while islive(tasks):
    * ~$ _) ~) q. J) A
  111.         downloaded = sum( [task.downloaded for task in tasks] )% U: I( q1 k& M! j  f
  112.         process = downloaded/float(size)*100
    8 d4 c$ w6 c8 T% {- w3 m
  113.         show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)4 P2 x/ K1 Z$ _
  114.         sys.stdout.write(show)3 S; c9 A. U  f2 Y; V* |9 ~, V
  115.         sys.stdout.flush()
    4 S) j$ _0 K) [( e. t6 y
  116.         time.sleep( 0.5 )  y: x/ M4 ~' a4 l; x4 i5 r
  117.                Y  k! F4 x: d
  118.     filehandle = open( output, 'wb+' )
    & b6 p. ^: j" N; W2 z# x* w
  119.     for i in filename:9 z1 Y9 s6 c) |9 `
  120.         f = open( i, 'rb' )9 P" E+ s4 o7 j. I) s
  121.         filehandle.write( f.read() )
    , o5 m4 Y: \  S* M% `. ~
  122.         f.close()
    + K+ |" y3 O2 _3 @/ x
  123.         try:
    3 D. \( B! Z4 @' H9 @) F3 n
  124.             os.remove(i)
    9 b8 X$ n' P- X* N
  125.             pass& |9 Q. ?5 ~$ s) y9 ]0 {# s$ \$ K* n) O
  126.         except:; e2 L9 F# u  `; ?
  127.             pass* c. o* D/ ^' J
  128. $ O2 i# w: S3 z; J, W9 C# K
  129.     filehandle.close()
    5 a! f$ Q3 y+ ^, F, ?  H

  130. 7 d2 o0 \3 b  _( R5 {/ t
  131. if __name__ == '__main__':' X& W5 _( }3 p) l+ `) K
  132.     url = "http://xz1.mm667.com/xz84/images/001.jpg"6 {/ M  K8 A  [' v# d
  133.     output = '001.jpg'
    6 Y& [" _. k  Z- p7 W
  134.     paxel( url, output, blocks=4, proxies={} )
复制代码
/ E: A. I* h* E6 W4 X% m( r





欢迎光临 数学建模社区-数学中国 (http://www.madio.net/) Powered by Discuz! X2.5