QQ登录

只需要一步,快速开始

 注册地址  找回密码
查看: 2233|回复: 0
打印 上一主题 下一主题

多线程下载图集

[复制链接]
字体大小: 正常 放大

1186

主题

4

听众

2923

积分

该用户从未签到

跳转到指定楼层
1#
发表于 2024-3-31 17:18 |只看该作者 |倒序浏览
|招呼Ta 关注Ta
网上好看的动漫图集,如果手工下载太费时了。简单分析下网页地址规律,写个多线程脚本搞定。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''It is a multi-thread downloading tool

    It was developed following axel.
        Author: volans
        E-mail: volansw [at] gmail.com
'''

  11. . `# F9 G. a! d
  12. import sys/ V# _3 o' c' t1 v; |; s
  13. import os
    5 C0 `- O! N1 a$ X  B7 H
  14. import time. H( i5 |+ j( ^\" K+ V9 t8 O
  15. import urllib
    + S; Y- m  U6 r; s) M& {4 r! @
  16. from threading import Thread0 t  R: ]  i( L

  17. 4 q) V% B1 Y7 Q' z3 r
  18. local_proxies = {'http': 'http://131.139.58.200:8080'}
    - ]0 Y+ R+ d+ t

  19. \" d. k' U3 H( d' Z
  20. class AxelPython(Thread, urllib.FancyURLopener):
    % Z% W% ]# ~, ]4 b
  21.     '''Multi-thread downloading class.
    6 m3 z2 B$ l% h\" N
  22. ) a( w+ I3 L1 j7 \3 L* [
  23.         run() is a vitural method of Thread.) g( M* r/ \% B. p# k\" C% l
  24.     '''! C0 v% ~# [* Z3 ~% S7 u
  25.     def __init__(self, threadname, url, filename, ranges=0, proxies={}):  S- M\" i& I8 a6 A& v
  26.         Thread.__init__(self, name=threadname)0 J5 T9 Q! F5 z' C
  27.         urllib.FancyURLopener.__init__(self, proxies)8 o! Q# C; [: u& [. _( V; {
  28.         self.name = threadname
    # j( [/ O, {\" H! N' ?% d7 Y9 B8 c
  29.         self.url = url1 u, K. n7 P* t1 x2 w
  30.         self.filename = filename2 x) Y\" c  `- E
  31.         self.ranges = ranges  B1 E4 f0 [3 ]* }9 k
  32.         self.downloaded = 05 a2 B! m3 ~) E$ B6 o\" \\" h1 n
  33. ( R- m( j' Y1 \6 l
  34.     def run(self):* f+ G, A; J3 N& @. ^1 f- j
  35.         '''vertual function in Thread''': W1 g! y; c, h( Y) Q' H6 |$ [- {! W
  36.         try:
    7 n  N( |\" d/ {( o$ P' \
  37.             self.downloaded = os.path.getsize( self.filename )) C! Y' G! c0 v
  38.         except OSError:
    5 |+ E% x0 e& r' V2 u# }. u
  39.             #print 'never downloaded'
    % |( b. l+ k! n
  40.             self.downloaded = 0
    2 x5 j) u; w% O2 q( m. l

  41. & b: a  Q9 e% `3 U& P2 U
  42.         # rebuild start poind
    \" O0 Q: A( h: D5 i# \$ T
  43.         self.startpoint = self.ranges[0] + self.downloaded' B' E' Y, N% k, }
  44.          
    1 s4 |( Y* x  X\" b( ?4 p
  45.         # This part is completed- X9 c3 Y( [% G% l  z
  46.         if self.startpoint >= self.ranges[1]:! \& W; G- O- m7 j% s
  47.             print 'Part %s has been downloaded over.' % self.filename$ i0 L9 }; z0 _\" q3 c
  48.             return
      p3 r1 ]+ A; M5 S
  49.          
    * Q5 ^* w) S# ^
  50.         self.oneTimeSize = 16384 #16kByte/time
    - j) a: o  W& P! E\" Y
  51.         print 'task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1])% a# G5 Y7 x* p; O! A

  52. 3 t0 u- f5 N, s
  53.         self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))\" C. d& J6 P- J9 ^\" C5 T
  54.             
    9 W  ]\" Y9 B\" m, Z
  55.         self.urlhandle = self.open( self.url )
    ' U! [0 b$ F) S: {1 i3 p# _4 z  v) ~
  56. 3 ^# f9 i* S, p9 W, U2 |( Z8 u  _3 i\" D2 m
  57.         data = self.urlhandle.read( self.oneTimeSize )
    / [: i; J\" S- S
  58.         while data:& ?( e9 k/ J+ j3 P- c
  59.             filehandle = open( self.filename, 'ab+' )+ @7 H& e- d( Z, Z/ ?) P9 s
  60.             filehandle.write( data )
    5 B5 }* H. d, U3 B
  61.             filehandle.close()) H: ?\" w2 G8 ^
  62. + ~0 @& d2 O' k& k# `
  63.             self.downloaded += len( data )4 \& q# f' ?/ [4 J
  64.             #print "%s" % (self.name)
    2 Y& X0 c9 o8 g9 o3 u
  65.             #progress = u'\r...'4 o* l3 F/ U) i5 s7 d* D0 }6 I
  66. / k, G' E1 @2 y# ~' v
  67.             data = self.urlhandle.read( self.oneTimeSize )* {9 j* P+ Y\" r7 ]
  68.          ! z- m: @( O! s# A# M+ n8 w
  69. def GetUrlFileSize(url, proxies={}):1 d% }& K. z2 ~- G
  70.     urlHandler = urllib.urlopen( url, proxies=proxies )
    * x\" m: A7 {* H' I
  71.     headers = urlHandler.info().headers! v1 x3 _' @4 a8 ?# N% u5 ~# }
  72.     length = 0/ n6 [+ \# Q- D: r
  73.     for header in headers:
    5 _' t) D6 o8 E! c7 \. z
  74.         if header.find('Length') != -1:9 n; |5 i  H8 {) g
  75.             length = header.split(':')[-1].strip()
    . R$ T- w; L4 O: B
  76.             length = int(length)
    7 g+ L+ V& X: s/ G5 a/ q
  77.     return length
    ' t* O: s3 B$ k. j7 h. O

  78. - ~, ?/ _; H: _& e
  79. def SpliteBlocks(totalsize, blocknumber):! F1 k6 c) P2 L& R. L, Z
  80.     blocksize = totalsize/blocknumber
    2 U/ G- z- M9 ^* g! ^\" o) m9 D
  81.     ranges = []
    ) V# L* U3 ~: ^5 S  t
  82.     for i in range(0, blocknumber-1):
    2 Q. ^) v1 }* ]& t
  83.         ranges.append((i*blocksize, i*blocksize +blocksize - 1))/ \4 w9 r, m- i6 S9 k+ T
  84.     ranges.append(( blocksize*(blocknumber-1), totalsize -1 ))$ b; ?9 I9 R1 k\" h& C

  85. - g! X2 i$ V6 l$ O& i
  86.     return ranges8 h% S, B+ u& I% w5 P% ?/ b
  87. def islive(tasks):; G5 N( w2 u3 {6 a  O4 c, p; ^, a
  88.     for task in tasks:
    9 J# \1 \  `$ h# e* x( U( o! Z
  89.         if task.isAlive():
    5 w$ M1 T4 V0 u) |, d
  90.             return True
    + v* w\" D) W+ @) k: o  q! }7 {
  91.     return False
    3 _( c, o, n. X1 E

  92. 4 z- B0 z8 ~: V1 ?2 D
  93. def paxel(url, output, blocks=6, proxies=local_proxies):# |6 p! ]$ y+ R( V7 {+ \+ D
  94.     ''' paxel
    + ?3 C; M0 t' g& U
  95.     ''': e' Y; o, A/ W( p
  96.     size = GetUrlFileSize( url, proxies )$ p: D3 P& I7 Q/ X
  97.     ranges = SpliteBlocks( size, blocks )8 {- U! R$ c, y! c

  98. ' ~8 E/ {3 E  n
  99.     threadname = [ "thread_%d" % i for i in range(0, blocks) ]% {9 }0 W. a5 K& |/ w. S4 g
  100.     filename = [ "tmpfile_%d" % i for i in range(0, blocks) ]
    * W( e+ ?* |  A' R\" }
  101.    
    8 Q\" n8 N; d' S1 t+ ?& P
  102.     tasks = []& r& e$ d% d9 E& {2 t! _
  103.     for i in range(0,blocks):
    ! D' ?# d8 V\" j) A- _! M
  104.         task = AxelPython( threadname[i], url, filename[i], ranges[i] )) l0 q7 @. q9 W1 [5 _! q
  105.         task.setDaemon( True )
    0 G- z1 }2 x' J8 S- p5 \+ q; C! q
  106.         task.start()4 G+ A( i7 d6 D  `0 O
  107.         tasks.append( task )
    ( t1 Q\" _9 Z2 \% i5 v9 W* f+ v% ~
  108.          ; f2 `/ B7 J* W( `6 m
  109.     time.sleep( 2 )' v\" I9 |7 b+ ~
  110.     while islive(tasks):
    5 t9 t5 ?+ ~4 D) d4 P. H4 O
  111.         downloaded = sum( [task.downloaded for task in tasks] )
    # V) `; C: I: a2 R. u* y
  112.         process = downloaded/float(size)*1002 L8 y2 o$ f9 S) ^* X
  113.         show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)/ D5 d5 m  _7 R, Q0 ^! m$ U
  114.         sys.stdout.write(show)9 f# A: a\" k% T5 D0 g1 g# Z- e
  115.         sys.stdout.flush()% k/ n0 B6 P5 r
  116.         time.sleep( 0.5 )
      ^\" N# @# K, e: O  c5 [4 o- A6 S
  117.             
    2 T, J9 n) t! K/ Z& C8 `
  118.     filehandle = open( output, 'wb+' )
    3 A+ A8 \/ X, R. `/ i+ B  j
  119.     for i in filename:3 G\" G, p# f; R* r
  120.         f = open( i, 'rb' )$ L- \6 M2 b7 X, _
  121.         filehandle.write( f.read() )! f$ A) d, r5 N4 ?3 k' n
  122.         f.close()
    3 a8 S- T. t1 X0 R) |
  123.         try:
    ; ?  S9 b% }% N+ E. M! t
  124.             os.remove(i)8 q, \7 {+ b\" i+ L
  125.             pass
    ; v0 k3 ~) r8 A4 i5 F* J
  126.         except:
    . J% m8 {0 O( U
  127.             pass
    2 w2 a' B$ ^& p\" y3 a
  128. \" t) M5 @\" o0 |2 N
  129.     filehandle.close()
    / z8 i: ~\" ?2 m5 t' h5 g. o% h

  130. 7 Z1 p* l: F: K1 V/ Z2 M
  131. if __name__ == '__main__':
    : ?2 w) F8 g, b. v\" v1 U
  132.     url = "http://xz1.mm667.com/xz84/images/001.jpg"
    6 H3 P& ?/ q2 g( L* C
  133.     output = '001.jpg'
    3 e1 V+ N( y1 {* U( Y# |/ Z
  134.     paxel( url, output, blocks=4, proxies={} )
复制代码
1 v; ^/ q2 y4 B
zan
转播转播0 分享淘帖0 分享分享0 收藏收藏0 支持支持0 反对反对0 微信微信
您需要登录后才可以回帖 登录 | 注册地址

qq
收缩
  • 电话咨询

  • 04714969085
fastpost

关于我们| 联系我们| 诚征英才| 对外合作| 产品服务| QQ

手机版|Archiver| |繁體中文 手机客户端  

蒙公网安备 15010502000194号

Powered by Discuz! X2.5   © 2001-2013 数学建模网-数学中国 ( 蒙ICP备14002410号-3 蒙BBS备-0002号 )     论坛法律顾问:王兆丰

GMT+8, 2026-4-20 10:55 , Processed in 0.426753 second(s), 51 queries .

回顶部