QQ登录

只需要一步,快速开始

 注册地址  找回密码
查看: 2247|回复: 0
打印 上一主题 下一主题

多线程下载图集

[复制链接]
字体大小: 正常 放大

1189

主题

4

听众

2934

积分

该用户从未签到

跳转到指定楼层
1#
发表于 2024-3-31 17:18 |只看该作者 |倒序浏览
|招呼Ta 关注Ta
网上有不少好看的动漫图集,但手工逐张下载太费时。简单分析一下网页地址的规律,写个多线程脚本就能搞定。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''It is a multi-thread downloading tool.

    It was developed following axel.
        Author: volans
        E-mail: volansw [at] gmail.com
'''

import sys
import os
import time
import urllib
from threading import Thread

# Default proxy mapping (URL scheme -> proxy URL). paxel() uses it as the
# default value of its `proxies` parameter.
local_proxies = {'http': 'http://131.139.58.200:8080'}

  20. class AxelPython(Thread, urllib.FancyURLopener):
    * ]: a# ^+ r. p
  21.     '''Multi-thread downloading class.5 q6 x, g- w/ G6 t1 B* E9 F

  22. * P/ `$ r! k1 @# a\" M! I
  23.         run() is a vitural method of Thread.
    ; K2 y9 Q3 I3 d) d/ a4 M3 m& j
  24.     '''
      l$ ~; G! N$ d
  25.     def __init__(self, threadname, url, filename, ranges=0, proxies={}):
    9 M& _$ c! W0 C) t! W0 h
  26.         Thread.__init__(self, name=threadname)* U9 H\" T7 j& J9 d1 C
  27.         urllib.FancyURLopener.__init__(self, proxies): e! t# ]* t0 a% M7 @7 V& U1 b
  28.         self.name = threadname( D# A) c/ L1 z0 o  u
  29.         self.url = url
    5 F+ A; k1 J+ `  @7 W
  30.         self.filename = filename- j' a; O. k* w# E# D) v
  31.         self.ranges = ranges
    % L4 P9 }5 K7 j$ Q7 E4 H+ L
  32.         self.downloaded = 0
    8 ^5 Q% z8 H! M: J6 d, P
  33. 9 P1 [9 ?) {, k6 S1 g
  34.     def run(self):3 m7 ^! F\" v0 h# V# m+ O
  35.         '''vertual function in Thread'''% ~$ y4 W0 A\" N8 j
  36.         try:3 y' `$ g# T5 J) W) w! E
  37.             self.downloaded = os.path.getsize( self.filename ); }$ ?% m+ k- @
  38.         except OSError:5 p* v3 j- v  u7 b9 T: Y8 x
  39.             #print 'never downloaded'7 f% |7 m  v9 \9 ]
  40.             self.downloaded = 0% Z( ]8 D. v0 ]' ]
  41. 5 V2 N8 S3 ?& J0 V\" _
  42.         # rebuild start poind
    3 c  B7 z* l2 I* h3 {+ U- u
  43.         self.startpoint = self.ranges[0] + self.downloaded\" h. J# T: q8 d
  44.          
    4 d1 m# [  z% m# }' a4 X
  45.         # This part is completed
    - Y, b2 r; Z2 t1 i
  46.         if self.startpoint >= self.ranges[1]:
    : S' f  }/ Y  x6 A$ B) d! K\" c9 Y3 H
  47.             print 'Part %s has been downloaded over.' % self.filename
    3 q! r' k: ^# G8 Q
  48.             return
    \" v2 F2 U) _1 q& K
  49.          
    5 ?5 t) ?: }1 n
  50.         self.oneTimeSize = 16384 #16kByte/time
    + C1 V. f1 ?6 i( L
  51.         print 'task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1])
    / B6 }% b3 l/ s% u
  52. 9 [7 o. o- M4 d- o
  53.         self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
    ' I; v4 w2 |7 u+ }\" W
  54.             
    ' C3 C* Q' k! ~- i8 L
  55.         self.urlhandle = self.open( self.url )
    & r7 i/ ]* J; A5 s  z4 _
  56. ; f7 \, g3 p! L
  57.         data = self.urlhandle.read( self.oneTimeSize )
    ; b9 ]0 ~  v# a8 o+ z! H+ H9 n
  58.         while data:: l0 L9 N8 G5 t5 ~5 g
  59.             filehandle = open( self.filename, 'ab+' )! `* b# [; K) |' W: q
  60.             filehandle.write( data )
    , M! T9 c) Z  c  y6 G: P
  61.             filehandle.close()
    ; v2 M9 _9 Y+ f# y* G

  62. 2 _  E\" ]0 _  }* |& F7 V2 w6 ~; M
  63.             self.downloaded += len( data )
    7 r0 E: o$ I6 @  ?
  64.             #print "%s" % (self.name)
    # B$ O& a, G% b0 @
  65.             #progress = u'\r...'
    * \3 t9 q2 R3 P\" g# L# V8 P

  66. 6 A$ `- g& K& n# I
  67.             data = self.urlhandle.read( self.oneTimeSize )
    ' g8 z8 L& y9 J; P+ b  d
  68.          \" N7 y% G! o: }2 Z
  69. def GetUrlFileSize(url, proxies={}):0 H' i( v7 C+ P) z, w, s' x9 g
  70.     urlHandler = urllib.urlopen( url, proxies=proxies )4 C& N1 e8 S7 {2 y! W- K
  71.     headers = urlHandler.info().headers6 V7 b; |* O! E. R& o
  72.     length = 0
    $ d1 A8 z* ~* R1 M) [. B
  73.     for header in headers:3 J4 [( I' \, s# h\" e9 V6 b6 B' w
  74.         if header.find('Length') != -1:
    1 ^5 D3 @9 ]. u
  75.             length = header.split(':')[-1].strip()7 T: g0 S6 l2 d9 I7 f$ j0 I
  76.             length = int(length)  e3 \# e( e3 {( t0 K6 H9 `# p
  77.     return length- D( j' Q: w8 R7 `' k0 s

  78.   I0 V' W' A8 S3 \0 c
  79. def SpliteBlocks(totalsize, blocknumber):
    7 E- d2 A& Z+ I  l9 |
  80.     blocksize = totalsize/blocknumber
    \" J1 m# \, s+ r( ?: d; _/ \* t1 t
  81.     ranges = []
    # L3 Y2 g, u! \8 H. ]/ p! Z* e) N
  82.     for i in range(0, blocknumber-1):& h. A% v9 G; {( p7 M+ ?
  83.         ranges.append((i*blocksize, i*blocksize +blocksize - 1)); S5 r3 _3 G8 l
  84.     ranges.append(( blocksize*(blocknumber-1), totalsize -1 ))9 X) ]3 H3 b! Q# U

  85. 1 x5 K, [, X9 |9 |  y* ?9 L
  86.     return ranges! r# j0 u4 w4 d
  87. def islive(tasks):6 ~1 x2 l% G) b! V+ H
  88.     for task in tasks:1 y& Q; t$ K2 J7 e8 M1 O7 `1 A7 r
  89.         if task.isAlive():
    ) c, k& k9 Q9 J
  90.             return True7 m- I\" X' n\" j/ u1 ^2 C. `
  91.     return False
    , `( D3 S, _, I( I

  92. ! V4 u6 N3 t0 w& z# l) `
  93. def paxel(url, output, blocks=6, proxies=local_proxies):: }3 J# B+ |) Q
  94.     ''' paxel0 M% J8 H8 u% Y9 X; J8 k3 O; q
  95.     '''% R% b6 U9 ]5 b* g
  96.     size = GetUrlFileSize( url, proxies )
    ) k; }' l. [/ i0 y
  97.     ranges = SpliteBlocks( size, blocks )/ l4 P) t\" N1 B# V8 {

  98. ( A\" H7 U' m# t- N
  99.     threadname = [ "thread_%d" % i for i in range(0, blocks) ]
    / M6 d$ X9 M6 h! F' _
  100.     filename = [ "tmpfile_%d" % i for i in range(0, blocks) ]# Y- Z3 s\" v4 X
  101.    
    ( v$ o- M! d* r$ p! a2 R
  102.     tasks = []
    9 t% ]$ o1 R# G: a) l- L
  103.     for i in range(0,blocks):
    ) j\" B: ~+ S% @  n) ]& l: k\" K6 K
  104.         task = AxelPython( threadname[i], url, filename[i], ranges[i] )9 i5 e$ E4 @9 @2 L3 z
  105.         task.setDaemon( True )
    3 E% O. Q6 e# c/ D5 S# b
  106.         task.start()
    3 U* ^' I2 ^% c, p/ j' s& Z
  107.         tasks.append( task )
    , W; V9 W; ~/ l  y% R
  108.          
    6 Y+ q) g6 A  ]7 {& W; f* d3 m
  109.     time.sleep( 2 )2 ?! |% M! W* t6 f; t) ~) O8 j1 k; N( d3 V
  110.     while islive(tasks):3 Y3 e. b4 e8 H7 p; z
  111.         downloaded = sum( [task.downloaded for task in tasks] )
    1 {* b! y, E8 f% w
  112.         process = downloaded/float(size)*100
    9 C) w5 o( P* `
  113.         show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
    * b0 l7 i$ Q5 x! ^
  114.         sys.stdout.write(show)
    7 w, K/ w3 Y6 u- g6 C8 b* [+ c* `
  115.         sys.stdout.flush()
    . d- C; a  T* k\" y! [3 {% `\" |
  116.         time.sleep( 0.5 )
    7 u% a0 r# Z; d9 b3 [/ y- i
  117.              ; M. t! {9 X1 f2 v8 e
  118.     filehandle = open( output, 'wb+' )9 ~( z- V& i- g4 X# f0 N
  119.     for i in filename:0 I0 L% E7 U+ e7 c
  120.         f = open( i, 'rb' )
    ) P$ e7 X* b( X* h3 Q' W
  121.         filehandle.write( f.read() )3 K5 F* A7 X0 n\" z# v
  122.         f.close()
      G\" g& U4 i\" ]* Z, H- O/ D! q
  123.         try:
    1 V6 t( V1 \% |0 D* l, _
  124.             os.remove(i)
    % g1 h$ u$ C' v; ^: f; A4 X0 Q
  125.             pass+ ~0 d+ @+ L$ G$ |; Z1 H8 K: X4 l
  126.         except:
    ; K5 y9 g( G& H! @  F
  127.             pass. g9 t- T+ u6 T6 o

  128. % B) [% l+ j/ f+ J
  129.     filehandle.close()
    - @% g- C. o3 e; Q) O7 S
  130. / l5 |\" r: n9 A) p3 k4 E7 w
  131. if __name__ == '__main__':, T2 A4 }3 C( j3 E6 g. u) r
  132.     url = "http://xz1.mm667.com/xz84/images/001.jpg"
    9 [( u/ T# Y( a+ B3 J
  133.     output = '001.jpg'* X6 j: e- R% R5 @1 r9 l* l
  134.     paxel( url, output, blocks=4, proxies={} )
复制代码
! q8 {) n) R/ J; _; k
zan
转播转播0 分享淘帖0 分享分享0 收藏收藏0 支持支持0 反对反对0 微信微信
您需要登录后才可以回帖 登录 | 注册地址

qq
收缩
  • 电话咨询

  • 04714969085
fastpost

关于我们| 联系我们| 诚征英才| 对外合作| 产品服务| QQ

手机版|Archiver| |繁體中文 手机客户端  

蒙公网安备 15010502000194号

Powered by Discuz! X2.5   © 2001-2013 数学建模网-数学中国 ( 蒙ICP备14002410号-3 蒙BBS备-0002号 )     论坛法律顾问:王兆丰

GMT+8, 2026-6-2 22:37 , Processed in 0.387757 second(s), 51 queries .

回顶部