QQ登录

只需要一步,快速开始

 注册地址  找回密码
查看: 1631|回复: 0
打印 上一主题 下一主题

多线程下载图集

[复制链接]
字体大小: 正常 放大

1176

主题

4

听众

2884

积分

该用户从未签到

跳转到指定楼层
1#
发表于 2024-3-31 17:18 |只看该作者 |正序浏览
|招呼Ta 关注Ta
网上有很多好看的动漫图集，但手工逐张下载太费时。简单分析一下网页地址的规律，写个多线程脚本就能搞定。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''Multi-threaded HTTP download tool, modelled on axel.

    The file to fetch is split into byte ranges; each range is downloaded
    by its own thread into a temporary part file, and the parts are then
    concatenated into the requested output file.

        Author: volans
        E-mail: volansw [at] gmail.com
'''

# NOTE: this script targets Python 2 -- the code below relies on
# urllib.FancyURLopener / urllib.urlopen and the print statement.
import sys
import os
import time
import urllib
from threading import Thread

# Proxy mapping that paxel() uses when the caller does not pass `proxies`.
# NOTE(review): the address is hard-coded; confirm it is still reachable
# before relying on the default.
local_proxies = {'http': 'http://131.139.58.200:8080'}
  19. - T\" o+ X0 V+ @. P7 A6 I
  20. class AxelPython(Thread, urllib.FancyURLopener):( A7 t. }% t* m9 ^7 x\" O  r
  21.     '''Multi-thread downloading class.6 }6 }3 D$ O% P# R* v) d\" M8 j

  22. 1 t3 ]6 Y5 j; c2 h4 S4 A0 f# c
  23.         run() is a vitural method of Thread.; t2 x/ j- c/ H% N( C\" Z
  24.     '''
    4 ~3 o: ?& s8 V, F$ U
  25.     def __init__(self, threadname, url, filename, ranges=0, proxies={}):
    2 c% i: k# I9 v\" |7 O* Q
  26.         Thread.__init__(self, name=threadname)5 x\" X/ I% ~( x4 ]
  27.         urllib.FancyURLopener.__init__(self, proxies)
    \" }& ^7 |  m. [
  28.         self.name = threadname4 d4 c- w6 y\" y0 |\" x
  29.         self.url = url
    7 j6 A  f, G2 E7 |, t
  30.         self.filename = filename
    3 C7 E2 k+ A/ E+ w! I+ G' L4 q
  31.         self.ranges = ranges' r9 f$ k! }  A) L) p1 e! A
  32.         self.downloaded = 05 n' W; G  J) y6 B* n/ ~& w

  33. 6 x; y& e/ Z5 H+ c
  34.     def run(self):
    , P& ^! x2 b7 U9 J, r
  35.         '''vertual function in Thread'''
    1 b) a- T- ]0 [8 K3 |
  36.         try:' ^9 ^7 ?6 ^& R  y2 ]$ g
  37.             self.downloaded = os.path.getsize( self.filename )
    * s5 _9 h\" }# t; D
  38.         except OSError:5 P' U+ I9 [( X
  39.             #print 'never downloaded'( y+ D( M7 `2 j) r$ ^
  40.             self.downloaded = 0
    . L6 h: m1 U: F2 c0 u6 i
  41. 1 g/ J# S, Y# ^  H: b8 d4 a
  42.         # rebuild start poind
    3 B8 O! ^) f* f0 ~& a
  43.         self.startpoint = self.ranges[0] + self.downloaded8 w7 b( W2 W# B
  44.          * Y0 E/ L' w; C& O4 o7 ]! }; b
  45.         # This part is completed
    ; }% q% `( Y( _$ d: a0 ^
  46.         if self.startpoint >= self.ranges[1]:
    # `, o) [8 \  V' ^
  47.             print 'Part %s has been downloaded over.' % self.filename
    9 j\" ^% i4 C+ D( F* b
  48.             return3 {) n! S- w6 w) G# B3 b
  49.          $ [4 T; [& z. A  g) b1 _* ^
  50.         self.oneTimeSize = 16384 #16kByte/time
    + n0 b0 @; W+ W& W1 k
  51.         print 'task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1])
    6 p( N. {& @( x

  52. 0 k5 N0 y5 E( W, c
  53.         self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))) d& l7 z0 j7 l9 D
  54.             
    8 C- S5 a& j& O) W\" y
  55.         self.urlhandle = self.open( self.url )
    6 B: y\" N; h  ]& H9 J

  56. ( S$ M5 z. i- o$ I. j
  57.         data = self.urlhandle.read( self.oneTimeSize )2 y( B# a5 p  C7 ~5 _
  58.         while data:- D3 ?: k3 L( v5 N) m  T3 d# V
  59.             filehandle = open( self.filename, 'ab+' )3 d* Q# W# Y+ I\" W* H
  60.             filehandle.write( data )8 ]2 ^' y. @* W2 `) n9 x& r
  61.             filehandle.close()
    ( N+ P8 R, k, r

  62. ' G/ @0 d# C! _4 k! j; W8 y' |  W- a
  63.             self.downloaded += len( data )
    % o  ^4 l! k  L  a9 R1 m
  64.             #print "%s" % (self.name)
    % Q' ?% r- x$ w
  65.             #progress = u'\r...'
    3 \7 h; g4 r! _+ N% @0 m

  66. \" d: }5 f& m0 L/ R6 n5 V\" e
  67.             data = self.urlhandle.read( self.oneTimeSize )# v) X. g2 N: ?8 g; r4 {: }' E
  68.          2 _# y1 \0 g$ e+ M) P4 T
  69. def GetUrlFileSize(url, proxies={}):) _8 z& F/ G8 l- n\" \% ?  P
  70.     urlHandler = urllib.urlopen( url, proxies=proxies ), y- b\" U; i8 P0 C% Q
  71.     headers = urlHandler.info().headers; A3 k) @1 c. |# `8 f
  72.     length = 02 [3 A- B; o* e3 T9 g+ |. c
  73.     for header in headers:7 x  w! \  q7 R1 w: X8 f
  74.         if header.find('Length') != -1:1 g7 S$ z4 c) A
  75.             length = header.split(':')[-1].strip()( S8 ^# y) S\" W
  76.             length = int(length)
    - P  X# H  n0 c4 V/ |1 D/ |
  77.     return length5 Q, z. U2 k9 P( @# S

  78. 3 S4 Y6 o% _5 u6 `
  79. def SpliteBlocks(totalsize, blocknumber):) L. T4 |# U7 ^) b\" l, e( ~\" N9 Q+ w
  80.     blocksize = totalsize/blocknumber
    \" g$ ^: P% P4 c8 I- v7 N
  81.     ranges = []' t) U8 ?: l/ b
  82.     for i in range(0, blocknumber-1):1 f* ^\" s- o. k$ W& ~$ Y
  83.         ranges.append((i*blocksize, i*blocksize +blocksize - 1))
    1 M: u3 n9 y$ s0 K
  84.     ranges.append(( blocksize*(blocknumber-1), totalsize -1 ))# N+ k9 v& F/ ~
  85.   w) W: Y4 A: g, E8 `* c* `# s& A
  86.     return ranges7 m5 c; ?3 ^: r* v( _# T
  87. def islive(tasks):
    1 s# z7 e- p4 d$ I* H( N  |, p0 }
  88.     for task in tasks:. `: U- |8 i' q) q
  89.         if task.isAlive():4 N/ m* K9 O6 u* N( [2 t5 T\" D- h0 q
  90.             return True
    ' M\" ~1 P) J. E$ ^. f; s2 T
  91.     return False/ W  s: j4 @: O* c, g0 |
  92. ! O- }4 a8 Z0 _; ~
  93. def paxel(url, output, blocks=6, proxies=local_proxies):
    % _! W# L9 v. o' u- ~. E3 Y6 @1 b
  94.     ''' paxel
    ; l, ^\" W) B) q5 f* f+ ~7 S
  95.     '''
    & G  y/ F4 K; t% n6 H: e\" o/ Z7 R
  96.     size = GetUrlFileSize( url, proxies )
    ; Q$ a4 x# _5 s& Y, o
  97.     ranges = SpliteBlocks( size, blocks )2 L0 [( y( a+ X! s. ~- m# n. U

  98. 9 A( f: y3 t) G& s
  99.     threadname = [ "thread_%d" % i for i in range(0, blocks) ]\" H4 ~6 ?) ^' d
  100.     filename = [ "tmpfile_%d" % i for i in range(0, blocks) ]
    - i5 C6 s( ], R. t! K, `
  101.    1 i/ A+ x$ U. I/ Z
  102.     tasks = []
    0 l' O# _/ j7 y9 G  S8 |
  103.     for i in range(0,blocks):; I4 i& M2 ~5 x9 z. U5 z
  104.         task = AxelPython( threadname[i], url, filename[i], ranges[i] )
    2 e9 |\" J; C3 v. L7 F0 C$ T, `( F
  105.         task.setDaemon( True )\" E( B+ Y3 h; T9 J* e% d
  106.         task.start()1 j9 C; I2 J) ^. l8 z+ r* u
  107.         tasks.append( task )8 z# D# F6 V& _5 z
  108.          
    8 Q/ i+ l: v\" B9 }. ^
  109.     time.sleep( 2 )' S! H. {8 f# f3 C/ V8 E& [3 W
  110.     while islive(tasks):8 x+ A) u9 @: p7 b
  111.         downloaded = sum( [task.downloaded for task in tasks] )0 A7 F: m' u\" K, v0 M0 Z& y, T4 N
  112.         process = downloaded/float(size)*100
    6 u: ]1 @3 E( K! v8 \4 D- r
  113.         show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process): [  q: [* K: t8 f1 E9 I
  114.         sys.stdout.write(show)
    ! |/ s- O. z& \
  115.         sys.stdout.flush(). u  ?) l! n; p/ X& r3 m! E
  116.         time.sleep( 0.5 )
    / f\" M: s5 l0 h/ B: u/ [/ z: R5 e
  117.             
    , _* \- n0 c$ B: P$ Y* |) F
  118.     filehandle = open( output, 'wb+' )
    0 T5 @/ u0 F3 W: E% v
  119.     for i in filename:3 G/ e! B$ F5 l0 N
  120.         f = open( i, 'rb' ), \. J  F4 O$ q+ T7 P
  121.         filehandle.write( f.read() )
    9 @# s\" _# ~8 \' a+ z+ F  }
  122.         f.close()  u& s& B\" q1 q% d8 n
  123.         try:
    % W7 @8 m, S8 P\" z2 F
  124.             os.remove(i)) t+ G- R7 a\" f: u- b: V, e
  125.             pass
      V( u. d6 g4 D; ]' L0 X; p( `2 u$ ]
  126.         except:! o0 m; t# O6 g! E' d6 f# O
  127.             pass
    $ Q  J- e/ S( B# _. ~# i( ^, v* d
  128. 8 d9 F' R9 `+ d. U+ H4 l! ?: H
  129.     filehandle.close()8 M; o# @  a' _9 S
  130. 0 w4 p\" D% ~- S\" Q
  131. if __name__ == '__main__':
    3 Y4 J  ^% Y& Y; C+ O
  132.     url = "http://xz1.mm667.com/xz84/images/001.jpg"' |% _) x0 D) u* B& `$ t; Y
  133.     output = '001.jpg': X8 r5 |3 N9 z  E8 Z: r3 K+ P
  134.     paxel( url, output, blocks=4, proxies={} )
复制代码

2 B7 f/ r- y' k
zan
转播转播0 分享淘帖0 分享分享0 收藏收藏0 支持支持0 反对反对0 微信微信
您需要登录后才可以回帖 登录 | 注册地址

qq
收缩
  • 电话咨询

  • 04714969085
fastpost

关于我们| 联系我们| 诚征英才| 对外合作| 产品服务| QQ

手机版|Archiver| |繁體中文 手机客户端  

蒙公网安备 15010502000194号

Powered by Discuz! X2.5   © 2001-2013 数学建模网-数学中国 ( 蒙ICP备14002410号-3 蒙BBS备-0002号 )     论坛法律顾问:王兆丰

GMT+8, 2025-9-14 05:27 , Processed in 1.248079 second(s), 51 queries .

回顶部