QQ登录

只需要一步,快速开始

 注册地址  找回密码
查看: 1495|回复: 0
打印 上一主题 下一主题

多线程下载图集

[复制链接]
字体大小: 正常 放大

1175

主题

4

听众

2867

积分

该用户从未签到

跳转到指定楼层
1#
发表于 2024-3-31 17:18 |只看该作者 |倒序浏览
|招呼Ta 关注Ta
网上有很多好看的动漫图集,手工逐张下载太费时。简单分析网页地址的规律后,写个多线程脚本即可搞定。
  1. #!/usr/bin/python! P: U1 V) j2 q, I8 ?' K
  2. # -*- coding: utf-8 -*-
    4 Y( t' T3 [2 s' o) f
  3. # filename: paxel.py; |, J: [9 K2 l4 d7 a4 |/ a
  4.   m5 Q% _6 F1 V4 D- L8 s' c
  5. '''It is a multi-thread downloading tool' v, J\" w! F% W8 e9 K\" s

  6. % P3 @4 K) A# J1 ]. R) P  W
  7.     It was developed follow axel.
    8 x1 E6 D) C9 t/ Y9 z; h
  8.         Author: volans
    % J% A: Q2 |8 l2 `( k5 m% l9 @5 B+ |
  9.         E-mail: volansw [at] gmail.com, V. g- j* B& |9 f
  10. '''
    ' g  h/ l! ~. \1 s* G

  11. ! Z) k! n5 D\" _
  12. import sys\" O/ l. y; ^4 z. a7 k8 B
  13. import os
    7 u) s- ?5 U9 O# [4 L1 D, [
  14. import time
    5 U. h- q. \! K  B) P
  15. import urllib9 [+ K% t% q, e3 ~
  16. from threading import Thread, Q. g* M/ G$ Z2 c, H6 j

  17. 4 T, a' }3 m4 e4 {. i8 Y
  18. local_proxies = {'http': 'http://131.139.58.200:8080'}
    3 `\" \0 a7 `6 y. V/ L: C& p
  19. , m2 b# x\" S$ z9 Z* l& U
  20. class AxelPython(Thread, urllib.FancyURLopener):1 r2 [: a) E0 ^2 F
  21.     '''Multi-thread downloading class.( O7 K  a$ f! K) Z/ f( H! V

  22. # M& y) d  `( a/ b2 l
  23.         run() is a vitural method of Thread.
    1 F& A) \% s\" S6 ~' x  |% ~' G
  24.     '''
    + E' u/ y; ^( Q4 D
  25.     def __init__(self, threadname, url, filename, ranges=0, proxies={}):
    / l( }! c$ Z/ H- v\" F! Z! @
  26.         Thread.__init__(self, name=threadname)  I3 g/ E9 e. I1 t1 z
  27.         urllib.FancyURLopener.__init__(self, proxies)
    5 k4 I8 O% L( U' p' E
  28.         self.name = threadname8 [+ q/ f\" }( d# @& |7 g
  29.         self.url = url8 m, W' c  L4 N. l, N
  30.         self.filename = filename8 i9 m8 y( [\" I
  31.         self.ranges = ranges2 f3 U& v5 s* F! _& U4 V* U0 _8 e
  32.         self.downloaded = 0
    : I2 g0 ~* Z4 y4 O. q$ p
  33. 6 o2 s2 G1 T* O
  34.     def run(self):; ?' ~7 ]- k3 \4 U, H( O
  35.         '''vertual function in Thread'''
    + `/ Z0 z- @( v\" G- U\" R
  36.         try:) P* Z* R6 O6 m: {% k
  37.             self.downloaded = os.path.getsize( self.filename )
    $ p$ n( D$ U$ ?$ q
  38.         except OSError:5 {: z+ e2 T\" G% B) A3 c
  39.             #print 'never downloaded'* h+ A9 ~4 o% S4 e
  40.             self.downloaded = 0( Q$ z% t3 I/ B6 ]+ H: M$ P) d\" c
  41. 4 E% j1 o  I$ ^. u$ j6 i
  42.         # rebuild start poind
    , L. |% k2 M! b9 _
  43.         self.startpoint = self.ranges[0] + self.downloaded: [1 Q; g1 `  j9 W/ E% n) K1 f: {
  44.          
    9 \0 @  C\" l2 ?3 t
  45.         # This part is completed
    * j2 s% d# a* w' j
  46.         if self.startpoint >= self.ranges[1]:
    - j' f% }& c8 K. ]3 O$ `; d
  47.             print 'Part %s has been downloaded over.' % self.filename( V. ^4 f7 L; I% }
  48.             return
    0 t2 b  J8 [# n$ I( V0 _
  49.          
    ! s, P+ o  [$ ?4 E( k
  50.         self.oneTimeSize = 16384 #16kByte/time1 b' r0 a* L7 a
  51.         print 'task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1]). X  l/ X\" r( f4 m3 j* m$ V

  52. 8 l\" D4 {! v. H! c: u( b
  53.         self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))7 ?& D* J6 f: G3 Q  S\" e( |
  54.             
    % S* M+ n/ B- g+ E
  55.         self.urlhandle = self.open( self.url )! q2 C\" f2 a2 @% }

  56. ) q+ |& y5 Q7 J: Q: r
  57.         data = self.urlhandle.read( self.oneTimeSize )# N9 z7 R  O! {9 n3 |
  58.         while data:9 J9 {; G* x9 j' s* u) |
  59.             filehandle = open( self.filename, 'ab+' )$ A/ X9 q& V5 s/ ?) u
  60.             filehandle.write( data ): H) s! r! p/ @
  61.             filehandle.close()
    0 E8 F& _% Y, g9 `

  62. 5 M1 |3 h9 R  v# l1 Z+ q  b
  63.             self.downloaded += len( data ), R9 X2 a2 t. O! g% I9 I
  64.             #print "%s" % (self.name): G& d* ?- i  l# c0 p6 M& Y
  65.             #progress = u'\r...'
    3 b9 ~$ M4 i- N, K5 }
  66. ; n3 O0 Q' K/ v+ ]5 \3 v0 N
  67.             data = self.urlhandle.read( self.oneTimeSize )$ D& n3 D. b) U
  68.          
    3 J, y  a! Y. N' L1 j  c* n6 Z
  69. def GetUrlFileSize(url, proxies={}):, X! H8 c0 O. `9 c2 G% o
  70.     urlHandler = urllib.urlopen( url, proxies=proxies )2 T, G: _3 y6 O; m# @
  71.     headers = urlHandler.info().headers
      K' Z3 b: N! U9 d; P  J
  72.     length = 0. M: L1 [& m7 s5 c- T
  73.     for header in headers:
    : O\" u$ l. x  |& w
  74.         if header.find('Length') != -1:
    / L, J/ o# r' A) h# A
  75.             length = header.split(':')[-1].strip()
    - h7 O  E% D: h\" D& @8 Z
  76.             length = int(length)8 w$ ?$ u8 A; ?( t, ~; U
  77.     return length5 t' j2 H* I& w7 m: Y
  78. . P+ t) _( z2 \$ Z+ j
  79. def SpliteBlocks(totalsize, blocknumber):) r4 P) D2 o. m! T# Y
  80.     blocksize = totalsize/blocknumber
    % F% Q! p0 v- j( {
  81.     ranges = []
    % F7 A; d- Z1 {7 G5 y! @6 {; F0 y
  82.     for i in range(0, blocknumber-1):1 E0 m6 Y0 o: G) S1 E2 H& i: h; i
  83.         ranges.append((i*blocksize, i*blocksize +blocksize - 1))$ s( G3 i# W+ [3 p5 Y0 a& ~
  84.     ranges.append(( blocksize*(blocknumber-1), totalsize -1 ))
    # V) g: ~, `: y7 ]0 c+ Y3 v
  85. , r3 G9 Y! ^) D$ t\" q% I, |- |/ A  D
  86.     return ranges2 e7 D7 X* S' e! g; i& y
  87. def islive(tasks):: t8 @' O\" U& L\" ?/ X
  88.     for task in tasks:
    $ l: h% a: w/ u: R9 e/ q2 D
  89.         if task.isAlive():7 B; s& ~\" H) C9 w
  90.             return True
    ( }6 n. A$ }3 u1 z
  91.     return False
    5 E7 s. ^$ R* `1 ^& Q\" J

  92. # I# D4 r' P& V9 }5 @
  93. def paxel(url, output, blocks=6, proxies=local_proxies):
      G' s8 i\" y' m
  94.     ''' paxel' w# W4 f' g9 U, Y% W- V
  95.     '''' M\" V; r4 n. ~3 R$ z: k
  96.     size = GetUrlFileSize( url, proxies )
    1 t! @3 k& T4 v; G$ q0 `
  97.     ranges = SpliteBlocks( size, blocks )8 |2 O, S  W0 S# z

  98. - [# }) ~$ S8 ^\" @' h2 U, `9 z
  99.     threadname = [ "thread_%d" % i for i in range(0, blocks) ]3 S* ~: M& [\" X- F0 N5 ]9 T) d7 \  h
  100.     filename = [ "tmpfile_%d" % i for i in range(0, blocks) ]# n$ [6 ]5 b0 \/ ^+ _6 W
  101.    6 Y9 s4 q) c2 l; J7 v
  102.     tasks = []
    1 g0 @: s# j4 [6 W; u9 U
  103.     for i in range(0,blocks):3 S- d2 M; P; y0 f; a8 c* d5 T
  104.         task = AxelPython( threadname[i], url, filename[i], ranges[i] )
    5 \$ K: T6 i6 z& i( [5 Q/ O$ u
  105.         task.setDaemon( True )2 l6 b5 m) I6 j0 [
  106.         task.start()1 F# X' p7 d6 y
  107.         tasks.append( task )
    ! o/ \6 P# H7 Y4 N' U- Y* [* {
  108.          
    / I/ ]2 W0 P9 @) b- O
  109.     time.sleep( 2 )
    , z3 P' p' a; H$ S$ L0 i9 k
  110.     while islive(tasks):
    5 ]  ~; h$ M& F0 y) b2 M$ N
  111.         downloaded = sum( [task.downloaded for task in tasks] )' h1 e6 e$ n$ w\" Z! q, d- k8 N% M3 L
  112.         process = downloaded/float(size)*1007 T; l- g: n. M9 ^2 o
  113.         show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
    - O6 j* F+ F+ g+ k0 }
  114.         sys.stdout.write(show), r% e$ R/ I- V/ Y
  115.         sys.stdout.flush()
    + ^& _! I1 l) U9 {& k- F/ P0 o
  116.         time.sleep( 0.5 )
    + z8 L) j7 c3 F  c) m
  117.             
    ' u* }\" o3 m! E
  118.     filehandle = open( output, 'wb+' )7 B7 B6 Y\" K$ h, k9 ~8 M. v
  119.     for i in filename:
    . [% B4 P3 U# l0 ]) s, W
  120.         f = open( i, 'rb' )
    - l% c. }, y. W* C& W/ i
  121.         filehandle.write( f.read() )
    ( d' j4 ^9 d2 y+ U1 d) y5 _& |1 ~
  122.         f.close()9 C% p& H+ F2 f# p
  123.         try:
    * A$ q( L, U8 L' K
  124.             os.remove(i)
    8 [# o( J: |0 m' V0 q1 i7 z
  125.             pass, y# |$ n( H- Q( P6 y+ S
  126.         except:
    ) `% m2 \+ E6 K) h* a( r
  127.             pass
    9 Z* ]$ ]5 e8 Q% g# @
  128. 4 \: I. t0 W: J$ Y
  129.     filehandle.close()% F' ]\" {0 a8 r\" G
  130. / M4 P1 _& o! g% J; a
  131. if __name__ == '__main__':5 d\" l2 T& F# a- g4 P6 l4 t
  132.     url = "http://xz1.mm667.com/xz84/images/001.jpg"! S: E, Q7 h, f2 X9 S7 g' T
  133.     output = '001.jpg'
    4 }+ z+ u3 @1 b
  134.     paxel( url, output, blocks=4, proxies={} )
复制代码
* q# w3 p: \  t3 V, U" A
zan
转播转播0 分享淘帖0 分享分享0 收藏收藏0 支持支持0 反对反对0 微信微信
您需要登录后才可以回帖 登录 | 注册地址

qq
收缩
  • 电话咨询

  • 04714969085
fastpost

关于我们| 联系我们| 诚征英才| 对外合作| 产品服务| QQ

手机版|Archiver| |繁體中文 手机客户端  

蒙公网安备 15010502000194号

Powered by Discuz! X2.5   © 2001-2013 数学建模网-数学中国 ( 蒙ICP备14002410号-3 蒙BBS备-0002号 )     论坛法律顾问:王兆丰

GMT+8, 2025-8-20 13:27 , Processed in 0.404273 second(s), 50 queries .

回顶部