QQ登录

只需要一步,快速开始

 注册地址  找回密码
查看: 2232|回复: 0
打印 上一主题 下一主题

多线程下载图集

[复制链接]
字体大小: 正常 放大

1186

主题

4

听众

2923

积分

该用户从未签到

跳转到指定楼层
1#
发表于 2024-3-31 17:18 |只看该作者 |正序浏览
|招呼Ta 关注Ta
网上有很多好看的动漫图集,手工逐张下载太费时。简单分析一下网页地址的规律,写个多线程脚本就能搞定。
  1. #!/usr/bin/python
    / B/ E. x8 W! \& j  g9 E. S$ p. m
  2. # -*- coding: utf-8 -*-3 x, v/ d! S7 V
  3. # filename: paxel.py
    1 s; c\" I1 [1 {/ _; t* M& G
  4. : K+ V+ k8 t, p6 L) s, j
'''It is a multi-threaded downloading tool.

    It was developed following the example of axel.
        Author: volans
        E-mail: volansw [at] gmail.com
'''
  11. 5 x9 @+ W8 z3 f+ k
  12. import sys
    ! |4 l# V) X/ C\" F
  13. import os: @( F1 v/ w0 k0 Q3 c6 Y% f6 D
  14. import time
    % d- }: y\" n  ]( w' p% i0 b
  15. import urllib. P) Y$ W# n0 K\" k
  16. from threading import Thread; ~\" V3 e6 w! p5 g
  17. 8 X1 ]7 Z0 I3 A
  18. local_proxies = {'http': 'http://131.139.58.200:8080'}- E1 u( Q# u- R. Y

  19. , }+ e  G. e& B8 b5 z, p1 Y
  20. class AxelPython(Thread, urllib.FancyURLopener):
    ( L% X( ?3 B/ i( Y% x( e
  21.     '''Multi-thread downloading class.8 ^+ R+ k# t  M5 d
  22. . w$ f/ t8 z\" l
  23.         run() is a vitural method of Thread.
    + ^  I( e6 T' I% `1 ?
  24.     '''7 A, m( l( P/ r' _5 c7 \5 N- K5 C
  25.     def __init__(self, threadname, url, filename, ranges=0, proxies={}):1 m2 q9 z% `; x$ g6 O  _
  26.         Thread.__init__(self, name=threadname)
    / P) f$ B& K% }, S
  27.         urllib.FancyURLopener.__init__(self, proxies): d, {' F( |4 g! e, [3 ^! t
  28.         self.name = threadname+ l4 N0 K5 m: T( o( l
  29.         self.url = url. d' V* C4 f- c; m' M! w\" s& E+ B
  30.         self.filename = filename
    & m+ l/ G7 O+ m4 P$ N9 P3 x/ x
  31.         self.ranges = ranges. K! `2 q1 T' x& ~3 e/ E
  32.         self.downloaded = 0
    - k+ c\" k) {4 Y6 U4 Q4 V
  33. 9 ]! B8 a* Z) [+ h7 v8 W3 D
  34.     def run(self):
    0 w! g% A/ s- K/ U4 Q1 r  Y- f% R
  35.         '''vertual function in Thread'''4 ~0 q1 A8 K5 n3 H
  36.         try:
    \" o+ R9 P' y: h% u4 s\" {: Y
  37.             self.downloaded = os.path.getsize( self.filename )* b( }/ Z% K; i( H
  38.         except OSError:
    8 T8 m8 H, N3 Q' C
  39.             #print 'never downloaded'
    , L) j0 a. O% k
  40.             self.downloaded = 0  ^2 |6 d3 D) V2 l+ o% R  H

  41. ' z8 `\" N/ v# m, i
  42.         # rebuild start poind5 d& T; ~\" Q- b7 Q\" ?) ]
  43.         self.startpoint = self.ranges[0] + self.downloaded' n5 T4 u+ U5 C% q
  44.          
    5 R4 i\" U% k$ J- b+ y& B
  45.         # This part is completed
    ; d- a: ]: C- I\" Y% e1 a& Y
  46.         if self.startpoint >= self.ranges[1]:8 `( C6 s% C- H! t
  47.             print 'Part %s has been downloaded over.' % self.filename
    & R  P  {8 J1 F% t3 T
  48.             return
    ) c$ ^6 {* k; b0 Y! f
  49.          
    4 w3 ]  Q2 r/ D2 H- V
  50.         self.oneTimeSize = 16384 #16kByte/time
    7 ]3 M+ B4 m1 W; }
  51.         print 'task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1])- O* o* n$ `; R

  52.   F8 a6 i/ r; O# z, j% ?  S
  53.         self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))& }2 D+ o8 ]7 I1 T\" G! v0 N& o5 x
  54.              ' N8 k3 I1 x\" X
  55.         self.urlhandle = self.open( self.url )
    , m1 j; N6 A6 v3 s& \
  56. ' n9 l4 m7 Z2 i0 {  B3 m
  57.         data = self.urlhandle.read( self.oneTimeSize )
    $ ^\" W+ Y1 E! o4 x
  58.         while data:1 F4 M# ^5 Z! q\" E: M
  59.             filehandle = open( self.filename, 'ab+' )
    * C1 U( B. e7 N: b( I7 J
  60.             filehandle.write( data )
    . I) l# U3 |3 O9 J\" F
  61.             filehandle.close()
    ' G- C$ k5 l  w% e* L* q. ?

  62. & x) E  P; n* V, f! O. A0 V2 G; u3 i
  63.             self.downloaded += len( data )
    / o# g) n; J! p7 m& |; q: r- c* W
  64.             #print "%s" % (self.name)& i: ~6 o# [7 |7 u# ~  I
  65.             #progress = u'\r...'; g  g! j2 u5 r4 t

  66. ( A( f; _! m9 d0 p# H) I
  67.             data = self.urlhandle.read( self.oneTimeSize )
    5 E! Y  j+ C! F, \
  68.          
    0 f1 }- o1 G( o$ v8 I# F
  69. def GetUrlFileSize(url, proxies={}):2 X+ @/ R( w3 e5 j, `' m
  70.     urlHandler = urllib.urlopen( url, proxies=proxies )$ k2 Z3 b( e+ K' S6 i
  71.     headers = urlHandler.info().headers
    # c2 {5 _( g+ q
  72.     length = 0( \8 ~3 j2 y1 z; C2 L$ Q
  73.     for header in headers:
    4 q/ H8 }% v- \8 Z+ a2 R5 W
  74.         if header.find('Length') != -1:
    - M+ M- m/ I. k* h( Z2 z
  75.             length = header.split(':')[-1].strip()' T: v' O2 x\" p1 r' t
  76.             length = int(length)3 g- c6 i6 F! `% f2 ]7 e
  77.     return length! B  w3 k3 U8 g1 w$ ]

  78. + |& I8 X+ ?  U3 e) `/ `+ v
  79. def SpliteBlocks(totalsize, blocknumber):8 O* j9 T& I8 K; P1 W. y
  80.     blocksize = totalsize/blocknumber
    0 X9 m+ o3 i4 D
  81.     ranges = []
    % P; K; V. [) w% |# J. H; r- k
  82.     for i in range(0, blocknumber-1):
    * x1 V5 j% M2 E( f0 n8 p/ ~! w
  83.         ranges.append((i*blocksize, i*blocksize +blocksize - 1))- O, w% Y& F% B* _- {
  84.     ranges.append(( blocksize*(blocknumber-1), totalsize -1 ))
    ) n# Z  f0 l8 C% ^. P/ K& G
  85. 7 Q: }* Y( ^! l( L# k/ T6 S$ p
  86.     return ranges0 z5 p/ b% L1 x) }1 e
  87. def islive(tasks):1 S+ n/ \; J* A6 S+ L5 g
  88.     for task in tasks:% ~- ?/ J\" k$ g; r# D
  89.         if task.isAlive():
    - G2 T, c) R) O\" ^' U8 e
  90.             return True1 J  U1 s: }- p. a3 y
  91.     return False
    7 u! j0 U: Z4 C: T
  92. / H3 R; R* V, j  \9 C/ c+ u( F
  93. def paxel(url, output, blocks=6, proxies=local_proxies):# ~( E2 z7 ]0 e9 d
  94.     ''' paxel# S; j8 R9 B% v, [3 R& G
  95.     '''0 F5 ~$ s: {1 ?( I, N\" n6 G
  96.     size = GetUrlFileSize( url, proxies )
    ' S/ b; v% P1 z$ W5 U
  97.     ranges = SpliteBlocks( size, blocks )
    - F8 }, R6 j/ I8 ?1 a8 l
  98. $ s! ]5 y: H\" H* d
  99.     threadname = [ "thread_%d" % i for i in range(0, blocks) ]/ J4 U7 w5 n5 \% i3 V% @
  100.     filename = [ "tmpfile_%d" % i for i in range(0, blocks) ]
    - ^& x1 v. Q* N' B. ]. J' {* }* A
  101.    
    9 v! x3 J! ?1 E( }% ]# K- ^
  102.     tasks = []+ K+ [1 \7 d0 A$ w# \, B# X; s
  103.     for i in range(0,blocks):
    : w& B\" u, u$ o  M% v) S6 p
  104.         task = AxelPython( threadname[i], url, filename[i], ranges[i] )
    ! f8 g$ \% r% ~; u: S/ k9 U
  105.         task.setDaemon( True )
    ' V/ ?8 T( N7 F8 z
  106.         task.start()6 ^7 w: x  k+ o  q\" S0 r
  107.         tasks.append( task )
    % q+ i7 G; L2 a5 Q' k
  108.          
    2 n. f# n9 f: ]9 S$ G3 a  g; W6 c# b
  109.     time.sleep( 2 )
    : D5 ?0 Z5 j5 [, {% u
  110.     while islive(tasks):1 y4 l& t& o\" w+ _
  111.         downloaded = sum( [task.downloaded for task in tasks] )
    , c* q# P; O# r# V# L' E. v
  112.         process = downloaded/float(size)*1006 M- t$ r: |5 Z! P4 z& ?+ P
  113.         show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
    ' n6 M. A7 Q% a7 j$ {) d; V5 _# e
  114.         sys.stdout.write(show)
    ) C3 N* t7 i3 f$ k1 e: }& b
  115.         sys.stdout.flush()
    1 w& v+ s1 d5 J! f( \0 Y
  116.         time.sleep( 0.5 )\" c; q' F7 e, i5 K! t
  117.              , P1 }7 h8 O6 z
  118.     filehandle = open( output, 'wb+' )& a; b; w1 o, I
  119.     for i in filename:1 C+ I, X9 J$ ^( I1 @- y* e
  120.         f = open( i, 'rb' )6 q3 `' `) D  h; D+ f
  121.         filehandle.write( f.read() )9 i& u( o6 q% n* B( V+ z
  122.         f.close()
    4 c7 Q, ?3 g3 U4 _7 ?+ P+ Y
  123.         try:
    & i5 c* j' [% X( `: X3 h
  124.             os.remove(i)
    ; y; ^4 O( i\" S7 u9 X' ^
  125.             pass5 Y8 j0 ?  `. N$ M; C8 R
  126.         except:
    5 N$ y3 g9 {. x( t( ]1 C( k\" b. y
  127.             pass, Q7 \$ k, z% {
  128. 9 P' \% o1 p( M, b4 R
  129.     filehandle.close()
    0 l; D9 w4 m+ i) r1 \. k) ?

  130. ' ]! c6 y6 s  R1 N. D7 Y: Q
  131. if __name__ == '__main__':; a4 S4 E& E4 u$ v0 b
  132.     url = "http://xz1.mm667.com/xz84/images/001.jpg"* C2 N1 o- L) d7 G( {0 u, [* b7 E
  133.     output = '001.jpg'/ @: {( }3 Z7 y, l5 c! d
  134.     paxel( url, output, blocks=4, proxies={} )
复制代码

* p/ }9 @6 `6 X' v/ z1 g
zan
转播转播0 分享淘帖0 分享分享0 收藏收藏0 支持支持0 反对反对0 微信微信
您需要登录后才可以回帖 登录 | 注册地址

qq
收缩
  • 电话咨询

  • 04714969085
fastpost

关于我们| 联系我们| 诚征英才| 对外合作| 产品服务| QQ

手机版|Archiver| |繁體中文 手机客户端  

蒙公网安备 15010502000194号

Powered by Discuz! X2.5   © 2001-2013 数学建模网-数学中国 ( 蒙ICP备14002410号-3 蒙BBS备-0002号 )     论坛法律顾问:王兆丰

GMT+8, 2026-4-19 11:10 , Processed in 0.456009 second(s), 51 queries .

回顶部