QQ登录

只需要一步,快速开始

 注册地址  找回密码
查看: 2231|回复: 0
打印 上一主题 下一主题

多线程下载图集

[复制链接]
字体大小: 正常 放大

1186

主题

4

听众

2923

积分

该用户从未签到

跳转到指定楼层
1#
发表于 2024-3-31 17:18 |只看该作者 |倒序浏览
|招呼Ta 关注Ta
网上好看的动漫图集,如果手工下载太费时了。简单分析下网页地址规律,写个多线程脚本搞定。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py

'''It is a multi-thread downloading tool

    It was developed following axel.
        Author: volans
        E-mail: volansw [at] gmail.com
'''

import sys
import os
import time
import urllib
from threading import Thread

local_proxies = {'http': 'http://131.139.58.200:8080'}

  20. class AxelPython(Thread, urllib.FancyURLopener):
    . P/ r( ~: Z4 I
  21.     '''Multi-thread downloading class.: \3 C, \, Z  x& r* {
  22. ! b6 g/ ~/ j' o, V2 L8 m
  23.         run() is a vitural method of Thread.. a0 ^( E. [- I9 K* O6 ?& _
  24.     ''', [- Z( t- \6 _: m9 w  E3 n, q+ j4 y' g
  25.     def __init__(self, threadname, url, filename, ranges=0, proxies={}):
    ( n- l( Z! W- Z: y, a! ~1 }' o+ S
  26.         Thread.__init__(self, name=threadname)7 T# J0 ?% G. q9 v6 s9 A3 w- E
  27.         urllib.FancyURLopener.__init__(self, proxies)+ y) l* ^: z% S9 j  }' `
  28.         self.name = threadname+ a0 E& G* G* j/ i
  29.         self.url = url' m7 L+ S5 X) E4 p: r. o
  30.         self.filename = filename
    0 {1 R) j. L& q; H6 v' K8 S7 _
  31.         self.ranges = ranges
      {5 O3 _\" x4 o* z' Y: u7 }
  32.         self.downloaded = 0( v6 r# D/ i# U6 }, {/ k
  33. ( q) M- B3 d\" w
  34.     def run(self):% E* L/ X# A6 U3 E1 v: K\" |  k) ~
  35.         '''vertual function in Thread'''
    4 U0 ^1 z5 |! I5 x( j( w
  36.         try:
    2 E  s& K* p  W4 W
  37.             self.downloaded = os.path.getsize( self.filename ). U5 s: _% H( T$ x% I
  38.         except OSError:, F$ C0 b3 o5 }
  39.             #print 'never downloaded'1 @\" s\" F; n. o  f* m' R
  40.             self.downloaded = 0
    ' U5 K7 A9 p; t: `

  41. 9 j\" ]( k- H* o& c& m& [
  42.         # rebuild start poind
    & t/ s) q/ y1 |\" {1 b! S
  43.         self.startpoint = self.ranges[0] + self.downloaded
    . Z* O7 \) j. v( ~/ W/ @
  44.          
    ( ~( t( [+ H- u/ e! d
  45.         # This part is completed
    9 c! M0 _! d: R2 Q# M% a# x5 K0 c
  46.         if self.startpoint >= self.ranges[1]:
    ( t/ i+ B4 I/ [- `
  47.             print 'Part %s has been downloaded over.' % self.filename# V) h' ^, E- H6 {9 T; E
  48.             return
    ( k! Q6 a) ~1 ]8 l& M7 i
  49.          , m5 X0 [: z\" \+ g' ~
  50.         self.oneTimeSize = 16384 #16kByte/time, S& W' t5 R4 ^6 g- k4 r4 u
  51.         print 'task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1])) A; D: R' [7 V  ^' K

  52. 3 g% [9 U$ {3 O+ v8 F; b0 U
  53.         self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))/ {( D! O* y) {/ i  h3 `- {
  54.             
    ! n2 J5 I: v* Q% Z
  55.         self.urlhandle = self.open( self.url )5 {; y1 c0 l& u/ y7 [5 S
  56. 5 \5 B. i+ ?; [' E
  57.         data = self.urlhandle.read( self.oneTimeSize )4 {: E0 H\" h' S# |
  58.         while data:3 l& I5 d# s  i( h5 P6 T6 A, X
  59.             filehandle = open( self.filename, 'ab+' )
    $ m3 o/ V+ G3 }& B/ C, T; U
  60.             filehandle.write( data )( Q. }0 y; G$ `. Q7 h# i  \. F' {
  61.             filehandle.close()
    ! s( S) E\" F) b: I1 {3 ]  m% r/ s\" x

  62. # j0 N9 j  G: d7 c: Q; X/ f3 g' v
  63.             self.downloaded += len( data )6 R  l2 @7 `  m8 ?, q% S
  64.             #print "%s" % (self.name)* M8 z\" D) Y4 m* d8 z0 E
  65.             #progress = u'\r...'
    - `\" ~4 m% e- F% G1 P! d2 W3 ?- D
  66. 5 j0 Q\" |/ B1 G; Z7 o
  67.             data = self.urlhandle.read( self.oneTimeSize )
    5 |1 w' v' M, y' l1 x
  68.          
    * {  E9 N5 j0 Z1 U\" A
  69. def GetUrlFileSize(url, proxies={}):
    2 Q+ i0 o+ c9 B+ E* W7 J
  70.     urlHandler = urllib.urlopen( url, proxies=proxies )
    8 x  t, F\" z1 E) g7 h# ?
  71.     headers = urlHandler.info().headers$ b0 U' i! {7 h
  72.     length = 0
      c; C8 M, H) A\" T/ U
  73.     for header in headers:
    ! R& `\" Y8 N3 ]7 b5 L\" |
  74.         if header.find('Length') != -1:2 \! M! u5 R$ y2 L  r5 |
  75.             length = header.split(':')[-1].strip()& @9 I# S  b3 h- Q$ D
  76.             length = int(length)
    8 Y8 m. c2 J\" b1 |+ Z
  77.     return length
    ! t  T! c# H- x0 @

  78. 4 d, U6 @+ r/ n! B; a$ Y& B
  79. def SpliteBlocks(totalsize, blocknumber):
    0 A8 e( G9 R$ G
  80.     blocksize = totalsize/blocknumber
    % l% w* F0 _4 G3 K0 Q/ K4 K
  81.     ranges = []
    6 e$ |( r+ M. E5 _/ Q' u' R
  82.     for i in range(0, blocknumber-1):
    ' M! l* A6 @0 J. w8 Z
  83.         ranges.append((i*blocksize, i*blocksize +blocksize - 1))
    0 S. s- B( E5 b( [& a; g4 l
  84.     ranges.append(( blocksize*(blocknumber-1), totalsize -1 ))# H. w# O8 G) ~2 p# G0 o! ?  g
  85. 0 K/ M' v/ N( o
  86.     return ranges
    : z* a0 _7 j, \3 e& v4 Y
  87. def islive(tasks):
    6 r) l( `) r1 [/ d! C) L
  88.     for task in tasks:% W5 T3 Z1 b* K\" J8 T
  89.         if task.isAlive():
    - `# e( @( X, Y. D/ x3 K
  90.             return True+ p6 h. u% l* q- z1 p
  91.     return False$ {. _# z  f/ q) W; r7 Z
  92. . b  k4 ~7 R$ h' F9 V2 @9 p5 z
  93. def paxel(url, output, blocks=6, proxies=local_proxies):
    + _1 }3 A# s  S
  94.     ''' paxel
      o7 @6 e8 I2 \% c
  95.     '''
    3 r/ {1 D8 D7 r4 e0 B
  96.     size = GetUrlFileSize( url, proxies )2 `, @: b2 ~! R/ a' F* z) ?
  97.     ranges = SpliteBlocks( size, blocks )
    / Z& M  g# ?' d9 @6 Q

  98. 8 l- p! ]3 h6 a& C4 [8 Q; k\" O& v
  99.     threadname = [ "thread_%d" % i for i in range(0, blocks) ]
    $ q' L4 m# h+ ^/ l, K' t
  100.     filename = [ "tmpfile_%d" % i for i in range(0, blocks) ]
    : A& @\" }! I) x( e4 e' z- Z
  101.    
    ( R4 ^& d' L: ?& H
  102.     tasks = []
    \" @3 q: ~, O; B% P* Q' V
  103.     for i in range(0,blocks):# s4 q9 k* m( }! g5 o/ k
  104.         task = AxelPython( threadname[i], url, filename[i], ranges[i] )- |9 |% Q\" v. Q  |
  105.         task.setDaemon( True ), y0 ]4 B1 e: C% c$ {
  106.         task.start()
    2 a2 j+ Q$ h/ w: A. F9 z1 c
  107.         tasks.append( task )1 C  r; J6 k& j$ T  z& l\" \
  108.          9 U4 Z( c( z' Q2 k2 d( a5 P; O
  109.     time.sleep( 2 )2 z) q/ E' H2 l+ ]  L0 q
  110.     while islive(tasks):
    1 D( O: L# v- ^* P5 F; n
  111.         downloaded = sum( [task.downloaded for task in tasks] ), Z1 f8 b: H% y5 t& y/ E
  112.         process = downloaded/float(size)*1005 {& {; G\" n7 J) R
  113.         show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
    4 A6 B5 S: `, ^\" C( I
  114.         sys.stdout.write(show)
    7 P& Z# I( l+ X, ?7 g1 B  V
  115.         sys.stdout.flush()- b( ?. R\" y\" c$ Y
  116.         time.sleep( 0.5 )
    ) M! O$ p& E9 n) V) K) E% }
  117.             
    & h3 Y8 e1 C$ U, ^; ~/ C7 c
  118.     filehandle = open( output, 'wb+' )
    % C1 F2 X- y$ F8 O
  119.     for i in filename:5 ?4 Y3 ?; O) c
  120.         f = open( i, 'rb' )/ `% ^, V/ b2 e9 C& O
  121.         filehandle.write( f.read() )
    1 d* N\" g7 j8 n9 Y9 ?3 }4 _( d
  122.         f.close(): l$ Y, r. w: E1 Q9 U+ e# _; T
  123.         try:
    6 t5 F2 G- l0 ?2 k8 E5 s
  124.             os.remove(i)# o; N! a8 {& c8 ]- |$ W0 D, l
  125.             pass
    8 C5 U. J3 {9 g- b2 y$ J; y
  126.         except:* y6 V* W% i7 x
  127.             pass$ L; i\" t$ R, b4 \
  128. $ i\" S9 ^8 j5 `) T
  129.     filehandle.close()
    1 \& p. }  _6 u: c* Z) D! ?- `

  130. % r# a7 P  w8 j: o2 d2 v3 E
  131. if __name__ == '__main__':
    ( R! P7 I6 x& U, U' a1 y. W* R
  132.     url = "http://xz1.mm667.com/xz84/images/001.jpg"
    : r7 o  d2 r8 T
  133.     output = '001.jpg'
    2 q) x/ S3 d' w+ k
  134.     paxel( url, output, blocks=4, proxies={} )
复制代码

' F. x# N: A' }: I( i
zan
转播转播0 分享淘帖0 分享分享0 收藏收藏0 支持支持0 反对反对0 微信微信
您需要登录后才可以回帖 登录 | 注册地址

qq
收缩
  • 电话咨询

  • 04714969085
fastpost

关于我们| 联系我们| 诚征英才| 对外合作| 产品服务| QQ

手机版|Archiver| |繁體中文 手机客户端  

蒙公网安备 15010502000194号

Powered by Discuz! X2.5   © 2001-2013 数学建模网-数学中国 ( 蒙ICP备14002410号-3 蒙BBS备-0002号 )     论坛法律顾问:王兆丰

GMT+8, 2026-4-19 09:33 , Processed in 0.462619 second(s), 51 queries .

回顶部