- 在线时间
- 7 小时
- 最后登录
- 2021-1-20
- 注册时间
- 2021-1-9
- 听众数
- 3
- 收听数
- 0
- 能力
- 0 分
- 体力
- 23 点
- 威望
- 0 点
- 阅读权限
- 20
- 积分
- 10
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 7
- 主题
- 7
- 精华
- 0
- 分享
- 0
- 好友
- 0
升级   5.26% 该用户从未签到
- 自我介绍
- 我本名为我,那就是我
 |
 - #!/usr/bin/python
# -*- coding: utf-8 -*-
"""Scrape paginated article listings into a local SQLite database.

For every listing page, each ``div.loop`` entry contributes one row
(Url, Title, Author) to the ``Python`` table in ``Python.db``. Entries whose
URL is already stored are skipped, so the script can be re-run safely.
"""

import sqlite3
from re import escape  # noqa: F401 - unused now that SQL values are bound as parameters

import requests
from bs4 import BeautifulSoup

if __name__ == '__main__':
    conn = sqlite3.connect('Python.db')
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS Python (
            Url VARCHAR,
            Title VARCHAR,
            Author VARCHAR
        )''')
        conn.commit()

        # --------------------Split Line--------------------
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36"
        }

        for i in range(1, 1046):
            url = "http://xxx/index_%s.html" % str(i)
            # A timeout keeps one stalled connection from hanging the whole crawl.
            req = requests.get(url=url, headers=headers, timeout=30)
            req.encoding = "utf-8"
            html = BeautifulSoup(req.text, "lxml")

            # --------------------Split Line--------------------
            for div in html.find_all('div', class_='loop'):
                links = div.select('h2 > a')
                infos = div.select('.content_infor > span:nth-child(3)')
                # Skip malformed entries instead of aborting the crawl.
                if not links or not infos:
                    continue
                content_body = links[0]
                content_infor = infos[0]

                href = content_body.get('href')
                if not href:
                    continue
                article_url = "http://xxx" + href

                # --------------------Split Line--------------------
                # Bound parameters: scraped text must never be spliced into SQL.
                c.execute("SELECT 1 FROM Python WHERE Url = ? LIMIT 1", (article_url,))
                if c.fetchone() is not None:
                    continue

                # Fall back to the link text when the title attribute is absent.
                title = content_body.get('title')
                if title is None:
                    title = content_body.get_text(strip=True)
                author = content_infor.text.replace('xxx: ', '')

                # --------------------Split Line--------------------
                # The driver handles quoting, so the title is stored verbatim
                # (no regex escaping, no manual quote doubling).
                c.execute(
                    'INSERT INTO Python( Url, Title, Author) VALUES ( ?, ?, ?)',
                    (article_url, title, author))

            # Commit once per page so an interruption loses at most one page.
            conn.commit()
            print("第%s页" % str(i))

        # --------------------Split Line--------------------
    finally:
        # Release the database handle even if a request or parse step fails.
        conn.close()
- . {+ \! w2 f\" I w/ i; ]+ y
' G: i( f# q5 _! ]' Z0 K% t8 e/ v- `% F8 j$ n( @
Python交流群:1047602540
|
zan
|