- 在线时间
- 7 小时
- 最后登录
- 2021-1-20
- 注册时间
- 2021-1-9
- 听众数
- 3
- 收听数
- 0
- 能力
- 0 分
- 体力
- 23 点
- 威望
- 0 点
- 阅读权限
- 20
- 积分
- 10
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 7
- 主题
- 7
- 精华
- 0
- 分享
- 0
- 好友
- 0
升级   5.26% 该用户从未签到
- 自我介绍
- 我本名为我,那就是我
 |
 - #!/usr/bin/python
# -*- coding: utf-8 -*-
"""Scrape paginated article listings into a local SQLite database.

For every listing page ``http://xxx/index_<n>.html`` (n = 1..1045) the script
reads each ``<div class="loop">`` entry, takes the article link, title and
author from it, and stores them in the ``Python`` table of ``Python.db``.
Articles whose URL is already in the table are skipped, so the script can be
re-run without creating duplicate rows.
"""
import sqlite3
import requests
from bs4 import BeautifulSoup
from re import escape  # no longer used (queries are parameterized); import kept as in the original

# Site root that relative article links are appended to.
BASE_URL = "http://xxx"

# Listing pages are numbered FIRST_PAGE .. LAST_PAGE inclusive.
FIRST_PAGE = 1
LAST_PAGE = 1045

# Seconds to wait for the server before giving up; without a timeout a single
# stalled connection would block the whole crawl forever.
REQUEST_TIMEOUT = 30

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36"
}


def _ensure_table(conn):
    """Create the ``Python`` table (Url, Title, Author) if it does not exist."""
    conn.execute('''CREATE TABLE IF NOT EXISTS Python (
        Url VARCHAR,
        Title VARCHAR,
        Author VARCHAR
    )''')
    conn.commit()


def _article_exists(conn, article_url):
    """Return True when a row with this Url is already stored."""
    row = conn.execute(
        "SELECT COUNT(*) FROM Python WHERE Url = ?", (article_url,)
    ).fetchone()
    return row is not None and row[0] > 0


def _scrape_page(conn, page_number):
    """Download one listing page and insert every article not yet stored.

    The inserts for the page are committed together once the whole page has
    been processed.
    """
    url = "http://xxx/index_%s.html" % str(page_number)
    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.encoding = "utf-8"
    html = BeautifulSoup(response.text, "lxml")

    for div in html.find_all('div', class_='loop'):
        link = div.select_one('h2 > a')
        author_span = div.select_one('.content_infor > span:nth-child(3)')
        # Skip malformed entries instead of aborting the whole crawl on them.
        if link is None or author_span is None or not link.get('href'):
            continue

        article_url = BASE_URL + link.get('href')
        if _article_exists(conn, article_url):
            continue

        # Values are bound with "?" placeholders, so quotes or other special
        # characters in the title/author can neither break nor alter the SQL,
        # and the title is stored exactly as it appears on the page.
        conn.execute(
            "INSERT INTO Python (Url, Title, Author) VALUES (?, ?, ?)",
            (
                article_url,
                link.get('title'),
                author_span.text.replace('xxx: ', ''),
            ),
        )

    conn.commit()


if __name__ == '__main__':
    conn = sqlite3.connect('Python.db')
    try:
        _ensure_table(conn)
        for i in range(FIRST_PAGE, LAST_PAGE + 1):
            _scrape_page(conn, i)
            print("第%s页" % str(i))
    finally:
        # Always release the database handle, even if a page fails.
        conn.close()
Python交流群:1047602540
|
zan
|