- 在线时间
- 7 小时
- 最后登录
- 2021-1-20
- 注册时间
- 2021-1-9
- 听众数
- 3
- 收听数
- 0
- 能力
- 0 分
- 体力
- 23 点
- 威望
- 0 点
- 阅读权限
- 20
- 积分
- 10
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 7
- 主题
- 7
- 精华
- 0
- 分享
- 0
- 好友
- 0
升级   5.26% 该用户从未签到
- 自我介绍
- 我本名为我,那就是我
 |
 - #!/usr/bin/python
# -*- coding: utf-8 -*-
"""Crawl paginated article listings and record them in a local SQLite file.

For every listing page the script extracts, per ``div.loop`` entry, the
article link (``h2 > a``) and the third ``span`` inside ``.content_infor``
(treated as the author), then stores ``(Url, Title, Author)`` in the
``Python`` table of ``Python.db``.  URLs already present are skipped, so the
script can be re-run without creating duplicates.
"""

import sqlite3
import requests
from bs4 import BeautifulSoup
from re import escape  # kept from the original script; not used for SQL any more

# Site root that relative hrefs are appended to (placeholder in the original).
BASE_URL = "http://xxx"
# Listing pages are numbered index_1.html .. index_1045.html.
FIRST_PAGE = 1
LAST_PAGE = 1045
# Without a timeout ``requests`` may block forever on a stalled connection.
REQUEST_TIMEOUT = 30

if __name__ == '__main__':
    conn = sqlite3.connect('Python.db')
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS Python (
            Url VARCHAR,
            Title VARCHAR,
            Author VARCHAR
        )''')
        conn.commit()

        # -------------------- Request settings --------------------
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36"
        }

        for i in range(FIRST_PAGE, LAST_PAGE + 1):
            url = "%s/index_%s.html" % (BASE_URL, i)
            req = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
            req.encoding = "utf-8"
            html = BeautifulSoup(req.text, "lxml")

            # -------------------- Parse one listing page --------------------
            for div in html.find_all('div', class_='loop'):
                content_body = div.select_one('h2 > a')
                content_infor = div.select_one('.content_infor > span:nth-child(3)')
                # Skip malformed entries instead of aborting the whole crawl
                # with an IndexError/TypeError.
                if content_body is None or content_infor is None:
                    continue
                href = content_body.get('href')
                if not href:
                    continue

                article_url = BASE_URL + href
                title = content_body.get('title') or ''
                author = content_infor.text.replace('xxx: ', '')

                # -------------------- Skip known URLs --------------------
                # Bound parameters keep scraped text out of the SQL string, so
                # quotes in a URL/title/author can neither break the statement
                # nor inject SQL.
                c.execute("SELECT 1 FROM Python WHERE Url = ? LIMIT 1", (article_url,))
                if c.fetchone() is not None:
                    continue

                # -------------------- Store the new article --------------------
                # The title is stored verbatim: with placeholders no manual
                # quote-doubling or regex escaping is needed (the latter used
                # to insert stray backslashes into the saved titles).
                c.execute(
                    "INSERT INTO Python (Url, Title, Author) VALUES (?, ?, ?)",
                    (article_url, title, author))

            # Commit once per page so an interrupted run keeps finished pages.
            conn.commit()
            print("第%s页" % str(i))
    finally:
        # Release the database file even when a request or parse step fails.
        conn.close()
Python交流群:1047602540
|
zan
|