- 在线时间
- 7 小时
- 最后登录
- 2021-1-20
- 注册时间
- 2021-1-9
- 听众数
- 3
- 收听数
- 0
- 能力
- 0 分
- 体力
- 23 点
- 威望
- 0 点
- 阅读权限
- 20
- 积分
- 10
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 7
- 主题
- 7
- 精华
- 0
- 分享
- 0
- 好友
- 0
升级   5.26% 该用户从未签到
- 自我介绍
- 我本名为我,那就是我
 |
 - #!/usr/bin/python
# -*- coding: utf-8 -*-
"""Scrape paginated article listings into a local SQLite database.

Every listing page ``<BASE_URL>/index_<n>.html`` is downloaded and each
``div.loop`` entry on it is parsed for its link, title and author.  Entries
whose URL is not yet present are inserted into the ``Python`` table of
``Python.db``.

NOTE(review): ``http://xxx`` and the ``'xxx: '`` author prefix are
placeholders carried over from the original post; substitute the real site
values before running.
"""
import sqlite3
import sys
from re import escape  # noqa: F401 -- original import kept; SQL values are bound as parameters now

import requests
from bs4 import BeautifulSoup

BASE_URL = "http://xxx"
PAGE_URL_TEMPLATE = BASE_URL + "/index_%s.html"
FIRST_PAGE = 1
LAST_PAGE = 1045  # inclusive; the original loop was range(1, 1046)
REQUEST_TIMEOUT = 10  # seconds; without a timeout one stalled page hangs the whole crawl
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36"
}


def _parse_entries(html):
    """Yield ``(url, title, author)`` for every well-formed entry on a page.

    Entries missing the title link, its ``href``, or the author span are
    skipped instead of aborting the crawl with an IndexError/TypeError.
    """
    for div in html.find_all('div', class_='loop'):
        links = div.select('h2 > a')
        infos = div.select('.content_infor > span:nth-child(3)')
        if not links or not infos:
            continue

        link = links[0]
        href = link.get('href')
        if not href:
            continue

        # Prefer the title attribute; fall back to the link text when absent.
        title = link.get('title') or link.get_text(strip=True)
        author = infos[0].text.replace('xxx: ', '')
        yield BASE_URL + href, title, author


def _store_entry(cursor, url, title, author):
    """Insert one entry unless its URL is already stored.

    Values are passed as bound parameters, so quotes or other special
    characters in scraped text can neither break nor inject into the SQL,
    and the text is stored unmodified.

    Returns True when a new row was inserted, False when the URL existed.
    """
    cursor.execute("SELECT 1 FROM Python WHERE Url = ? LIMIT 1", (url,))
    if cursor.fetchone() is not None:
        return False

    cursor.execute(
        "INSERT INTO Python (Url, Title, Author) VALUES (?, ?, ?)",
        (url, title, author),
    )
    return True


def main():
    """Create the table if needed, then crawl every listing page in order."""
    conn = sqlite3.connect('Python.db')
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS Python (
            Url VARCHAR,
            Title VARCHAR,
            Author VARCHAR
        )''')
        conn.commit()

        for i in range(FIRST_PAGE, LAST_PAGE + 1):
            url = PAGE_URL_TEMPLATE % str(i)
            try:
                req = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
                req.raise_for_status()
            except requests.RequestException as err:
                # One unreachable page should not discard the rest of the crawl.
                print("skipping %s: %s" % (url, err), file=sys.stderr)
                continue

            req.encoding = "utf-8"
            html = BeautifulSoup(req.text, "lxml")

            for entry_url, title, author in _parse_entries(html):
                _store_entry(c, entry_url, title, author)

            # Commit once per page so an interrupted run keeps finished pages.
            conn.commit()
            print("第%s页" % str(i))
    finally:
        # Release the database handle even when the crawl fails part-way.
        conn.close()


if __name__ == '__main__':
    main()
- ) D- \* ?; n$ E: ^ c i
3 C+ {4 Q( b0 M$ D& v* }* D6 c5 w$ ~' _- j# ~2 I7 s4 U
Python交流群:1047602540
|
zan
|