- 在线时间
- 7 小时
- 最后登录
- 2021-1-20
- 注册时间
- 2021-1-9
- 听众数
- 3
- 收听数
- 0
- 能力
- 0 分
- 体力
- 23 点
- 威望
- 0 点
- 阅读权限
- 20
- 积分
- 10
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 7
- 主题
- 7
- 精华
- 0
- 分享
- 0
- 好友
- 0
升级   5.26% 该用户从未签到
- 自我介绍
- 我本名为我,那就是我
 |
 - #!/usr/bin/python% l+ R0 @4 n5 _7 ~2 }3 u5 @
- # -*- coding: utf-8 -*-5 `2 v% v' r4 @& J
- ; A O. L) f, ?
- import requests4 r: ^4 }# ^% W
- from lxml import etree9 a3 C$ e$ w% b6 K
- import sqlite39 C- e2 p4 o\" M\" L' K
- $ P5 q$ f/ h+ K' R
def write_sql(c, text):
    """Store the not-yet-seen articles of one listing page in the ``Python`` table.

    Args:
        c: sqlite3 cursor on a database that already contains the ``Python``
            table (columns ``Url``, ``Title``, ``Author``).
        text: HTML source of a listing page.

    Returns:
        True if at least one new row was inserted, False otherwise.

    Reads the module-level ``host`` string, which is prepended to every
    link extracted from the page.
    """
    html = etree.HTML(text)
    # Titles
    titles = html.xpath('//ul[@class="news"]//a[@target="_blank"]/p/text()')
    # Links
    hrefs = html.xpath('//ul[@class="news"]//a[@target="_blank"]/@href')
    # Dates (stored in the ``Author`` column below)
    ems = html.xpath('//ul[@class="news"]//a[@target="_blank"]/em/text()')

    # NOTE(review): the three lists come from independent queries and are
    # paired purely by position; an anchor lacking <p> or <em> would shift
    # the pairing for every later entry.
    number = 0
    for title, href, em in zip(titles, hrefs, ems):
        href = host + href

        # Placeholders let sqlite3 do the quoting, so a quote character in
        # scraped text can neither break nor alter the statement.
        c.execute("SELECT COUNT(*) FROM Python WHERE Url = ?", (href,))
        # Skip links that are already stored.
        if c.fetchone()[0] > 0:
            continue

        # str() turns lxml's string subclasses into plain str before binding.
        c.execute(
            "INSERT INTO Python (Url, Title, Author) VALUES (?, ?, ?)",
            (href, str(title), str(em)),
        )
        number += 1
        print(title, href, em)

    # Commit through the cursor's own connection rather than a global name.
    c.connection.commit()
    return number > 0
- * V! n8 i& m7 Z2 `! P7 x# ]9 W& d) l q+ h& w
if __name__ == '__main__':
    # Script entry point: read the category links from the site's navigation
    # bar, then walk each category's numbered listing pages and store their
    # articles until a page adds no new rows.
    #
    # ``conn`` and ``host`` are deliberately module-level names: write_sql
    # reads ``host`` to build the stored links.
    conn = sqlite3.connect("Python-xxx.db")
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS Python (
            Url VARCHAR,
            Title VARCHAR,
            Author VARCHAR
        )''')
        conn.commit()

        # Without a timeout a stalled server would block the script forever.
        request_timeout = 30

        host = "https://xxx"
        url = host + "/xxx"
        response = requests.get(url, timeout=request_timeout)
        response.encoding = 'utf-8'

        html = etree.HTML(response.text)
        clearfixs = html.xpath('//*[@class="nav clearfix"]//a[starts-with(@href, "/cate/")]/text()')
        hrefs = html.xpath('//*[@class="nav clearfix"]//a[starts-with(@href, "/cate/")]/@href')

        for clearfix, href in zip(clearfixs, hrefs):
            print(clearfix, host + href)
            page = 1
            while True:
                url = host + href + "/list_%s.html" % page
                # The HTTP status is intentionally not checked: a page that
                # yields no new rows simply ends this category's loop.
                response = requests.get(url, timeout=request_timeout)
                response.encoding = 'utf-8'

                if not write_sql(c, response.text):
                    break

                print("第%s页" % page)
                page += 1
    finally:
        # Close the database even when a request or parse step raises.
        conn.close()
xpath用着就是舒服~

Python交流群:1047602540

|
zan
|