- 在线时间
- 7 小时
- 最后登录
- 2021-1-20
- 注册时间
- 2021-1-9
- 听众数
- 3
- 收听数
- 0
- 能力
- 0 分
- 体力
- 17 点
- 威望
- 0 点
- 阅读权限
- 20
- 积分
- 7
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 4
- 主题
- 4
- 精华
- 0
- 分享
- 0
- 好友
- 0
升级   2.11% 该用户从未签到
- 自我介绍
- 我本名为我,那就是我
 |
 - #!/usr/bin/python
# -*- coding: utf-8 -*-
"""Crawl a paginated article index and record each article in SQLite.

For every listing page the script reads each ``div.loop`` entry, takes the
article link and title from its ``h2 > a`` element and the author from the
third ``span`` under ``.content_infor``, and inserts a row into the
``Python`` table of ``Python.db`` unless a row with the same URL exists.
"""

import sqlite3
from re import escape  # kept from the original import list; no longer needed (see _store_article)

import requests
from bs4 import BeautifulSoup

# Site root that relative article links are appended to.
BASE_URL = "http://xxx"

# Listing pages are numbered 1..LAST_PAGE inclusive.
LAST_PAGE = 1045

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36"
}


def _is_stored(cursor, url):
    """Return True when the Python table already holds a row for *url*."""
    # Bound parameter instead of string interpolation: the URL comes from
    # scraped HTML and may contain quotes.
    cursor.execute("SELECT COUNT(*) FROM Python WHERE Url = ?", (url,))
    return cursor.fetchone()[0] > 0


def _store_article(cursor, url, title, author):
    """Insert one article row.

    Values are passed as bound parameters, so no manual quote doubling or
    ``re.escape`` is applied; the title is stored exactly as scraped
    (``re.escape`` adds regex backslashes and would corrupt it).
    """
    cursor.execute(
        "INSERT INTO Python (Url, Title, Author) VALUES (?, ?, ?)",
        (url, title, author),
    )


def main():
    """Create the table if needed, then crawl every listing page in order."""
    conn = sqlite3.connect('Python.db')
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS Python (
            Url VARCHAR,
            Title VARCHAR,
            Author VARCHAR
        )''')
        conn.commit()

        for i in range(1, LAST_PAGE + 1):
            url = "%s/index_%s.html" % (BASE_URL, i)
            req = requests.get(url=url, headers=HEADERS)
            req.encoding = "utf-8"
            html = BeautifulSoup(req.text, "lxml")

            for div in html.find_all('div', class_='loop'):
                content_body = div.select('h2 > a')[0]
                content_infor = div.select('.content_infor > span:nth-child(3)')[0]

                article_url = BASE_URL + content_body.get('href')
                if _is_stored(c, article_url):
                    continue

                _store_article(
                    c,
                    article_url,
                    content_body.get('title'),
                    # Drop the literal 'xxx: ' label from the span text.
                    content_infor.text.replace('xxx: ', ''),
                )

            # Commit once per listing page, then report progress.
            conn.commit()
            print("第%s页" % str(i))
    finally:
        # Close the connection even when a request or a parse step raises.
        conn.close()


if __name__ == '__main__':
    main()
9 p8 O" E* |; A3 e+ V8 {. H. X. [7 U
Python交流群:1047602540 d& _6 ]5 O6 j; H- F* V) S
|
zan
|