- 在线时间
- 7 小时
- 最后登录
- 2021-1-20
- 注册时间
- 2021-1-9
- 听众数
- 3
- 收听数
- 0
- 能力
- 0 分
- 体力
- 23 点
- 威望
- 0 点
- 阅读权限
- 20
- 积分
- 10
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 7
- 主题
- 7
- 精华
- 0
- 分享
- 0
- 好友
- 0
升级   5.26% 该用户从未签到
- 自我介绍
- 我本名为我,那就是我
 |
#!/usr/bin/python
# -*- coding: utf-8 -*-

import sqlite3
from contextlib import closing
from re import escape

import requests
from bs4 import BeautifulSoup

# Crawl configuration. The host is a placeholder ("xxx") in the published
# script; the values below are the ones the original listing hard-coded.
DB_PATH = 'Python.db'
SITE_ROOT = "http://xxx"
FIRST_PAGE = 1
LAST_PAGE = 1045
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled page hangs the run
AUTHOR_PREFIX = 'xxx: '
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/50.0.2661.87 Safari/537.36"
}


def open_database(path):
    """Open the SQLite database at *path* and create the ``Python`` table.

    The table has three VARCHAR columns: ``Url``, ``Title`` and ``Author``.
    Creation uses ``IF NOT EXISTS``, so calling this on an existing database
    keeps the rows already stored.

    Returns the open connection; the caller is responsible for closing it.
    Raises ``sqlite3.Error`` if the table cannot be created (the connection
    is closed before the error propagates).
    """
    conn = sqlite3.connect(path)
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS Python (
            Url VARCHAR,
            Title VARCHAR,
            Author VARCHAR
        )''')
        conn.commit()
    except sqlite3.Error:
        conn.close()
        raise
    return conn


def url_is_stored(cursor, url):
    """Return True if a row with this exact ``Url`` is already in the table."""
    # Bound parameter instead of string formatting: scraped URLs may contain
    # quotes, which would otherwise break (or inject into) the statement.
    row = cursor.execute(
        "SELECT COUNT(*) FROM Python WHERE Url = ?", (url,)).fetchone()
    return row[0] > 0


def store_entry(cursor, url, title, author):
    """Insert one (url, title, author) row; the caller commits.

    Values are bound as parameters, so they are stored verbatim. No manual
    quote doubling or ``re.escape`` is needed (``re.escape`` is a regex
    helper and would store backslash-mangled titles).
    """
    cursor.execute(
        "INSERT INTO Python (Url, Title, Author) VALUES (?, ?, ?)",
        (url, title, author))


def scrape_page(cursor, page):
    """Fetch listing page number *page* and store entries not yet in the table.

    Each ``div.loop`` element is expected to hold the entry link in
    ``h2 > a`` and the author in the third ``span`` under ``.content_infor``.
    Entries missing either element, or whose link has no ``href``, are
    skipped instead of aborting the crawl. A missing ``title`` attribute is
    stored as NULL.

    Returns the number of rows inserted. Does not commit.
    """
    page_url = "%s/index_%s.html" % (SITE_ROOT, str(page))
    response = requests.get(url=page_url, headers=HEADERS,
                            timeout=REQUEST_TIMEOUT)
    response.encoding = "utf-8"
    html = BeautifulSoup(response.text, "lxml")

    inserted = 0
    for div in html.find_all('div', class_='loop'):
        links = div.select('h2 > a')
        infos = div.select('.content_infor > span:nth-child(3)')
        if not links or not infos:
            continue
        link = links[0]
        href = link.get('href')
        if not href:
            continue

        entry_url = SITE_ROOT + href
        if url_is_stored(cursor, entry_url):
            continue

        author = infos[0].text.replace(AUTHOR_PREFIX, '')
        store_entry(cursor, entry_url, link.get('title'), author)
        inserted += 1
    return inserted


def main():
    """Crawl every listing page and record new entries in the database.

    Commits after each page so an interrupted run keeps the pages already
    processed, and prints the page number as a progress indicator.
    """
    # closing() guarantees the connection is released even if a page raises.
    with closing(open_database(DB_PATH)) as conn:
        cursor = conn.cursor()
        for page in range(FIRST_PAGE, LAST_PAGE + 1):
            scrape_page(cursor, page)
            conn.commit()
            print("第%s页" % str(page))


if __name__ == '__main__':
    main()
Python交流群:1047602540
|
zan
|