- 在线时间
- 7 小时
- 最后登录
- 2021-1-20
- 注册时间
- 2021-1-9
- 听众数
- 3
- 收听数
- 0
- 能力
- 0 分
- 体力
- 23 点
- 威望
- 0 点
- 阅读权限
- 20
- 积分
- 10
- 相册
- 0
- 日志
- 0
- 记录
- 0
- 帖子
- 7
- 主题
- 7
- 精华
- 0
- 分享
- 0
- 好友
- 0
升级   5.26% 该用户从未签到
- 自我介绍
- 我本名为我,那就是我
 |
 - #!/usr/bin/python t h4 U( U6 _\" p; i; H4 @
- # -*- coding: utf-8 -*-
- # p' ]; R' |( R: @. Y2 v* w0 F, N\" X0 a: g
- import sqlite3
- & ?# e! y2 E9 B! K |import requests
- 5 Q# v\" a* G$ p9 T8 ffrom bs4 import BeautifulSoup* t5 Q1 M$ Z* B* v1 h7 u+ H
- from re import escape4 _ Q/ X' ]# t. U
- 0 k9 r: J u% R+ O: mif __name__ == '__main__':4 t6 L' P) `0 o
- conn = sqlite3.connect('Python.db')9 _4 `, i8 v! b* }/ r* s
- c = conn.cursor()
- R: n$ K+ `( r+ S; r1 S c.execute('''CREATE TABLE IF NOT EXISTS Python (+ F) t: |. s( {' |$ ]6 o N: h
- Url VARCHAR,9 o, t. U }' u3 t! X m0 t
- Title VARCHAR,
- 5 T. k% F5 c j r! h$ U Author VARCHAR% a0 C/ Z7 E0 @0 R# l
- )''')
- 2 p- b O U, I conn.commit()
- 0 D- Z' P H+ U7 u6 ]' M3 ?
- # m8 V: ^0 m0 _ # --------------------Split Line--------------------
- ; x1 ~5 B5 \8 k1 E) x headers = {
- ' x: l1 d\" f5 d3 z! c' x0 c& J "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36"
- % k\" I+ ` r9 ~5 n6 B- ? }
- 4 J% Y- n& q7 }, E0 i\" o: @. Y, ^# P
- 6 m3 R$ J5 P! }$ z6 W, N, j for i in range(1, 1046):: W: w- O3 d# F) e6 g0 l
- url = "http://xxx/index_%s.html" % str(i)0 b( I2 L- i3 K+ c
- req = requests.get(url=url, headers=headers)! k% u: d: C- v! s' j1 [* m
- req.encoding = "utf-8"% z# I' ?7 B7 ~( X\" G3 B' ~& k
- html = BeautifulSoup(req.text, "lxml")3 I+ D0 s5 ~+ I |
- % {' Z* G5 ?0 z1 _ # --------------------Split Line--------------------
- \" @: P4 d, p p4 b& D: j+ z for div in html.find_all('div', class_='loop'):
- ! Q8 N4 ^' p. f! i/ a content_body = div.select('h2 > a')[0]0 ~' [* {/ Y% `* i. j! _0 S
- content_infor = div.select('.content_infor > span:nth-child(3)')[0]
- 4 h9 `/ S* Q& h
- . P\" q% W$ ]* p( b6 q' M( L # --------------------Split Line--------------------
- 9 e* e$ P& V K8 D0 @2 L cursor = c.execute(
- $ P9 E/ ?$ p0 ]# C- ` "SELECT COUNT(*) FROM Python WHERE Url = '%s'" % ("http://xxx" + content_body.get('href'))). A# l* y+ E: X* P2 c
- len = 0
- 6 N% R' a8 h) T8 C for row in cursor:
- # g1 w+ d2 z7 v& B8 _$ I9 F len = row[0]) |0 z6 \6 M2 ~$ U
- if len > 0:& [1 f+ `! [# B, b\" Q* l
- continue3 h3 J& G5 _. A1 J/ K5 ]2 y
- 4 P/ G9 @8 B6 W- o* v h
- # --------------------Split Line--------------------
- ; h- r; \- z\" c) Z0 A0 m c.execute('INSERT INTO Python( Url, Title, Author) VALUES ( "%s", "%s", "%s")' % ($ T; \$ y. i1 X4 @- j
- "http://xxx" + content_body.get('href'),
- # G9 I8 S0 ^9 C! O* y5 q: G7 ~ escape(content_body.get('title').replace("\"", "\"\"")),
- / i( y/ T# `5 J) \# i1 m content_infor.text.replace('xxx: ', '')))* k) ?+ M j% L9 r& N
- 4 T5 r) c* E& @7 T6 K! A, [0 q% L
- conn.commit(); q' [3 f8 Y# Y# e3 M
- print("第%s页" % str(i)) Y: r8 Z2 k+ u! p
- ) U8 W# G: s; u/ c # --------------------Split Line--------------------
- \" k6 F( Z# _6 t* w% C& }( ? conn.close()( ?+ ?: O/ z( k9 F [
Python交流群:1047602540
 |
zan
|