数学建模社区-数学中国
标题:
Python-百度ocr识别
[打印本页]
作者:
檀俾九
时间:
2021-1-12 09:38
标题:
Python-百度ocr识别
[code=python]#!/usr/bin/python
# -*- coding: utf-8 -*-

import base64
& `3 N# J2 r& Q5 l9 l
from os.path import exists
" w! ?! `% x+ p/ K5 Q/ p) j
from tkinter import Tk
3 y5 H( K4 p2 x1 N' r
from tkinter.filedialog import askopenfilename
% z0 x/ ~9 _ K9 u) c5 W
from urllib.parse import urlencode
# b- v0 D8 M/ l
, N8 v9 W& \1 f8 E& E
import requests
) _( W# r; p6 M$ U% g* z9 i. d) N t
7 e: ^! M9 J M; C4 ]
# b+ F) {5 o/ F7 Y
def Dialog_box():
    """Show an "open file" dialog and return the path the user picked.

    Returns:
        The selected file path as a string, or '' when the dialog is
        cancelled.
    """
    # The dialog needs a Tk root window; create one and keep it hidden.
    root = Tk()
    root.withdraw()

    filetypes = [
        # Patterns are given as a tuple (one glob per element) so that Tk
        # receives a proper list instead of a single ';'-joined string.
        ('图片文件(*.jpg;*.jpeg;*.gif;*.png;*.bmp)',
         ('*.jpg', '*.jpeg', '*.gif', '*.png', '*.bmp')),
        ('所有文件(*.*)', '*.*'),
    ]

    try:
        file_path = askopenfilename(filetypes=filetypes)
    finally:
        # Release the hidden root window even if the dialog raises.
        root.destroy()

    # Normalise any falsy "cancelled" value to an empty string so callers
    # can always treat the result as a path string.
    return file_path or ''
* F! f) _' D5 ]8 p
* `- q+ f/ T. Z- z5 e; a& B# w
( m1 u" Y; s2 @6 x3 b
def Read_file(file_path):
    """Read a file in binary mode and return its bytes unchanged.

    Args:
        file_path: Path of the file to read.

    Returns:
        The complete file contents as ``bytes``.
    """
    with open(file_path, mode='rb') as file_object:
        # Return the data verbatim: stripping trailing whitespace bytes
        # (0x20, 0x0A, ...) would alter binary content such as images.
        return file_object.read()
, N* ^1 ^4 j& l2 e7 {0 x a: G& E
* e; B5 X( |" Z* W) d/ o
; s9 m* _" q0 M1 i2 U1 |) r
def Baidu_ocr(binary, client_id="", client_secret="", timeout=10):
    """Run Baidu general-basic OCR on image bytes and return the text.

    Args:
        binary: Raw image bytes; they are base64-encoded before upload.
        client_id: Value sent as ``client_id`` to the token endpoint
            (required by the service; defaults to the original empty
            placeholder).
        client_secret: Value sent as ``client_secret`` to the token
            endpoint (required by the service; defaults to the original
            empty placeholder).
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The recognised lines, each terminated by ``"\\r\\n"``.

    Raises:
        RuntimeError: If the token response has no ``access_token`` or the
            OCR response contains an ``error_code``.
    """
    # Step 1: exchange the credentials for an access token.
    token_url = "https://aip.baidubce.com/oauth/2.0/token?" + urlencode({
        "grant_type": "client_credentials",
        "client_id": client_id,  # required
        "client_secret": client_secret,  # required
    })
    token_payload = requests.get(url=token_url, timeout=timeout).json()
    if 'access_token' not in token_payload:
        # Surface the service's own error text instead of a bare KeyError.
        raise RuntimeError(
            "Baidu token request failed: {} {}".format(
                token_payload.get('error', ''),
                token_payload.get('error_description', ''),
            )
        )
    access_token = token_payload['access_token']

    # Step 2: submit the image for recognition.
    ocr_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic"
    post_data = {
        "image": base64.b64encode(binary),
        "url": "",
        "language_type": "CHN_ENG",
        "detect_direction": "false",
        "detect_language": "false",
        "probability": "false",
    }
    ocr_payload = requests.post(
        url=ocr_url,
        # Passing the token via params lets requests escape it properly.
        params={"access_token": access_token},
        data=post_data,
        timeout=timeout,
    ).json()
    if 'error_code' in ocr_payload:
        raise RuntimeError(
            "Baidu OCR request failed: {} {}".format(
                ocr_payload.get('error_code'),
                ocr_payload.get('error_msg', ''),
            )
        )

    # Join the recognised lines, keeping the CRLF terminator on each.
    return "".join(
        entry['words'] + "\r\n"
        for entry in ocr_payload.get('words_result', [])
    )
* M* Q5 |! n3 d2 \6 _
3 n: \& y2 t, v" V2 O/ Y
% J0 J; p5 Z. V) C4 v7 s" A
if __name__ == "__main__":
    # Script entry point: let the user pick a file, and only when the
    # chosen path exists read it, run OCR on it and print the text.
    file_path = Dialog_box()
    if exists(file_path):
        print(Baidu_ocr(Read_file(file_path)))
* L7 e6 w+ V4 d7 T$ Y4 o7 T) X% O
: ~! I4 i! x: ]% h, {* S
[/code]

转发自
派生社区
Python交流群:1047602540
欢迎光临 数学建模社区-数学中国 (http://www.madio.net/)
Powered by Discuz! X2.5