数学建模社区-数学中国
标题:
Python-百度ocr识别
[打印本页]
作者:
檀俾九
时间:
2021-1-12 09:38
标题:
Python-百度ocr识别
[code=python]#!/usr/bin/python
# -*- coding: utf-8 -*-

import base64
from os.path import exists
from tkinter import Tk
from tkinter.filedialog import askopenfilename
from urllib.parse import urlencode

import requests


def Dialog_box():
    """Show an "open file" dialog and return the user's selection.

    A Tk root window is created only to host the dialog; it is hidden while
    the dialog is open and destroyed before this function returns.

    Returns:
        The value returned by ``askopenfilename``: the selected file path,
        or an empty value if the user cancels the dialog.
    """
    root = Tk()
    # Hide the empty root window so that only the dialog itself is shown.
    root.withdraw()

    # Patterns are passed as a tuple (one entry per pattern). A single
    # ';'-joined pattern string is only interpreted as several patterns by
    # the native Windows dialog; elsewhere it is treated as one glob.
    image_patterns = ('*.jpg', '*.jpeg', '*.gif', '*.png', '*.bmp')
    filetypes = [
        ('图片文件(*.jpg;*.jpeg;*.gif;*.png;*.bmp)', image_patterns),
        ('所有文件(*.*)', '*.*'),
    ]

    try:
        return askopenfilename(filetypes=filetypes)
    finally:
        # Release the hidden root window instead of leaving it alive.
        root.destroy()
& U$ d+ A+ Z4 x7 a6 a) W9 h, P
# S& E7 l- j/ [4 Y8 c1 h. N
7 Y# @4 o. f2 |, | W% g
def Read_file(file_path):
    """Return the complete binary contents of the file at *file_path*.

    The file is opened in binary mode and its bytes are returned exactly as
    stored. Nothing is stripped: trailing bytes such as 0x0A or 0x20 are
    legitimate data in a binary file, and removing them would alter the
    payload.

    Args:
        file_path: Path of the file to read.

    Returns:
        bytes: The raw contents of the file (``b""`` for an empty file).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, mode='rb') as file_object:
        return file_object.read()
7 ]+ _) A1 U5 X& x
9 a8 g% M6 A4 X2 z4 `1 c$ J
: L' k' ]! G3 i5 z7 b
def Baidu_ocr(binary, client_id="", client_secret="", timeout=10):
    """Run Baidu's ``general_basic`` OCR endpoint on raw image bytes.

    Two HTTP requests are made: one to obtain an OAuth access token with the
    ``client_credentials`` grant, and one to submit the base64-encoded image
    for recognition.

    Args:
        binary: Raw bytes of the image to recognise.
        client_id: Value sent as ``client_id`` in the token request
            (required by the service; defaults to an empty string).
        client_secret: Value sent as ``client_secret`` in the token request
            (required by the service; defaults to an empty string).
        timeout: Timeout in seconds passed to each HTTP request, so that a
            stalled connection cannot block the caller forever.

    Returns:
        str: The ``words`` field of every recognised line, each followed by
        ``"\\r\\n"``. An empty string when the service reports zero lines.

    Raises:
        KeyError: If a JSON response lacks ``access_token``,
            ``words_result_num`` or ``words_result``.
        Exceptions raised by ``requests`` (connection errors, timeouts,
        invalid JSON bodies) are propagated unchanged.
    """
    token_response = requests.get(
        url="https://aip.baidubce.com/oauth/2.0/token",
        params={
            "grant_type": "client_credentials",
            "client_id": client_id,  # required
            "client_secret": client_secret,  # required
        },
        timeout=timeout,
    ).json()
    access_token = token_response['access_token']

    post_data = {
        "image": base64.b64encode(binary),
        "url": "",
        "language_type": "CHN_ENG",
        "detect_direction": "false",
        "detect_language": "false",
        "probability": "false",
    }
    ocr_response = requests.post(
        url="https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic",
        # Let requests build and escape the query string.
        params={"access_token": access_token},
        data=post_data,
        timeout=timeout,
    ).json()

    line_count = ocr_response['words_result_num']
    if not line_count:
        return ''
    recognised = ocr_response['words_result'][:line_count]
    return ''.join(entry['words'] + "\r\n" for entry in recognised)
: n `$ I' @* F3 J) z, b; r
, |! G; Z1 M* ]2 N* R$ {3 M+ w
3 U* a: \3 I" r4 ^3 `
if __name__ == "__main__":
    # Script entry point: let the user pick an image, then print the text
    # recognised in it.
    file_path = Dialog_box()
    # A cancelled dialog yields an empty value; test for that first so only
    # a real path string is handed to exists().
    if file_path and exists(file_path):
        data = Read_file(file_path)
        print(Baidu_ocr(data))
[/code]
转发自
派生社区
Python交流群:1047602540
欢迎光临 数学建模社区-数学中国 (http://www.madio.net/)
Powered by Discuz! X2.5