pdf/图片整体解析 api
小艾同学 ... 大约 4 分钟
# pdf/图片整体解析 api
格式参考 https://openai.100tal.com/documents/article/page?fromWhichSys=console&id=201
注意:以下内容尚未经过测试,仅供参考。
# 简介
本接口为主 API。用户需先注册并登录,获取 token 后才能调用;每次请求都必须在请求头中携带该 token(`Authorization: Bearer <token>`)。
# 接口/路由/入口
- 接口名称:pdf/图片整体解析
- 接口地址:https://www.docpartner-serve.com/python/bpInterfaceCallInvLatex
- 请求方式:POST
- 响应类型:JSON(同步)
- content-type:application/json
# 输入说明
{
  "requestData": {
    "reqImgData": "asdfasf", // Base64-encoded content of the image / PDF / other file type
    "reqImgInfo": {
      "isContainsLatex": 1, // 1 = the input contains math formulas; parse them
      "isContainsText": 1, // 1 = the input contains text; parse it
      "isContainsTable": 0, // 1 = the input contains tables; parse them
      "isContainsGraph": 0, // 1 = the input contains pictures; parse them
      "isPrinted": 1, // "printed" flag; presumably 1 = printed (not handwritten) content. TODO confirm
      "isAuto": 1, // 1 = auto-detect formulas, text, tables and pictures, and parse whatever is detected. NOTE: when isAuto is set, the isContains* flags do not need to be specified
      "isTranslate": 0 // 1 = translation is requested
    }
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
2
3
4
5
6
7
8
9
10
11
12
13
14
# 输出说明
{
  "isSuccessful": true,
  "errorMsg": "",
  "data": {
    "respData": {
      "username": "te", // user name
      "time": "teat",
      "errorMsg": "", // error message
      "angle": 90,
      "isNeedReturnImg": 1, // whether the accompanying images need to be returned
      "imgInfo": {
        "isContainsLatex": 1,
        "isContainsText": 1,
        "isContainsTable": 0,
        "isContainsGraph": 0,
        "isPrinted": 1 // "printed" flag
      },
      "confidence": { // confidence scores
        "latex": 0.999
      },
      "html": {
        "htmlFile": "./test/ad.html",
        "htmlFolder": "username/time",
        "htmlImgFolder": "username/time/img"
      },
      "tex": {
        "latexStr": "dasfdsaf",
        "imgFileList": [
          {
            "imgFile": "dafds.png", // image referenced by the TeX shown in the front end
            "imgData": "sdfsaff" // base64
          },
          {
            "imgFile": "dafds.png", // image referenced by the TeX shown in the front end
            "imgData": "sdfsaff" // base64
          }
        ]
      },
      "outLocationObjDictList": [ // bounding-box information
        {
          // TODO: the field names and values of a bounding-box entry are not documented yet
        }
      ]
    }
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# 程序示例
# js 示例
// Based on axios
import axios from "axios"

// Access token issued after registering and logging in.
const token = "TODO" // TODO
// Route of the parsing endpoint; axios resolves it against `baseURL` below.
const bpInterfaceCallInvLatex = "/bpInterfaceCallInvLatex"

// Request payload: the base64 file content plus the parsing flags (0/1).
const requestJsonData = {
  requestData: {
    reqImgData: "TODO", // base64 string
    reqImgInfo: {
      isContainsLatex: 1,
      isContainsText: 1,
      isContainsTable: 0,
      isContainsGraph: 0,
      isPrinted: 1,
      isAuto: 1,
      isTranslate: 0
    }
  }
}

axios.post(
  bpInterfaceCallInvLatex,
  requestJsonData,
  {
    // Matches the endpoint address documented for this API
    // (https://www.docpartner-serve.com/python/bpInterfaceCallInvLatex).
    baseURL: 'https://www.docpartner-serve.com/python', // TODO: adjust if your deployment differs
    headers: {
      "Authorization": `Bearer ${token}`,
      "Content-Type": "application/json;charset=utf-8",
    },
    responseType: 'json'
  }
)
  .then(resp => {
    // The response envelope reports success through `isSuccessful`.
    if (resp.data.isSuccessful) {
      console.log(resp.data.data)
    } else {
      console.error(`请求api失败!${resp.data.errorMsg}`);
    }
  })
  .catch(err => {
    // Network / HTTP-level failure.
    console.error(err)
  })
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# python 示例
# 基于 requests
import os
import base64
import requests
token = ""  # TODO: access token issued after login
bp = "bpInterfaceCallInvLatex"
PORT = "8089"  # TODO
# Endpoint address; the host matches the documented
# https://www.docpartner-serve.com/python/bpInterfaceCallInvLatex
url = "https://www.docpartner-serve.com:{port}/python/{bp}".format(
    port=PORT,
    bp=bp
)
imgPath = "test.png"  # TODO

# Read the file as raw bytes and base64-encode it.
with open(imgPath, "rb") as f:
    imgDataStr = base64.b64encode(f.read())  # --> bytes
imgDataBase64 = imgDataStr.decode()  # --> str, so it can be embedded in JSON

jsondata = {
    "requestData": {
        "reqImgData": imgDataBase64,
        "reqImgInfo": {
            "isContainsLatex": 1,
            "isContainsText": 1,
            "isContainsTable": 0,
            "isContainsGraph": 0,
            "isPrinted": 1,
            "isAuto": 1,
            "isTranslate": 1
        },
    }
}
headers = {
    'Authorization': "Bearer " + token
}

# `json=` serializes the payload and sets Content-Type: application/json.
r = requests.post(url, headers=headers, json=jsondata)
result = r.json()
assert isinstance(result, dict)
print(result)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# java 示例
// 基于 jodd-http
import java.util.Map;
import jodd.http.HttpRequest;
import jodd.http.HttpResponse;
import sun.misc.BASE64Decoder;
import sun.misc.BASE64Encoder;
import java.util.HashMap;
import com.alibaba.fastjson;
import java.io.*;
public class HttpApiDemo {
public static void main(String[] args) {
String url = "http://localhost:8088/joddhttp/test"; // TODO
String token = ""; // TODO
String imgPath = ""; // TODO
byte[] data = null;
InputStream in = new FileInputStream(imgPath); // 读取图片字节数组
var data = new byte[in.available()];
in.read(data);
in.close();
// 对字节数组进行Base64编码,得到Base64编码的字符串
BASE64Encoder encoder = new BASE64Encoder();
String imgDataBase64 = encoder.encode(data);
//
Map reqImgInfo = new HashMap();
reqImgInfo.put("isContainsLatex",true);
reqImgInfo.put("isContainsText",true);
reqImgInfo.put("isContainsTable",true);
reqImgInfo.put("isContainsGraph",true);
reqImgInfo.put("isPrinted",true);
reqImgInfo.put("isAuto",true);
reqImgInfo.put("isTranslate",true);
Map requestData = new HashMap();
requestData.put("reqImgData",imgDataBase64);
requestData.put("reqImgInfo",reqImgInfo);
Map jsondata = new HashMap();
jsondata.put("requestData",requestData);
String jsonDataStr = JSON.toJSONString(jsondata);
HttpRequest request = HttpRequest.post(url);
request.contentType("application/json");
request.charset("utf-8");
request.acceptEncoding("gzip");
request.header("Authorization", "Bearer " + token);
request.body(jsonDataStr);
HttpResponse response = request.send();
String respJson = response.charset("UTF-8").bodyText();
return respJson;
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# C# 示例
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using Newtonsoft.Json;

namespace HttpApiDemoApp
{
    /// <summary>
    /// Minimal client for the pdf/image parsing API: base64-encodes a local file,
    /// posts it as JSON with a bearer token and prints the JSON response.
    /// </summary>
    class HttpApiDemo
    {
        static void Main(string[] args)
        {
            string url = ""; // TODO
            string token = ""; // TODO
            string imgPath = ""; // TODO

            // Send the file bytes unchanged. Re-encoding through Bitmap as JPEG
            // would alter the image and cannot work for PDF input.
            byte[] arr = File.ReadAllBytes(imgPath);
            string imgDataBase64 = Convert.ToBase64String(arr);

            // Parsing flags, sent as 0/1 integers like the documented request payload.
            var reqImgInfo = new Dictionary<string, int>();
            reqImgInfo.Add("isContainsLatex", 1);
            reqImgInfo.Add("isContainsText", 1);
            reqImgInfo.Add("isContainsTable", 1);
            reqImgInfo.Add("isContainsGraph", 1);
            reqImgInfo.Add("isPrinted", 1);
            reqImgInfo.Add("isAuto", 1);
            reqImgInfo.Add("isTranslate", 1);

            var requestData = new Dictionary<string, object>();
            requestData.Add("reqImgData", imgDataBase64);
            requestData.Add("reqImgInfo", reqImgInfo);

            var Content = new Dictionary<string, object>();
            Content.Add("requestData", requestData);
            string jsonData = JsonConvert.SerializeObject(Content); // Dictionary -> JSON

            // http
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.ServicePoint.Expect100Continue = false; // do not send "Expect: 100-continue"
            request.Method = "POST";
            request.ContentType = "application/json";
            request.Headers.Add("Authorization", "Bearer " + token);

            // UTF8Encoding(false) keeps a byte-order mark out of the JSON body.
            using (var requestStream = request.GetRequestStream())
            using (var streamWriter = new StreamWriter(requestStream, new UTF8Encoding(false)))
            {
                streamWriter.Write(jsonData);
                streamWriter.Flush();
            }

            using (var response = (HttpWebResponse)request.GetResponse())
            using (var responseStream = response.GetResponseStream())
            using (var streamReader = new StreamReader(responseStream, Encoding.UTF8))
            {
                string result = streamReader.ReadToEnd(); // response body
                Console.WriteLine(result);
            }

            Console.WriteLine("end");
            Console.ReadKey();
        }
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# go 示例
// Minimal client for the pdf/image parsing API: base64-encodes a local file,
// posts it as JSON with a bearer token and prints the JSON response.
package main

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
)

func main() {
	url := ""     // TODO
	token := ""   // TODO
	imgPath := "" // TODO

	ff, err := ioutil.ReadFile(imgPath) // []byte
	if err != nil {
		panic(err)
	}
	// EncodeToString sizes its own output, so files of any size work.
	imgDataBase64 := base64.StdEncoding.EncodeToString(ff)

	// Parsing flags, sent as 0/1 integers like the documented request payload.
	reqImgInfo := map[string]interface{}{
		"isContainsLatex": 1,
		"isContainsText":  1,
		"isContainsTable": 1,
		"isContainsGraph": 1,
		"isPrinted":       1,
		"isAuto":          1,
		"isTranslate":     1,
	}
	requestData := map[string]interface{}{
		"reqImgData": imgDataBase64,
		"reqImgInfo": reqImgInfo,
	}
	jsondata := map[string]interface{}{
		"requestData": requestData,
	}
	jsonDataStr, err := json.Marshal(jsondata)
	if err != nil {
		panic(err)
	}

	client := &http.Client{}
	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonDataStr))
	if err != nil {
		panic(err)
	}
	// The endpoint is documented as content-type application/json.
	req.Header.Set("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer "+token)

	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	result, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	resStr := string(result)
	fmt.Println(resStr)
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# php 示例
<?php
// Based on curl

/**
 * Posts a local file to the pdf/image parsing API and echoes the raw JSON response.
 *
 * The file is base64-encoded and sent in a JSON body with a bearer token.
 *
 * @throws RuntimeException when the file cannot be read or the HTTP request fails
 */
function posturl(){
    $token = ""; // TODO
    $url = ""; // TODO
    $imgPath = ""; // TODO

    $gambar = file_get_contents($imgPath);
    if ($gambar === false) {
        throw new RuntimeException("Cannot read file: " . $imgPath);
    }
    // Plain base64 without chunk_split(), which would insert CRLF line breaks.
    $imgDataBase64 = base64_encode($gambar);

    // Parsing flags, sent as 0/1 integers like the documented request payload.
    $jsondata = array(
        "requestData" => array(
            "reqImgData" => $imgDataBase64,
            "reqImgInfo" => array(
                "isContainsLatex" => 1,
                "isContainsText" => 1,
                "isContainsTable" => 1,
                "isContainsGraph" => 1,
                "isPrinted" => 1,
                "isAuto" => 1,
                "isTranslate" => 1
            )
        )
    );
    $jsonDataStr = json_encode($jsondata);

    $headerArray = array(
        "Content-Type: application/json; charset=utf-8",
        "Accept: application/json",
        "Authorization: Bearer " . $token // "." is PHP's string concatenation operator
    );

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    // Keep TLS verification on: the request carries a bearer token.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($curl, CURLOPT_POST, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $jsonDataStr);
    curl_setopt($curl, CURLOPT_HTTPHEADER, $headerArray);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $output = curl_exec($curl);
    if ($output === false) {
        $error = curl_error($curl);
        curl_close($curl);
        throw new RuntimeException("Request failed: " . $error);
    }
    curl_close($curl);
    echo $output;
}
?>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47