| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130 |
- package com.ruoyi.wisdomarbitrate.utils;
-
-
- import com.documents4j.api.DocumentType;
- import com.documents4j.api.IConverter;
- import com.documents4j.job.LocalConverter;
- import com.tencentcloudapi.common.Credential;
- import com.tencentcloudapi.common.exception.TencentCloudSDKException;
- import com.tencentcloudapi.common.profile.ClientProfile;
- import com.tencentcloudapi.common.profile.HttpProfile;
- import com.tencentcloudapi.ocr.v20181119.OcrClient;
- import com.tencentcloudapi.ocr.v20181119.models.GeneralAccurateOCRRequest;
- import com.tencentcloudapi.ocr.v20181119.models.GeneralAccurateOCRResponse;
- import com.tencentcloudapi.ocr.v20181119.models.SmartStructuralOCRV2Request;
- import com.tencentcloudapi.ocr.v20181119.models.SmartStructuralOCRV2Response;
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.json.JSONArray;
- import org.json.JSONObject;
-
-
- import java.io.*;
- import java.util.*;
-
- public class Tset {
- public static void main(String[] args) {
- //API的SecretId
- final String SECRET_ID = "AKIDeEf2A8uX1HSainvvnXAc3X9ZlhtyvkMp";
- //API的SecretKey
- final String SECRET_KEY = "QjphKo8zkHZigT8j9PVtFPJyfIvO3d6V";
-
- // String pdfFilePath = "http://121.40.189.20:9000/API/uploadPath/upload/ca2ca4697e5449ff9b9d23f95b221f58.pdf";
- // String pdfFilePath = "http://121.40.189.20:9000/API/uploadPath/upload/2023/11/14/ca2ca4697e5449ff9b9d23f95b221f58.pdf";
- String pdfFilePath ="D:/home/unzip/b92a7291-441e-4fad-95b4-7305f206eabd/仲裁材料/二、案件基本材料/2-1 仲裁申请书.pdf";
- try{
- // 读取文件内容到字节数组
- byte[] fileBytes = readFileToBytes(pdfFilePath);
- // 将字节数组转换为Base64值
- String base64String = encodeBytesToBase64(fileBytes);
-
- // 实例化一个认证对象,入参需要传入腾讯云账户 SecretId 和 SecretKey,此处还需注意密钥对的保密
- // 代码泄露可能会导致 SecretId 和 SecretKey 泄露,并威胁账号下所有资源的安全性。以下代码示例仅供参考,建议采用更安全的方式来使用密钥,请参见:https://cloud.tencent.com/document/product/1278/85305
- // 密钥可前往官网控制台 https://console.cloud.tencent.com/cam/capi 进行获取
- Credential cred = new Credential(SECRET_ID, SECRET_KEY);
- // 实例化一个http选项,可选的,没有特殊需求可以跳过
- HttpProfile httpProfile = new HttpProfile();
- httpProfile.setEndpoint("ocr.tencentcloudapi.com");
- // 实例化一个client选项,可选的,没有特殊需求可以跳过
- ClientProfile clientProfile = new ClientProfile();
- clientProfile.setHttpProfile(httpProfile);
- // 实例化要请求产品的client对象,clientProfile是可选的
- OcrClient client = new OcrClient(cred, "ap-beijing", clientProfile);
- // 实例化一个请求对象,每个接口都会对应一个request对象
- SmartStructuralOCRV2Request req = new SmartStructuralOCRV2Request();
- req.setImageBase64(base64String);
- req.setIsPdf(true);
- req.setPdfPageNumber(1L);
- String[] itemNames1 = {"申请人", "统一社会信用代码", "负责人", "住所", "联系地址"
- , "委托代理人", "联系电话", "电子邮件", "被申请人", "居民身份证号码", "仲裁请求", "事实和理由"};
- req.setItemNames(itemNames1);
- // 返回的resp是一个SmartStructuralOCRV2Response的实例,与请求对象对应
- SmartStructuralOCRV2Response resp = client.SmartStructuralOCRV2(req);
-
-
- //解析数据
- String s = SmartStructuralOCRV2Response.toJsonString(resp);
- // 解析JSON数据
- JSONObject jsonObject = new JSONObject(s);
- JSONArray structuralList = jsonObject.getJSONArray("StructuralList");
- // 遍历StructuralList中的Groups,获取Key对应的AutoName和Value对应的AutoContent
- StringBuilder stringBuilder = new StringBuilder(); // 创建一个StringBuilder对象
- for (int i = 0; i < structuralList.length(); i++) {
- JSONArray groups = structuralList.getJSONObject(i).getJSONArray("Groups");
- for (int j = 0; j < groups.length(); j++) {
- JSONArray lines = groups.getJSONObject(j).getJSONArray("Lines");
- for (int k = 0; k < lines.length(); k++) {
- JSONObject line = lines.getJSONObject(k);
- JSONObject key = line.getJSONObject("Key");
- JSONObject value = line.getJSONObject("Value");
- String autoName = key.getString("AutoName");
- String autoContent = value.getString("AutoContent");
- String text = autoName + ":" + autoContent;
- if (stringBuilder.length() > 0) {
- stringBuilder.append(","); // 在已有内容的情况下添加逗号分隔符
- }
- stringBuilder.append(text); // 拼接当前的字符串
- }
- }
- }
- System.out.println("stringBuilder======"+stringBuilder.toString());
- // 将字符串按逗号分隔符切割
- String[] fields = stringBuilder.toString().split(",");
- Map<String, List<String>> map = new HashMap<>();
- for (String field : fields) {
- // 对于每个字段,再按冒号分隔符拆分出键和值
- String[] keyValue = field.split(":");
- if (keyValue.length == 2) { // 对于合法的键值对,将其添加到JSON对象中
- String key = keyValue[0];
- String value = keyValue[1];
- // 判断Map中是否已存在该键
- if (map.containsKey(key)) {
- // 如果已存在,获取该键对应的值,并将新的值添加到集合中
- List<String> values = map.get(key);
- values.add(value);
- } else {
- // 如果不存在,创建一个新的集合,并将值添加到集合中
- List<String> values = new ArrayList<>();
- values.add(value);
- map.put(key, values);
- }
- }
- }
- System.out.println(map);
- } catch (Exception e) {
- System.out.println(e.toString());
- }
-
- }
- private static byte[] readFileToBytes(String filePath) throws IOException {
- File file = new File(filePath);
- FileInputStream fileInputStream = new FileInputStream(file);
- byte[] fileBytes = new byte[(int) file.length()];
- fileInputStream.read(fileBytes);
- fileInputStream.close();
- return fileBytes;
- }
-
- private static String encodeBytesToBase64(byte[] bytes) {
- return Base64.getEncoder().encodeToString(bytes);
- }
- }
|