| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244 |
- package com.ruoyi.wisdomarbitrate.utils;
-
- import cn.hutool.core.collection.CollectionUtil;
- import cn.hutool.core.util.StrUtil;
- import com.ruoyi.common.constant.Constants;
- import com.ruoyi.common.exception.ServiceException;
- import com.ruoyi.common.utils.StringUtils;
- import com.ruoyi.wisdomarbitrate.domain.FatchRule;
- import com.tencentcloudapi.bsca.v20210811.models.LicenseSummary;
- import com.tencentcloudapi.common.Credential;
- import com.tencentcloudapi.common.exception.TencentCloudSDKException;
- import com.tencentcloudapi.common.profile.ClientProfile;
- import com.tencentcloudapi.common.profile.HttpProfile;
- import com.tencentcloudapi.ocr.v20181119.OcrClient;
- import com.tencentcloudapi.ocr.v20181119.models.*;
- import org.json.JSONArray;
- import org.json.JSONObject;
-
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.util.*;
- import java.util.stream.Collectors;
-
- public class OCRUtils {
- //API的SecretId
- private static final String SECRET_ID = "AKIDeEf2A8uX1HSainvvnXAc3X9ZlhtyvkMp";
- //API的SecretKey
- private static final String SECRET_KEY = "QjphKo8zkHZigT8j9PVtFPJyfIvO3d6V";
- // 仲裁申请书识别字段
- private static final String[] applicantName = {"申请人", "统一社会信用代码", "负责人", "住所", "联系地址"
- , "委托代理人", "联系电话", "电子邮件", "被申请人", "居民身份证号码", "仲裁请求", "事实和理由"};
- // 贷款合同识别字段
- private static final String[] contractName = {
- "合同编号","甲方(贷款人)" ,"或委托代理人签字:","本合同的初始贷款年利率为","乙方确认有效的电子信箱地址为"};
- // 调解协议识别字段
- private static final String[] accordName = {"金融消费纠纷基本情况","经调解,双方自愿达成如下协议"};
- // 授权委托书识别字段
- private static final String[] powerAttorneyName = {"职务"};
- /**
- * pdf识别成文字
- * @param imageBase64
- * @param pageNumber
- * @param fatchRules 抓取规则
- * @return
- */
- public static String pdfIdentifyText(String imageBase64, Integer pageNumber, List<FatchRule> fatchRules) {
- StringBuilder respStr=new StringBuilder();
- try {
- // 实例化一个认证对象,入参需要传入腾讯云账户 SecretId 和 SecretKey,此处还需注意密钥对的保密
- // 代码泄露可能会导致 SecretId 和 SecretKey 泄露,并威胁账号下所有资源的安全性。以下代码示例仅供参考,建议采用更安全的方式来使用密钥,请参见:https://cloud.tencent.com/document/product/1278/85305
- // 密钥可前往官网控制台 https://console.cloud.tencent.com/cam/capi 进行获取
- Credential cred = new Credential(SECRET_ID, SECRET_KEY);
- // 实例化一个http选项,可选的,没有特殊需求可以跳过
- HttpProfile httpProfile = new HttpProfile();
- httpProfile.setEndpoint("ocr.tencentcloudapi.com");
- // 实例化一个client选项,可选的,没有特殊需求可以跳过
- ClientProfile clientProfile = new ClientProfile();
- clientProfile.setHttpProfile(httpProfile);
- // 实例化要请求产品的client对象,clientProfile是可选的
- OcrClient client = new OcrClient(cred, "ap-beijing", clientProfile);
- // 实例化一个请求对象,每个接口都会对应一个request对象
- GeneralAccurateOCRRequest req = new GeneralAccurateOCRRequest();
- req.setImageBase64(imageBase64);
- req.setIsPdf(true);
- req.setPdfPageNumber(pageNumber.longValue());
- // 返回的resp是一个SmartStructuralOCRV2Response的实例,与请求对象对应
- GeneralAccurateOCRResponse resp = client.GeneralAccurateOCR(req);
- // 输出json格式的字符串回包
- System.out.println(GeneralAccurateOCRResponse.toJsonString(resp));
- // 获取响应内容
- TextDetection[] textDetections = resp.getTextDetections();
- if (textDetections == null || textDetections.length == 0) {
- return respStr.toString();
- }
- for (TextDetection textDetection : textDetections) {
- respStr.append(textDetection.getDetectedText());
- }
- }catch (TencentCloudSDKException e){
- throw new ServiceException("ocr识别失败");
- }
- if(respStr.toString().endsWith(String.valueOf(pageNumber))){
- int lastIndexOf = respStr.toString().lastIndexOf(String.valueOf(pageNumber));
- return respStr.toString().substring(0,lastIndexOf);
- }
- return respStr.toString();
-
-
-
- //
- // //解析数据
- // String s = GeneralAccurateOCRResponse.toJsonString(resp);
- // // 解析JSON数据
- // JSONObject jsonObject = new JSONObject(s);
- // JSONArray structuralList = jsonObject.getJSONArray("TextDetections");
- // // 遍历StructuralList中的Groups,获取Key对应的AutoName和Value对应的AutoConten
- // StringBuilder stringBuilder = new StringBuilder(); // 创建一个StringBuilder对象
- // for (int i = 0; i < structuralList.length(); i++) {
- // JSONArray groups = structuralList.getJSONObject(i).getJSONArray("Groups");
- // for (int j = 0; j < groups.length(); j++) {
- // JSONArray lines = groups.getJSONObject(j).getJSONArray("Lines");
- // for (int k = 0; k < lines.length(); k++) {
- // JSONObject line = lines.getJSONObject(k);
- // JSONObject key = line.getJSONObject("Key");
- // JSONObject value = line.getJSONObject("Value");
- // String autoName = key.getString("AutoName");
- // String autoContent = value.getString("AutoContent");
- // String text = autoName + Constants.PDFSTR + autoContent;
- // if (stringBuilder.length() > 0) {
- // stringBuilder.append(Constants.BR); // 在已有内容的情况下添加逗号分隔符
- // }
- // stringBuilder.append(text); // 拼接当前的字符串
- // }
- // }
- // }
- // return stringBuilder.toString(); // 获取最终的拼接结果
- // } catch (TencentCloudSDKException e) {
- // System.out.println(e.toString());
- // }
- }
-
- /**
- * pdf识别成文字
- * @param imageBase64
- * @param pageNumber
- * @param type pdf类型
- * @return
- */
- public static String pdfIdentifyText1(String imageBase64, Integer pageNumber,String type) {
- try {
- // 实例化一个认证对象,入参需要传入腾讯云账户 SecretId 和 SecretKey,此处还需注意密钥对的保密
- // 代码泄露可能会导致 SecretId 和 SecretKey 泄露,并威胁账号下所有资源的安全性。以下代码示例仅供参考,建议采用更安全的方式来使用密钥,请参见:https://cloud.tencent.com/document/product/1278/85305
- // 密钥可前往官网控制台 https://console.cloud.tencent.com/cam/capi 进行获取
- Credential cred = new Credential(SECRET_ID, SECRET_KEY);
- // 实例化一个http选项,可选的,没有特殊需求可以跳过
- HttpProfile httpProfile = new HttpProfile();
- httpProfile.setEndpoint("ocr.tencentcloudapi.com");
- // 实例化一个client选项,可选的,没有特殊需求可以跳过
- ClientProfile clientProfile = new ClientProfile();
- clientProfile.setHttpProfile(httpProfile);
- // 实例化要请求产品的client对象,clientProfile是可选的
- OcrClient client = new OcrClient(cred, "ap-beijing", clientProfile);
- // 实例化一个请求对象,每个接口都会对应一个request对象
- SmartStructuralOCRV2Request req = new SmartStructuralOCRV2Request();
- req.setImageBase64(imageBase64);
- req.setIsPdf(true);
- req.setPdfPageNumber(pageNumber.longValue());
- if(type.contains("申请书")){
- req.setItemNames(applicantName);
- }else if(type.contains("调解协议")){
- req.setItemNames(accordName);
- }else if(type.contains("合同")){
- req.setItemNames(contractName);
- }else if(type.contains("授权委托书")){
- req.setItemNames(powerAttorneyName);
- }
-
- // 返回的resp是一个SmartStructuralOCRV2Response的实例,与请求对象对应
- SmartStructuralOCRV2Response resp = client.SmartStructuralOCRV2(req);
- // 输出json格式的字符串回包
- System.out.println(SmartStructuralOCRV2Response.toJsonString(resp));
- //解析数据
- String s = SmartStructuralOCRV2Response.toJsonString(resp);
- // 解析JSON数据
- JSONObject jsonObject = new JSONObject(s);
- JSONArray structuralList = jsonObject.getJSONArray("StructuralList");
- // 遍历StructuralList中的Groups,获取Key对应的AutoName和Value对应的AutoConten
- StringBuilder stringBuilder = new StringBuilder(); // 创建一个StringBuilder对象
- for (int i = 0; i < structuralList.length(); i++) {
- JSONArray groups = structuralList.getJSONObject(i).getJSONArray("Groups");
- for (int j = 0; j < groups.length(); j++) {
- JSONArray lines = groups.getJSONObject(j).getJSONArray("Lines");
- for (int k = 0; k < lines.length(); k++) {
- JSONObject line = lines.getJSONObject(k);
- JSONObject key = line.getJSONObject("Key");
- JSONObject value = line.getJSONObject("Value");
- String autoName = key.getString("AutoName");
- String autoContent = value.getString("AutoContent");
- String text = autoName + Constants.PDFSTR + autoContent;
- if (stringBuilder.length() > 0) {
- stringBuilder.append(Constants.BR); // 在已有内容的情况下添加逗号分隔符
- }
- stringBuilder.append(text); // 拼接当前的字符串
- }
- }
- }
- return stringBuilder.toString(); // 获取最终的拼接结果
- } catch (TencentCloudSDKException e) {
- System.out.println(e.toString());
- }
- return null;
- }
- public static String pdfConvertBase64(String pathUrl){
- try {
- File file = new File(pathUrl);
- FileInputStream fileInputStream = new FileInputStream(file);
- byte[] fileBytes = new byte[(int) file.length()];
- fileInputStream.read(fileBytes);
- fileInputStream.close();
- // 将字节数组转换为Base64值
- return Base64.getEncoder().encodeToString(fileBytes);
- } catch (IOException e) {
- e.printStackTrace();
- }
- return null;
- }
-
- /**
- * 根据抓取规则获取内容
- * @param ocrText ocr识别的text
- * @param fatchRules 抓取规则
- * @return
- */
- public static void fatchRuleGetContent(String ocrText, List<FatchRule> fatchRules, Map<String, String> fatchMap) {
-
- if (CollectionUtil.isNotEmpty(fatchRules)) {
-
- for (FatchRule fatchRule : fatchRules) {
- if (StrUtil.isEmpty(fatchRule.getStartContent())) {
- continue;
- }
-
- if (StrUtil.isNotEmpty(fatchRule.getStartContent()) && StrUtil.isNotEmpty(fatchRule.getEndContent())) {
- String s = StringUtils.substringBetween(ocrText, fatchRule.getStartContent(), fatchRule.getEndContent());
- if(StrUtil.isNotEmpty(s)){
- fatchMap.put(fatchRule.getColumnName(), StrUtil.trim(s));
- }else {
- fatchMap.put(fatchRule.getColumnName(),"");
- }
-
- }else if(StrUtil.isNotEmpty(fatchRule.getStartContent()) && StrUtil.isEmpty(fatchRule.getEndContent())){
- String s = StringUtils.substringAfter(ocrText,fatchRule.getStartContent());
- if(StrUtil.isNotEmpty(s)){
- fatchMap.put(fatchRule.getColumnName(), StrUtil.trim(s));
- }else {
- fatchMap.put(fatchRule.getColumnName(),"");
- }
- }
- }
- }
-
-
- }
- }
|