package com.ruoyi.wisdomarbitrate.utils;

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.util.StrUtil;
import com.ruoyi.common.constant.Constants;
import com.ruoyi.common.exception.ServiceException;
import com.ruoyi.common.utils.StringUtils;
import com.ruoyi.wisdomarbitrate.domain.FatchRule;
import com.tencentcloudapi.bsca.v20210811.models.LicenseSummary;
import com.tencentcloudapi.common.Credential;
import com.tencentcloudapi.common.exception.TencentCloudSDKException;
import com.tencentcloudapi.common.profile.ClientProfile;
import com.tencentcloudapi.common.profile.HttpProfile;
import com.tencentcloudapi.ocr.v20181119.OcrClient;
import com.tencentcloudapi.ocr.v20181119.models.*;
import org.json.JSONArray;
import org.json.JSONObject;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Static helpers that send PDF pages to the Tencent Cloud OCR service and
 * extract field values from the recognized text.
 */
public class OCRUtils {

    /**
     * Fallback API SecretId, used only when the environment does not supply credentials.
     * NOTE(review): credentials committed to source control should be rotated and removed.
     */
    private static final String SECRET_ID = "AKIDeEf2A8uX1HSainvvnXAc3X9ZlhtyvkMp";

    /**
     * Fallback API SecretKey, used only when the environment does not supply credentials.
     * NOTE(review): credentials committed to source control should be rotated and removed.
     */
    private static final String SECRET_KEY = "QjphKo8zkHZigT8j9PVtFPJyfIvO3d6V";

    /** Environment variable that overrides {@link #SECRET_ID} when set together with the key variable. */
    private static final String ENV_SECRET_ID = "TENCENTCLOUD_SECRET_ID";

    /** Environment variable that overrides {@link #SECRET_KEY} when set together with the id variable. */
    private static final String ENV_SECRET_KEY = "TENCENTCLOUD_SECRET_KEY";

    /** Endpoint every OCR request is sent to. */
    private static final String OCR_ENDPOINT = "ocr.tencentcloudapi.com";

    /** Region passed to the OCR client. */
    private static final String OCR_REGION = "ap-beijing";

    /** Item names requested for arbitration application documents. */
    private static final String[] applicantName = {"申请人", "统一社会信用代码", "负责人", "住所", "联系地址",
            "委托代理人", "联系电话", "电子邮件", "被申请人", "居民身份证号码", "仲裁请求", "事实和理由"};

    /** Item names requested for loan contract documents. */
    private static final String[] contractName = {"合同编号", "甲方(贷款人)", "或委托代理人签字:",
            "本合同的初始贷款年利率为", "乙方确认有效的电子信箱地址为"};

    /** Item names requested for mediation agreement documents. */
    private static final String[] accordName = {"金融消费纠纷基本情况", "经调解,双方自愿达成如下协议"};

    /** Item names requested for power-of-attorney documents. */
    private static final String[] powerAttorneyName = {"职务"};

    /**
     * Runs general accurate OCR on one page of a PDF and returns the detected text
     * fragments concatenated in the order the service returned them.
     *
     * <p>If the concatenated text ends with the decimal form of {@code pageNumber},
     * that trailing occurrence is removed before returning.
     *
     * @param imageBase64 Base64-encoded PDF content
     * @param pageNumber  page of the PDF to recognize; must not be {@code null}
     * @param fatchRules  capture rules; not used by this method, kept for caller compatibility
     * @return the recognized text, or an empty string when nothing was detected
     * @throws ServiceException when the OCR SDK call fails; the SDK exception is attached as the cause
     */
    public static String pdfIdentifyText(String imageBase64, Integer pageNumber, List<FatchRule> fatchRules) {
        StringBuilder recognized = new StringBuilder();
        try {
            GeneralAccurateOCRRequest req = new GeneralAccurateOCRRequest();
            req.setImageBase64(imageBase64);
            req.setIsPdf(true);
            req.setPdfPageNumber(pageNumber.longValue());

            GeneralAccurateOCRResponse resp = createClient().GeneralAccurateOCR(req);
            // Dump the raw response as JSON for troubleshooting.
            System.out.println(GeneralAccurateOCRResponse.toJsonString(resp));

            TextDetection[] textDetections = resp.getTextDetections();
            if (textDetections == null || textDetections.length == 0) {
                return recognized.toString();
            }
            for (TextDetection textDetection : textDetections) {
                recognized.append(textDetection.getDetectedText());
            }
        } catch (TencentCloudSDKException e) {
            // Same message and exception type as before, but keep the SDK error for diagnosis.
            ServiceException failure = new ServiceException("ocr识别失败");
            failure.initCause(e);
            throw failure;
        }

        String text = recognized.toString();
        String pageSuffix = String.valueOf(pageNumber);
        if (text.endsWith(pageSuffix)) {
            return text.substring(0, text.length() - pageSuffix.length());
        }
        return text;
    }

    /**
     * Runs structured OCR on one page of a PDF and returns the recognized key/value
     * lines as a single string.
     *
     * <p>Each line is rendered as {@code AutoName + Constants.PDFSTR + AutoContent};
     * lines are joined with {@code Constants.BR}. The set of item names requested from
     * the service is chosen from {@code type} by substring match; when {@code type} is
     * {@code null} or matches none of the known document kinds, no item names are set.
     *
     * @param imageBase64 Base64-encoded PDF content
     * @param pageNumber  page of the PDF to recognize; must not be {@code null}
     * @param type        document kind description used to pick the requested item names
     * @return the joined key/value lines (empty when the response has none),
     *         or {@code null} when the OCR SDK call fails
     */
    public static String pdfIdentifyText1(String imageBase64, Integer pageNumber, String type) {
        try {
            SmartStructuralOCRV2Request req = new SmartStructuralOCRV2Request();
            req.setImageBase64(imageBase64);
            req.setIsPdf(true);
            req.setPdfPageNumber(pageNumber.longValue());

            String[] itemNames = itemNamesFor(type);
            if (itemNames != null) {
                req.setItemNames(itemNames);
            }

            SmartStructuralOCRV2Response resp = createClient().SmartStructuralOCRV2(req);
            String json = SmartStructuralOCRV2Response.toJsonString(resp);
            // Dump the raw response as JSON for troubleshooting.
            System.out.println(json);

            return joinStructuralLines(json);
        } catch (TencentCloudSDKException e) {
            // Best effort: callers receive null on SDK failure.
            System.out.println(e.toString());
            return null;
        }
    }

    /**
     * Reads a file and returns its content Base64-encoded.
     *
     * @param pathUrl file system path of the file to read
     * @return the Base64 text of the whole file, or {@code null} when the file cannot be read
     */
    public static String pdfConvertBase64(String pathUrl) {
        try {
            // readAllBytes reads the complete file and releases the handle even on failure.
            byte[] fileBytes = Files.readAllBytes(Paths.get(pathUrl));
            return Base64.getEncoder().encodeToString(fileBytes);
        } catch (IOException | InvalidPathException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Applies each capture rule to the OCR text and stores the captured value in
     * {@code fatchMap} under the rule's column name.
     *
     * <p>Rules without start content are skipped. When a rule has both start and end
     * content, the text between them is captured; when it only has start content, the
     * text after it is captured. The captured text is trimmed; when nothing is captured
     * an empty string is stored.
     *
     * @param ocrText    text recognized by OCR
     * @param fatchRules capture rules; nothing happens when {@code null} or empty
     * @param fatchMap   receives one entry per applied rule
     */
    public static void fatchRuleGetContent(String ocrText, List<FatchRule> fatchRules,
                                           Map<String, ? super String> fatchMap) {
        if (CollectionUtil.isEmpty(fatchRules)) {
            return;
        }
        for (FatchRule fatchRule : fatchRules) {
            if (fatchRule == null || StrUtil.isEmpty(fatchRule.getStartContent())) {
                continue;
            }
            String captured;
            if (StrUtil.isNotEmpty(fatchRule.getEndContent())) {
                captured = StringUtils.substringBetween(ocrText, fatchRule.getStartContent(),
                        fatchRule.getEndContent());
            } else {
                captured = StringUtils.substringAfter(ocrText, fatchRule.getStartContent());
            }
            String value = StrUtil.isNotEmpty(captured) ? StrUtil.trim(captured) : "";
            fatchMap.put(fatchRule.getColumnName(), value);
        }
    }

    /** Builds an OCR client bound to the configured endpoint, region and credentials. */
    private static OcrClient createClient() {
        HttpProfile httpProfile = new HttpProfile();
        httpProfile.setEndpoint(OCR_ENDPOINT);
        ClientProfile clientProfile = new ClientProfile();
        clientProfile.setHttpProfile(httpProfile);
        return new OcrClient(resolveCredential(), OCR_REGION, clientProfile);
    }

    /** Uses the environment-provided key pair when both variables are set, else the built-in fallback. */
    private static Credential resolveCredential() {
        String envId = System.getenv(ENV_SECRET_ID);
        String envKey = System.getenv(ENV_SECRET_KEY);
        if (StrUtil.isNotEmpty(envId) && StrUtil.isNotEmpty(envKey)) {
            return new Credential(envId, envKey);
        }
        return new Credential(SECRET_ID, SECRET_KEY);
    }

    /** Picks the item names to request for a document kind, or {@code null} when none applies. */
    private static String[] itemNamesFor(String type) {
        if (type == null) {
            return null;
        }
        // Order matters: the first matching substring wins.
        if (type.contains("申请书")) {
            return applicantName;
        }
        if (type.contains("调解协议")) {
            return accordName;
        }
        if (type.contains("合同")) {
            return contractName;
        }
        if (type.contains("授权委托书")) {
            return powerAttorneyName;
        }
        return null;
    }

    /** Walks StructuralList / Groups / Lines of the response JSON and joins every key/value line. */
    private static String joinStructuralLines(String json) {
        StringBuilder joined = new StringBuilder();
        JSONArray structuralList = new JSONObject(json).optJSONArray("StructuralList");
        if (structuralList == null) {
            return joined.toString();
        }
        for (int i = 0; i < structuralList.length(); i++) {
            JSONObject structure = structuralList.optJSONObject(i);
            JSONArray groups = structure == null ? null : structure.optJSONArray("Groups");
            if (groups == null) {
                continue;
            }
            for (int j = 0; j < groups.length(); j++) {
                JSONObject group = groups.optJSONObject(j);
                JSONArray lines = group == null ? null : group.optJSONArray("Lines");
                if (lines == null) {
                    continue;
                }
                for (int k = 0; k < lines.length(); k++) {
                    JSONObject line = lines.optJSONObject(k);
                    if (line == null) {
                        continue;
                    }
                    JSONObject key = line.optJSONObject("Key");
                    JSONObject value = line.optJSONObject("Value");
                    String autoName = key == null ? "" : key.optString("AutoName", "");
                    String autoContent = value == null ? "" : value.optString("AutoContent", "");
                    if (joined.length() > 0) {
                        // Separate consecutive entries.
                        joined.append(Constants.BR);
                    }
                    joined.append(autoName + Constants.PDFSTR + autoContent);
                }
            }
        }
        return joined.toString();
    }
}