智能仲裁后端服务

OCRUtils.java 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. package com.ruoyi.wisdomarbitrate.utils;
  import cn.hutool.core.collection.CollectionUtil;
  import cn.hutool.core.util.StrUtil;
  import com.ruoyi.common.constant.Constants;
  import com.ruoyi.common.exception.ServiceException;
  import com.ruoyi.common.utils.StringUtils;
  import com.ruoyi.wisdomarbitrate.domain.FatchRule;
  import com.tencentcloudapi.bsca.v20210811.models.LicenseSummary;
  import com.tencentcloudapi.common.Credential;
  import com.tencentcloudapi.common.exception.TencentCloudSDKException;
  import com.tencentcloudapi.common.profile.ClientProfile;
  import com.tencentcloudapi.common.profile.HttpProfile;
  import com.tencentcloudapi.ocr.v20181119.OcrClient;
  import com.tencentcloudapi.ocr.v20181119.models.*;
  import org.json.JSONArray;
  import org.json.JSONObject;
  import java.io.File;
  import java.io.FileInputStream;
  import java.io.IOException;
  import java.util.*;
  import java.util.logging.Level;
  import java.util.logging.Logger;
  import java.util.stream.Collectors;
  22. public class OCRUtils {
  23. //API的SecretId
  24. private static final String SECRET_ID = "AKIDeEf2A8uX1HSainvvnXAc3X9ZlhtyvkMp";
  25. //API的SecretKey
  26. private static final String SECRET_KEY = "QjphKo8zkHZigT8j9PVtFPJyfIvO3d6V";
  27. // 仲裁申请书识别字段
  28. private static final String[] applicantName = {"申请人", "统一社会信用代码", "负责人", "住所", "联系地址"
  29. , "委托代理人", "联系电话", "电子邮件", "被申请人", "居民身份证号码", "仲裁请求", "事实和理由"};
  30. // 贷款合同识别字段
  31. private static final String[] contractName = {
  32. "合同编号","甲方(贷款人)" ,"或委托代理人签字:","本合同的初始贷款年利率为","乙方确认有效的电子信箱地址为"};
  33. // 调解协议识别字段
  34. private static final String[] accordName = {"金融消费纠纷基本情况","经调解,双方自愿达成如下协议"};
  35. // 授权委托书识别字段
  36. private static final String[] powerAttorneyName = {"职务"};
  37. /**
  38. * pdf识别成文字
  39. * @param imageBase64
  40. * @param pageNumber
  41. * @param fatchRules 抓取规则
  42. * @return
  43. */
  44. public static String pdfIdentifyText(String imageBase64, Integer pageNumber, List<FatchRule> fatchRules) {
  45. StringBuilder respStr=new StringBuilder();
  46. try {
  47. // 实例化一个认证对象,入参需要传入腾讯云账户 SecretId 和 SecretKey,此处还需注意密钥对的保密
  48. // 代码泄露可能会导致 SecretId 和 SecretKey 泄露,并威胁账号下所有资源的安全性。以下代码示例仅供参考,建议采用更安全的方式来使用密钥,请参见:https://cloud.tencent.com/document/product/1278/85305
  49. // 密钥可前往官网控制台 https://console.cloud.tencent.com/cam/capi 进行获取
  50. Credential cred = new Credential(SECRET_ID, SECRET_KEY);
  51. // 实例化一个http选项,可选的,没有特殊需求可以跳过
  52. HttpProfile httpProfile = new HttpProfile();
  53. httpProfile.setEndpoint("ocr.tencentcloudapi.com");
  54. // 实例化一个client选项,可选的,没有特殊需求可以跳过
  55. ClientProfile clientProfile = new ClientProfile();
  56. clientProfile.setHttpProfile(httpProfile);
  57. // 实例化要请求产品的client对象,clientProfile是可选的
  58. OcrClient client = new OcrClient(cred, "ap-beijing", clientProfile);
  59. // 实例化一个请求对象,每个接口都会对应一个request对象
  60. GeneralAccurateOCRRequest req = new GeneralAccurateOCRRequest();
  61. req.setImageBase64(imageBase64);
  62. req.setIsPdf(true);
  63. req.setPdfPageNumber(pageNumber.longValue());
  64. // 返回的resp是一个SmartStructuralOCRV2Response的实例,与请求对象对应
  65. GeneralAccurateOCRResponse resp = client.GeneralAccurateOCR(req);
  66. // 输出json格式的字符串回包
  67. System.out.println(GeneralAccurateOCRResponse.toJsonString(resp));
  68. // 获取响应内容
  69. TextDetection[] textDetections = resp.getTextDetections();
  70. if (textDetections == null || textDetections.length == 0) {
  71. return respStr.toString();
  72. }
  73. for (TextDetection textDetection : textDetections) {
  74. respStr.append(textDetection.getDetectedText());
  75. }
  76. }catch (TencentCloudSDKException e){
  77. throw new ServiceException("ocr识别失败");
  78. }
  79. if(respStr.toString().endsWith(String.valueOf(pageNumber))){
  80. int lastIndexOf = respStr.toString().lastIndexOf(String.valueOf(pageNumber));
  81. return respStr.toString().substring(0,lastIndexOf);
  82. }
  83. return respStr.toString();
  84. //
  85. // //解析数据
  86. // String s = GeneralAccurateOCRResponse.toJsonString(resp);
  87. // // 解析JSON数据
  88. // JSONObject jsonObject = new JSONObject(s);
  89. // JSONArray structuralList = jsonObject.getJSONArray("TextDetections");
  90. // // 遍历StructuralList中的Groups,获取Key对应的AutoName和Value对应的AutoConten
  91. // StringBuilder stringBuilder = new StringBuilder(); // 创建一个StringBuilder对象
  92. // for (int i = 0; i < structuralList.length(); i++) {
  93. // JSONArray groups = structuralList.getJSONObject(i).getJSONArray("Groups");
  94. // for (int j = 0; j < groups.length(); j++) {
  95. // JSONArray lines = groups.getJSONObject(j).getJSONArray("Lines");
  96. // for (int k = 0; k < lines.length(); k++) {
  97. // JSONObject line = lines.getJSONObject(k);
  98. // JSONObject key = line.getJSONObject("Key");
  99. // JSONObject value = line.getJSONObject("Value");
  100. // String autoName = key.getString("AutoName");
  101. // String autoContent = value.getString("AutoContent");
  102. // String text = autoName + Constants.PDFSTR + autoContent;
  103. // if (stringBuilder.length() > 0) {
  104. // stringBuilder.append(Constants.BR); // 在已有内容的情况下添加逗号分隔符
  105. // }
  106. // stringBuilder.append(text); // 拼接当前的字符串
  107. // }
  108. // }
  109. // }
  110. // return stringBuilder.toString(); // 获取最终的拼接结果
  111. // } catch (TencentCloudSDKException e) {
  112. // System.out.println(e.toString());
  113. // }
  114. }
  115. /**
  116. * pdf识别成文字
  117. * @param imageBase64
  118. * @param pageNumber
  119. * @param type pdf类型
  120. * @return
  121. */
  122. public static String pdfIdentifyText1(String imageBase64, Integer pageNumber,String type) {
  123. try {
  124. // 实例化一个认证对象,入参需要传入腾讯云账户 SecretId 和 SecretKey,此处还需注意密钥对的保密
  125. // 代码泄露可能会导致 SecretId 和 SecretKey 泄露,并威胁账号下所有资源的安全性。以下代码示例仅供参考,建议采用更安全的方式来使用密钥,请参见:https://cloud.tencent.com/document/product/1278/85305
  126. // 密钥可前往官网控制台 https://console.cloud.tencent.com/cam/capi 进行获取
  127. Credential cred = new Credential(SECRET_ID, SECRET_KEY);
  128. // 实例化一个http选项,可选的,没有特殊需求可以跳过
  129. HttpProfile httpProfile = new HttpProfile();
  130. httpProfile.setEndpoint("ocr.tencentcloudapi.com");
  131. // 实例化一个client选项,可选的,没有特殊需求可以跳过
  132. ClientProfile clientProfile = new ClientProfile();
  133. clientProfile.setHttpProfile(httpProfile);
  134. // 实例化要请求产品的client对象,clientProfile是可选的
  135. OcrClient client = new OcrClient(cred, "ap-beijing", clientProfile);
  136. // 实例化一个请求对象,每个接口都会对应一个request对象
  137. SmartStructuralOCRV2Request req = new SmartStructuralOCRV2Request();
  138. req.setImageBase64(imageBase64);
  139. req.setIsPdf(true);
  140. req.setPdfPageNumber(pageNumber.longValue());
  141. if(type.contains("申请书")){
  142. req.setItemNames(applicantName);
  143. }else if(type.contains("调解协议")){
  144. req.setItemNames(accordName);
  145. }else if(type.contains("合同")){
  146. req.setItemNames(contractName);
  147. }else if(type.contains("授权委托书")){
  148. req.setItemNames(powerAttorneyName);
  149. }
  150. // 返回的resp是一个SmartStructuralOCRV2Response的实例,与请求对象对应
  151. SmartStructuralOCRV2Response resp = client.SmartStructuralOCRV2(req);
  152. // 输出json格式的字符串回包
  153. System.out.println(SmartStructuralOCRV2Response.toJsonString(resp));
  154. //解析数据
  155. String s = SmartStructuralOCRV2Response.toJsonString(resp);
  156. // 解析JSON数据
  157. JSONObject jsonObject = new JSONObject(s);
  158. JSONArray structuralList = jsonObject.getJSONArray("StructuralList");
  159. // 遍历StructuralList中的Groups,获取Key对应的AutoName和Value对应的AutoConten
  160. StringBuilder stringBuilder = new StringBuilder(); // 创建一个StringBuilder对象
  161. for (int i = 0; i < structuralList.length(); i++) {
  162. JSONArray groups = structuralList.getJSONObject(i).getJSONArray("Groups");
  163. for (int j = 0; j < groups.length(); j++) {
  164. JSONArray lines = groups.getJSONObject(j).getJSONArray("Lines");
  165. for (int k = 0; k < lines.length(); k++) {
  166. JSONObject line = lines.getJSONObject(k);
  167. JSONObject key = line.getJSONObject("Key");
  168. JSONObject value = line.getJSONObject("Value");
  169. String autoName = key.getString("AutoName");
  170. String autoContent = value.getString("AutoContent");
  171. String text = autoName + Constants.PDFSTR + autoContent;
  172. if (stringBuilder.length() > 0) {
  173. stringBuilder.append(Constants.BR); // 在已有内容的情况下添加逗号分隔符
  174. }
  175. stringBuilder.append(text); // 拼接当前的字符串
  176. }
  177. }
  178. }
  179. return stringBuilder.toString(); // 获取最终的拼接结果
  180. } catch (TencentCloudSDKException e) {
  181. System.out.println(e.toString());
  182. }
  183. return null;
  184. }
  185. public static String pdfConvertBase64(String pathUrl){
  186. try {
  187. File file = new File(pathUrl);
  188. FileInputStream fileInputStream = new FileInputStream(file);
  189. byte[] fileBytes = new byte[(int) file.length()];
  190. fileInputStream.read(fileBytes);
  191. fileInputStream.close();
  192. // 将字节数组转换为Base64值
  193. return Base64.getEncoder().encodeToString(fileBytes);
  194. } catch (IOException e) {
  195. e.printStackTrace();
  196. }
  197. return null;
  198. }
  199. /**
  200. * 根据抓取规则获取内容
  201. * @param ocrText ocr识别的text
  202. * @param fatchRules 抓取规则
  203. * @return
  204. */
  205. public static void fatchRuleGetContent(String ocrText, List<FatchRule> fatchRules, Map<String, String> fatchMap) {
  206. if (CollectionUtil.isNotEmpty(fatchRules)) {
  207. for (FatchRule fatchRule : fatchRules) {
  208. if (StrUtil.isEmpty(fatchRule.getStartContent())) {
  209. continue;
  210. }
  211. if (StrUtil.isNotEmpty(fatchRule.getStartContent()) && StrUtil.isNotEmpty(fatchRule.getEndContent())) {
  212. String s = StringUtils.substringBetween(ocrText, fatchRule.getStartContent(), fatchRule.getEndContent());
  213. if(StrUtil.isNotEmpty(s)){
  214. fatchMap.put(fatchRule.getColumnName(), StrUtil.trim(s));
  215. }else {
  216. fatchMap.put(fatchRule.getColumnName(),"");
  217. }
  218. }else if(StrUtil.isNotEmpty(fatchRule.getStartContent()) && StrUtil.isEmpty(fatchRule.getEndContent())){
  219. String s = StringUtils.substringAfter(ocrText,fatchRule.getStartContent());
  220. if(StrUtil.isNotEmpty(s)){
  221. fatchMap.put(fatchRule.getColumnName(), StrUtil.trim(s));
  222. }else {
  223. fatchMap.put(fatchRule.getColumnName(),"");
  224. }
  225. }
  226. }
  227. }
  228. }
  229. }