智能仲裁后端服务

OCRUtils.java 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. package com.ruoyi.wisdomarbitrate.utils;
  2. import cn.hutool.core.collection.CollectionUtil;
  3. import cn.hutool.core.util.StrUtil;
  4. import com.ruoyi.common.constant.Constants;
  5. import com.ruoyi.common.exception.ServiceException;
  6. import com.ruoyi.common.utils.StringUtils;
  7. import com.ruoyi.wisdomarbitrate.domain.FatchRule;
  8. import com.tencentcloudapi.bsca.v20210811.models.LicenseSummary;
  9. import com.tencentcloudapi.common.Credential;
  10. import com.tencentcloudapi.common.exception.TencentCloudSDKException;
  11. import com.tencentcloudapi.common.profile.ClientProfile;
  12. import com.tencentcloudapi.common.profile.HttpProfile;
  13. import com.tencentcloudapi.ocr.v20181119.OcrClient;
  14. import com.tencentcloudapi.ocr.v20181119.models.*;
  15. import org.json.JSONArray;
  16. import org.json.JSONObject;
  17. import java.io.File;
  18. import java.io.FileInputStream;
  19. import java.io.IOException;
  20. import java.util.*;
  21. import java.util.stream.Collectors;
  22. public class OCRUtils {
  23. //API的SecretId
  24. private static final String SECRET_ID = "AKIDeEf2A8uX1HSainvvnXAc3X9ZlhtyvkMp";
  25. //API的SecretKey
  26. private static final String SECRET_KEY = "QjphKo8zkHZigT8j9PVtFPJyfIvO3d6V";
  27. // 仲裁申请书识别字段
  28. private static final String[] applicantName = {"申请人", "统一社会信用代码", "负责人", "住所", "联系地址"
  29. , "委托代理人", "联系电话", "电子邮件", "被申请人", "居民身份证号码", "仲裁请求", "事实和理由"};
  30. // 贷款合同识别字段
  31. private static final String[] contractName = {
  32. "合同编号", "甲方(贷款人)", "或委托代理人签字:", "本合同的初始贷款年利率为", "乙方确认有效的电子信箱地址为"};
  33. // 调解协议识别字段
  34. private static final String[] accordName = {"金融消费纠纷基本情况", "经调解,双方自愿达成如下协议"};
  35. // 授权委托书识别字段
  36. private static final String[] powerAttorneyName = {"职务"};
  37. /**
  38. * pdf识别成文字
  39. *
  40. * @param imageBase64
  41. * @param pageNumber
  42. * @param fatchRules 抓取规则
  43. * @return
  44. */
  45. public static String pdfIdentifyText(String imageBase64, Integer pageNumber, List<FatchRule> fatchRules) {
  46. StringBuilder respStr = new StringBuilder();
  47. try {
  48. // 实例化一个认证对象,入参需要传入腾讯云账户 SecretId 和 SecretKey,此处还需注意密钥对的保密
  49. // 代码泄露可能会导致 SecretId 和 SecretKey 泄露,并威胁账号下所有资源的安全性。以下代码示例仅供参考,建议采用更安全的方式来使用密钥,请参见:https://cloud.tencent.com/document/product/1278/85305
  50. // 密钥可前往官网控制台 https://console.cloud.tencent.com/cam/capi 进行获取
  51. Credential cred = new Credential(SECRET_ID, SECRET_KEY);
  52. // 实例化一个http选项,可选的,没有特殊需求可以跳过
  53. HttpProfile httpProfile = new HttpProfile();
  54. httpProfile.setEndpoint("ocr.tencentcloudapi.com");
  55. // 实例化一个client选项,可选的,没有特殊需求可以跳过
  56. ClientProfile clientProfile = new ClientProfile();
  57. clientProfile.setHttpProfile(httpProfile);
  58. // 实例化要请求产品的client对象,clientProfile是可选的
  59. OcrClient client = new OcrClient(cred, "ap-beijing", clientProfile);
  60. // 实例化一个请求对象,每个接口都会对应一个request对象
  61. GeneralAccurateOCRRequest req = new GeneralAccurateOCRRequest();
  62. req.setImageBase64(imageBase64);
  63. req.setIsPdf(true);
  64. req.setPdfPageNumber(pageNumber.longValue());
  65. // 返回的resp是一个SmartStructuralOCRV2Response的实例,与请求对象对应
  66. GeneralAccurateOCRResponse resp = client.GeneralAccurateOCR(req);
  67. // 输出json格式的字符串回包
  68. System.out.println(GeneralAccurateOCRResponse.toJsonString(resp));
  69. // 获取响应内容
  70. TextDetection[] textDetections = resp.getTextDetections();
  71. if (textDetections == null || textDetections.length == 0) {
  72. return respStr.toString();
  73. }
  74. for (TextDetection textDetection : textDetections) {
  75. respStr.append(textDetection.getDetectedText());
  76. }
  77. } catch (TencentCloudSDKException e) {
  78. throw new ServiceException("ocr识别失败");
  79. }
  80. if (respStr.toString().endsWith(String.valueOf(pageNumber))) {
  81. int lastIndexOf = respStr.toString().lastIndexOf(String.valueOf(pageNumber));
  82. return respStr.toString().substring(0, lastIndexOf);
  83. }
  84. return respStr.toString();
  85. //
  86. // //解析数据
  87. // String s = GeneralAccurateOCRResponse.toJsonString(resp);
  88. // // 解析JSON数据
  89. // JSONObject jsonObject = new JSONObject(s);
  90. // JSONArray structuralList = jsonObject.getJSONArray("TextDetections");
  91. // // 遍历StructuralList中的Groups,获取Key对应的AutoName和Value对应的AutoConten
  92. // StringBuilder stringBuilder = new StringBuilder(); // 创建一个StringBuilder对象
  93. // for (int i = 0; i < structuralList.length(); i++) {
  94. // JSONArray groups = structuralList.getJSONObject(i).getJSONArray("Groups");
  95. // for (int j = 0; j < groups.length(); j++) {
  96. // JSONArray lines = groups.getJSONObject(j).getJSONArray("Lines");
  97. // for (int k = 0; k < lines.length(); k++) {
  98. // JSONObject line = lines.getJSONObject(k);
  99. // JSONObject key = line.getJSONObject("Key");
  100. // JSONObject value = line.getJSONObject("Value");
  101. // String autoName = key.getString("AutoName");
  102. // String autoContent = value.getString("AutoContent");
  103. // String text = autoName + Constants.PDFSTR + autoContent;
  104. // if (stringBuilder.length() > 0) {
  105. // stringBuilder.append(Constants.BR); // 在已有内容的情况下添加逗号分隔符
  106. // }
  107. // stringBuilder.append(text); // 拼接当前的字符串
  108. // }
  109. // }
  110. // }
  111. // return stringBuilder.toString(); // 获取最终的拼接结果
  112. // } catch (TencentCloudSDKException e) {
  113. // System.out.println(e.toString());
  114. // }
  115. }
  116. /**
  117. * pdf识别成文字
  118. *
  119. * @param imageBase64
  120. * @param pageNumber
  121. * @param type pdf类型
  122. * @return
  123. */
  124. public static String pdfIdentifyText1(String imageBase64, Integer pageNumber, String type) {
  125. try {
  126. // 实例化一个认证对象,入参需要传入腾讯云账户 SecretId 和 SecretKey,此处还需注意密钥对的保密
  127. // 代码泄露可能会导致 SecretId 和 SecretKey 泄露,并威胁账号下所有资源的安全性。以下代码示例仅供参考,建议采用更安全的方式来使用密钥,请参见:https://cloud.tencent.com/document/product/1278/85305
  128. // 密钥可前往官网控制台 https://console.cloud.tencent.com/cam/capi 进行获取
  129. Credential cred = new Credential(SECRET_ID, SECRET_KEY);
  130. // 实例化一个http选项,可选的,没有特殊需求可以跳过
  131. HttpProfile httpProfile = new HttpProfile();
  132. httpProfile.setEndpoint("ocr.tencentcloudapi.com");
  133. // 实例化一个client选项,可选的,没有特殊需求可以跳过
  134. ClientProfile clientProfile = new ClientProfile();
  135. clientProfile.setHttpProfile(httpProfile);
  136. // 实例化要请求产品的client对象,clientProfile是可选的
  137. OcrClient client = new OcrClient(cred, "ap-beijing", clientProfile);
  138. // 实例化一个请求对象,每个接口都会对应一个request对象
  139. SmartStructuralOCRV2Request req = new SmartStructuralOCRV2Request();
  140. req.setImageBase64(imageBase64);
  141. req.setIsPdf(true);
  142. req.setPdfPageNumber(pageNumber.longValue());
  143. if (type.contains("申请书")) {
  144. req.setItemNames(applicantName);
  145. } else if (type.contains("调解协议")) {
  146. req.setItemNames(accordName);
  147. } else if (type.contains("合同")) {
  148. req.setItemNames(contractName);
  149. } else if (type.contains("授权委托书")) {
  150. req.setItemNames(powerAttorneyName);
  151. }
  152. // 返回的resp是一个SmartStructuralOCRV2Response的实例,与请求对象对应
  153. SmartStructuralOCRV2Response resp = client.SmartStructuralOCRV2(req);
  154. // 输出json格式的字符串回包
  155. System.out.println(SmartStructuralOCRV2Response.toJsonString(resp));
  156. //解析数据
  157. String s = SmartStructuralOCRV2Response.toJsonString(resp);
  158. // 解析JSON数据
  159. JSONObject jsonObject = new JSONObject(s);
  160. JSONArray structuralList = jsonObject.getJSONArray("StructuralList");
  161. // 遍历StructuralList中的Groups,获取Key对应的AutoName和Value对应的AutoConten
  162. StringBuilder stringBuilder = new StringBuilder(); // 创建一个StringBuilder对象
  163. for (int i = 0; i < structuralList.length(); i++) {
  164. JSONArray groups = structuralList.getJSONObject(i).getJSONArray("Groups");
  165. for (int j = 0; j < groups.length(); j++) {
  166. JSONArray lines = groups.getJSONObject(j).getJSONArray("Lines");
  167. for (int k = 0; k < lines.length(); k++) {
  168. JSONObject line = lines.getJSONObject(k);
  169. JSONObject key = line.getJSONObject("Key");
  170. JSONObject value = line.getJSONObject("Value");
  171. String autoName = key.getString("AutoName");
  172. String autoContent = value.getString("AutoContent");
  173. String text = autoName + Constants.PDFSTR + autoContent;
  174. if (stringBuilder.length() > 0) {
  175. stringBuilder.append(Constants.BR); // 在已有内容的情况下添加逗号分隔符
  176. }
  177. stringBuilder.append(text); // 拼接当前的字符串
  178. }
  179. }
  180. }
  181. return stringBuilder.toString(); // 获取最终的拼接结果
  182. } catch (TencentCloudSDKException e) {
  183. System.out.println(e.toString());
  184. }
  185. return null;
  186. }
  187. public static String pdfConvertBase64(String pathUrl) {
  188. try {
  189. File file = new File(pathUrl);
  190. FileInputStream fileInputStream = new FileInputStream(file);
  191. byte[] fileBytes = new byte[(int) file.length()];
  192. fileInputStream.read(fileBytes);
  193. fileInputStream.close();
  194. // 将字节数组转换为Base64值
  195. return Base64.getEncoder().encodeToString(fileBytes);
  196. } catch (IOException e) {
  197. e.printStackTrace();
  198. }
  199. return null;
  200. }
  201. /**
  202. * 根据抓取规则获取内容
  203. *
  204. * @param ocrText ocr识别的text
  205. * @param fatchRules 抓取规则
  206. * @return
  207. */
  208. public static void fatchRuleGetContent(String ocrText, List<FatchRule> fatchRules, Map<String, String> fatchMap) {
  209. String text = ocrText;
  210. if (CollectionUtil.isNotEmpty(fatchRules)) {
  211. for (FatchRule fatchRule : fatchRules) {
  212. // 从后往前抓取
  213. if (fatchRule.getFatchOrder() != null && fatchRule.getFatchOrder() == 1) {
  214. // String reverseText = StrUtil.reverse(ocrText);
  215. // if (StrUtil.isEmpty(fatchRule.getStartContent()) && StrUtil.isEmpty(fatchRule.getEndContent())) {
  216. // fatchMap.put(fatchRule.getColumnName(), trimStr(text));
  217. // } else if(StrUtil.isEmpty(fatchRule.getStartContent())&&StrUtil.isNotEmpty(fatchRule.getEndContent())){
  218. // // 开始字段为空,结束字段不为空
  219. // String endReverseText = StrUtil.reverse(fatchRule.getEndContent());
  220. // int endContIndex = StrUtil.ordinalIndexOf(reverseText, endReverseText, fatchRule.getEndContentRepeatOrder());
  221. // if (endContIndex != -1) {
  222. // String substring = text.substring(0, endContIndex);
  223. // fatchMap.put(fatchRule.getColumnName(), trimStr(StrUtil.reverse(substring)));
  224. // }
  225. //
  226. //
  227. // }else if(StrUtil.isNotEmpty(fatchRule.getStartContent())&&StrUtil.isEmpty(fatchRule.getEndContent())){
  228. // // 开始字段不为空,结束字段为空
  229. // // 开始字段为空,结束字段不为空
  230. // String startReverseText = StrUtil.reverse(fatchRule.getStartContent());
  231. // int startContIndex = StrUtil.ordinalIndexOf(reverseText, startReverseText, fatchRule.getStartContentRepeatOrder());
  232. // if (startContIndex != -1) {
  233. // String substring = text.substring(0, startContIndex);
  234. // fatchMap.put(fatchRule.getColumnName(), trimStr(StrUtil.reverse(substring)));
  235. // }
  236. //
  237. // }else if(StrUtil.isNotEmpty(fatchRule.getStartContent())&&StrUtil.isNotEmpty(fatchRule.getEndContent())){
  238. // String startReverseText = StrUtil.reverse(fatchRule.getStartContent());
  239. // String endReverseText = StrUtil.reverse(fatchRule.getEndContent());
  240. // int startContIndex = StrUtil.ordinalIndexOf(reverseText, startReverseText, fatchRule.getStartContentRepeatOrder());
  241. // if (startContIndex != -1) {
  242. // // 结束字段不为空
  243. // int endContIndex = StrUtil.ordinalIndexOf(reverseText, endReverseText, fatchRule.getEndContentRepeatOrder());
  244. // if (endContIndex != -1 && (startContIndex + fatchRule.getStartContent().length()) <= text.length()) {
  245. // String substring = text.substring(startContIndex + fatchRule.getStartContent().length(), endContIndex);
  246. // // 去除\n
  247. // fatchMap.put(fatchRule.getColumnName(),trimStr(StrUtil.reverse(substring)));
  248. // }
  249. // }
  250. // }
  251. } else {
  252. // 开始为空结束为空
  253. if (StrUtil.isEmpty(fatchRule.getStartContent()) && StrUtil.isEmpty(fatchRule.getEndContent())) {
  254. fatchMap.put(fatchRule.getColumnName(), trimStr(text));
  255. } else if (StrUtil.isNotEmpty(fatchRule.getStartContent())) {
  256. int startContIndex = StrUtil.ordinalIndexOf(text, fatchRule.getStartContent(), fatchRule.getStartContentRepeatOrder());
  257. if (startContIndex != -1) {
  258. // 开始不为空结束为空
  259. if (StrUtil.isEmpty(fatchRule.getEndContent())) {
  260. if ((startContIndex + fatchRule.getStartContent().length()) <= text.length()) {
  261. String substring = text.substring(startContIndex + fatchRule.getStartContent().length());
  262. // 去除\n
  263. fatchMap.put(fatchRule.getColumnName(), trimStr(substring));
  264. }
  265. } else {
  266. // 开始不为空结束不为空
  267. int endContIndex = StrUtil.ordinalIndexOf(text, fatchRule.getEndContent(), fatchRule.getEndContentRepeatOrder());
  268. if (endContIndex != -1 && (startContIndex + fatchRule.getStartContent().length()) <= text.length()) {
  269. String substring = text.substring(startContIndex + fatchRule.getStartContent().length(), endContIndex);
  270. // 去除\n
  271. fatchMap.put(fatchRule.getColumnName(), trimStr(substring));
  272. }
  273. }
  274. //
  275. }
  276. } else if (StrUtil.isEmpty(fatchRule.getStartContent()) && StrUtil.isNotEmpty(fatchRule.getEndContent())) {
  277. // 开始为空结束不为空
  278. int endContIndex = StrUtil.ordinalIndexOf(text, fatchRule.getEndContent(), fatchRule.getEndContentRepeatOrder());
  279. if (endContIndex != -1) {
  280. String substring = text.substring(0, endContIndex);
  281. fatchMap.put(fatchRule.getColumnName(), trimStr(substring));
  282. }
  283. }
  284. }
  285. }
  286. }
  287. }
  288. /**
  289. * 去除末尾空格
  290. *
  291. * @param substring
  292. * @return
  293. */
  294. private static String trimStr(String substring) {
  295. if (StrUtil.isNotEmpty(substring)) {
  296. return StrUtil.trim(substring);
  297. } else {
  298. return "";
  299. }
  300. }
  301. }