|
@@ -5,6 +5,7 @@ import java.util.regex.Pattern
|
|
|
import java.util.{Calendar, Date, Locale}
|
|
|
|
|
|
import cn.hutool.core.util.StrUtil
|
|
|
+import com.winhc.bigdata.spark.implicits.RegexUtils._
|
|
|
import org.apache.commons.lang3.StringUtils
|
|
|
import org.apache.commons.lang3.time.DateFormatUtils
|
|
|
import org.apache.spark.sql.SparkSession
|
|
@@ -215,10 +216,47 @@ object BaseUtil {
|
|
|
r
|
|
|
}
|
|
|
|
|
|
+
|
|
|
/**
 * Converts full-width (SBC) characters to their half-width (DBC) equivalents.
 *
 * The ideographic space U+3000 becomes an ASCII space, and every character in
 * the full-width range U+FF01..U+FF5E is shifted down by 0xFEE0 onto its ASCII
 * counterpart U+0021..U+007E. Every other character is returned unchanged.
 *
 * @param str text to normalise; may be null
 * @return the normalised text, or null when `str` is null
 */
def toDBC(str: String): String = {
  // Null in, null out: a missing value should not crash the caller.
  if (str == null) null
  else str.map { (ch: Char) =>
    if (ch == '\u3000') ' '
    else if (ch >= '\uFF01' && ch <= '\uFF5E') (ch - 0xFEE0).toChar
    else ch
  }
}
|
|
|
+
|
|
|
// Captures a "(yyyy)...号" segment: a four-digit year in parentheses, then any
// run of non-号 characters, then an optional single 号. The leading `.*` is
// greedy, so the rightmost such segment is preferred; text after it is dropped.
private val case_pat = ".*([(\\(]\\d{4}[)\\)][^号]*号?).*".r
// Matches a four-digit year immediately followed by 年 (e.g. "2015年").
private val year_pat = "(\\d{4}?)年".r

/**
 * Normalises a case number into the canonical "(yyyy)...号" form.
 *
 * Steps: convert full-width characters to half-width, remove ASCII spaces,
 * rewrite every "yyyy年" as "(yyyy)", then keep only the segment captured by
 * `case_pat`.
 *
 * @param str raw case number; may be null or blank
 * @return the normalised case number, or null when the input is blank, is
 *         shorter than 8 characters after space removal, or does not contain
 *         a parenthesised four-digit year
 */
def case_no_trim(str: String): String = {
  if (StringUtils.isBlank(str)) null
  else {
    val compact = toDBC(str).replace(" ", "")
    if (compact.length < 8) null
    else {
      // "\\(" and "\\)" are escaped literals in the replacement string; $1 is the year.
      val withYear = year_pat.replaceAllIn(compact, "\\($1\\)")
      // The extractor requires the whole string to match and yields group 1,
      // so validation and extraction happen in a single regex pass.
      withYear match {
        case case_pat(caseNo) => caseNo
        case _ => null
      }
    }
  }
}
|
|
|
+
|
|
|
// Manual smoke-test entry point, shown here as part of a diff hunk.
// Lines marked `-` are the previous println(label(...)) calls being removed;
// the line marked `+` replaces them with one that prints the result of
// case_no_trim for a sample case number ending in a doubled 号.
def main(args: Array[String]): Unit = {
|
|
|
- println(label("1"))
|
|
|
- println(label("0"))
|
|
|
- println(label("2"))
|
|
|
+ println(case_no_trim("2015年怀执字第03601号号"))
|
|
|
}
|
|
|
|
|
|
}
|