xufei 4 years ago
parent
commit
b3db5f4a04

+ 1 - 1
src/main/scala/com/winhc/bigdata/spark/ng/change/NgChangeExtract.scala

@@ -185,7 +185,7 @@ object NgChangeExtract {
   private val startArgs = Seq(
     Args(tableName = "company_holder", primaryFields = "percent,deleted")
     , Args(tableName = "company_staff", primaryFields = "staff_type,deleted")
-    , Args(tableName = "company", primaryKey = "company_id", primaryFields = "name,cate_third_code,county_code,reg_capital_amount")
+    , Args(tableName = "company", primaryKey = "company_id", primaryFields = "name,cate_third_code,county_code,reg_capital_amount,legal_entity_name,deleted")
   )
 
 

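The primaryFields list tells NgChangeExtract which columns take part in change detection for each table; adding legal_entity_name and deleted means a change to either column now produces a change record for company rows. As a rough sketch of the idea (the Args shape and the MD5 fingerprint below are illustrative assumptions, not this repository's actual implementation), change detection over such a field list amounts to comparing per-row fingerprints built from exactly those columns:

// Hypothetical sketch: fingerprint a row over the configured primaryFields.
// Args and the MD5 scheme are assumptions for illustration, not the repo's real code.
import java.security.MessageDigest

case class Args(tableName: String, primaryKey: String = "rowkey", primaryFields: String)

def fingerprint(row: Map[String, String], primaryFields: String): String = {
  val concat = primaryFields.split(",").map(f => row.getOrElse(f, "")).mkString("\u0001")
  MessageDigest.getInstance("MD5").digest(concat.getBytes("UTF-8"))
    .map("%02x".format(_)).mkString
}

val before = Map("name" -> "小米科技", "legal_entity_name" -> "雷军", "deleted" -> "0")
val after  = before.updated("deleted", "9")
// With deleted in primaryFields, this prints false: the fingerprints differ, so a change is emitted.
println(fingerprint(before, "name,legal_entity_name,deleted") ==
  fingerprint(after, "name,legal_entity_name,deleted"))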
+ 13 - 2
src/main/scala/com/winhc/bigdata/spark/utils/BaseUtil.scala

@@ -2,7 +2,7 @@ package com.winhc.bigdata.spark.utils
 
 import java.security.MessageDigest
 import java.text.SimpleDateFormat
-import java.util.regex.Pattern
+import java.util.regex.{Matcher, Pattern}
 import java.util.{Calendar, Date, Locale}
 
 import cn.hutool.core.util.StrUtil
@@ -427,7 +427,18 @@ object BaseUtil {
     s.toString()
   }
 
+  // Whitelist: CJK ideographs, digits, Latin letters, half/full-width parentheses, space, '·' and ','.
+  private val name_pat = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z()() ·,]")
+
+  def nameCleanup(name: String): String = {
+    if (StringUtils.isNotBlank(name)) {
+      // Normalize separators to ',', drop non-whitelisted characters, collapse repeated spaces.
+      name_pat.matcher(name.replaceAll("[、,;]", ",")).replaceAll("").replaceAll(" +", " ")
+    } else ""
+  }
+
   def main(args: Array[String]): Unit = {
+    println(nameCleanup("小米科技.;有,@限公  司  雷军"))
     //    println(title("xx", null, "reason"))
     //    println(parseAddress("大石桥市人民法院"))
     //    println(case_no_trim("(2015)怀执字第03601号号"))
@@ -435,7 +446,7 @@ object BaseUtil {
     //    println(sortString(seq))
     //println(caseStage("(2019)鄂初7号"))
     val yg_name = ",,"
-    val bg_name = "张三,,小米,"
+    val bg_name = "张三,,小米,"
     println(compareName(yg_name, bg_name))
   }
 
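nameCleanup first normalizes list separators to an ASCII comma, then deletes every character outside the whitelist, and finally collapses runs of spaces. Hand-applying those steps to the test string in main gives roughly the following (the expected output is hand-derived, not a captured run, so treat it as an assumption):

// Hand-derived expectation for the call in main (an assumption, not a captured run):
// nameCleanup("小米科技.;有,@限公  司  雷军")
//   -> "小米科技,有,限公 司 雷军"   ('.' and '@' stripped, ';' -> ',', double spaces collapsed)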

+ 40 - 0
src/main/scala/com/winhc/bigdata/spark/utils/CompanyRelationUtils.scala

@@ -0,0 +1,40 @@
+package com.winhc.bigdata.spark.utils
+
+import com.winhc.bigdata.spark.implicits.CaseClass2JsonHelper._
+import com.winhc.bigdata.spark.utils.BaseUtil.cleanup
+import org.apache.commons.lang3.StringUtils
+
+/**
+ * @author: π
+ * @date: 2021/1/11
+ */
+case class company_node(id: String, name: String, deleted: String, topic_type: String)
+
+case class relation_holder(start_id: String, start_name: String, end_id: String,
+                           end_name: String, percent: Double, deleted: Int, holder_type: Int, topic_type: String)
+
+case class relation_staff(start_id: String, start_name: String, end_id: String,
+                          end_name: String, staff_type: String, deleted: Int, topic_type: String)
+
+case class relation_legal_entity(start_id: String, start_name: String, end_id: String,
+                                 end_name: String, deleted: Int, legal_entity_type: String, topic_type: String)
+
+
+object CompanyRelationUtils {
+
+  def get_company_node(id: String, name: String, deleted: String, topic_type: String): String =
+    company_node(id, name, deleted, topic_type).toJson()
+
+  def get_relation_holder(start_id: String, start_name: String, end_id: String,
+                          end_name: String, percent: Double, deleted: Int, holder_type: Int, topic_type: String): String =
+    relation_holder(start_id, start_name, end_id, end_name, percent, deleted, holder_type, topic_type).toJson()
+
+  def get_relation_staff(start_id: String, start_name: String, end_id: String,
+                         end_name: String, staff_type: String, deleted: Int, topic_type: String): String =
+    relation_staff(start_id, start_name, end_id, end_name, staff_type, deleted, topic_type).toJson()
+
+  def get_relation_legal_entity(start_id: String, start_name: String, end_id: String,
+                                end_name: String, deleted: Int, legal_entity_type: String, topic_type: String): String =
+    relation_legal_entity(start_id, start_name, end_id, end_name, deleted, legal_entity_type, topic_type).toJson()
+
+}

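The get_* wrappers return JSON strings rather than case class instances, which makes them straightforward to expose to Spark SQL. A minimal registration sketch (the session setup, table name, and query are illustrative assumptions, not code from this commit):

// Hypothetical usage: register get_company_node as a Spark SQL UDF.
import org.apache.spark.sql.SparkSession
import com.winhc.bigdata.spark.utils.CompanyRelationUtils

val spark = SparkSession.builder().appName("relation-demo").master("local[*]").getOrCreate()
spark.udf.register("get_company_node", CompanyRelationUtils.get_company_node _)

// Each matching row becomes one serialized company_node document.
spark.sql(
  """SELECT get_company_node(company_id, name, deleted, 'company_node') AS node_json
    |FROM company
    |""".stripMargin).show(false)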
+ 28 - 2
src/main/scala/com/winhc/bigdata/spark/utils/HbaseUtil.scala

@@ -100,9 +100,35 @@ object HbaseUtil {
     tb.delete(deletes.asJava)
   }
 
+  def deleteRowKeysByCol(tb: Table, prefix: String, col: String, family: String = FAMILY_NAME): List[String] = {
+    import org.apache.hadoop.hbase.client.Delete
+    import scala.collection.JavaConverters._
+    val scan = new Scan()
+    scan.setRowPrefixFilter(prefix.getBytes())
+    val list = ListBuffer[String]()
+    val deletes: ListBuffer[Delete] = ListBuffer[Delete]()
+    try {
+      val scanner = tb.getScanner(scan)
+      // JavaConverters is already in scope; iterate the scanner explicitly via asScala.
+      for (res <- scanner.asScala) {
+        val rowKey = Bytes.toString(res.getRow)
+        if (rowKey.contains(col)) {
+          list.append(rowKey)
+          deletes += new Delete(res.getRow)
+        }
+      }
+    } catch {
+      case e: Throwable => e.printStackTrace()
+    }
+    tb.delete(deletes.asJava)
+    list.toList
+  }
+
   def main(args: Array[String]): Unit = {
-    val tb: Table = getTable("COMPANY_PATENT_LIST_INDEX_INCLUDE")
-    val rows: List[String] = deleteRowKeys(tb, "624378817,3137567565")
+    val tb: Table = getTable("WENSHU_DETAIL_COMBINE_INDEX")
+    val rows: List[String] = deleteRowKeysByCol(tb, "2318455639", "33cc54856cfcd3d4cdd64f452aed8ede")
     println(rows)
     //val rows = List[String]("4")
     //deleteRows(tb, rows)
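deleteRowKeysByCol pulls every row under the prefix to the client and does the substring match there. If the HBase 2.x client is available (an assumption; older clients take CompareFilter.CompareOp instead of CompareOperator), the match can be pushed to the region server so only matching keys cross the wire:

// Hypothetical server-side variant of the prefix + substring scan (HBase 2.x API assumed).
import org.apache.hadoop.hbase.CompareOperator
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.filter.{RowFilter, SubstringComparator}

def colFilteredScan(prefix: String, col: String): Scan = {
  val scan = new Scan()
  scan.setRowPrefixFilter(prefix.getBytes())
  // Only row keys containing `col` are returned by the region server.
  scan.setFilter(new RowFilter(CompareOperator.EQUAL, new SubstringComparator(col)))
  scan
}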