|
@@ -0,0 +1,45 @@
|
|
|
|
+package com.winhc.bigdata.spark.udf
|
|
|
|
+
|
|
|
|
+import com.winhc.bigdata.spark.utils.LoggingUtils
|
|
|
|
+import org.apache.commons.lang3.StringUtils
|
|
|
|
+import org.apache.spark.sql.SparkSession
|
|
|
|
+
|
|
|
|
+import java.util.regex.Pattern
|
|
|
|
+import scala.annotation.meta.getter
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * @author: XuJiakai
|
|
|
|
+ * @date: 2021/1/26 17:34
|
|
|
|
+ */
|
|
|
|
trait CompanyIndexFunc extends LoggingUtils {
  @(transient@getter) protected val spark: SparkSession

  /**
   * Registers the Spark SQL function `get_company_org_type_std` on `spark`.
   *
   * The registered function takes `(company_name, company_org_type)` and returns a
   * standardized organization-type label, or `null` when no rule matches.
   * The company name is examined first; the raw organization type is only
   * consulted when the name matches none of the name rules.
   */
  def company_org_type_udf(): Unit = {
    // Strips every character that is neither in the range U+4E00..U+9FA5 nor a parenthesis.
    val pattern = Pattern.compile("[^\\u4e00-\\u9fa5()()]")

    /**
     * Derives the standardized organization type.
     *
     * @param company_name      company name; may be null (treated as giving no hint)
     * @param company_org_type2 raw organization type; may be null or empty
     * @return the standardized label, or null when nothing matches
     */
    def get_company_org_type_std(company_name: String, company_org_type2: String): String = {
      // A SQL NULL arrives here as a null reference. Treat it as an empty name so the
      // lookup falls through to the organization-type rules instead of throwing an NPE.
      val name = if (company_name == null) "" else company_name

      if (name.contains("有限责任公司")) "有限责任公司"
      else if (name.contains("股份")) "股份有限公司"
      else if (name.contains("有限公司")) "有限责任公司"
      else if (StringUtils.isEmpty(company_org_type2)) null
      else {
        // An empty result after cleaning matches none of the rules below and yields null.
        val company_org_type = pattern.matcher(company_org_type2).replaceAll("")

        // Rule order matters: the first matching rule wins.
        if (company_org_type.contains("有限责任公司")) "有限责任公司"
        else if (company_org_type.contains("股份")) "股份有限公司"
        // NOTE(review): values containing "一人有限责任公司" are already captured by the
        // "有限责任公司" rule above, so only "一人有限责任" without a directly following
        // "公司" reaches this rule. Confirm that this precedence is intended.
        else if (company_org_type.contains("个人独资企业") || company_org_type.contains("一人有限责任")) "独资企业"
        else if (company_org_type.contains("个人经营") || company_org_type.contains("个体")) "个体工商户"
        else if (company_org_type.contains("专业合作") || company_org_type.contains("集体")) "集体所有制"
        else if (company_org_type.contains("有限") && company_org_type.contains("合伙")) "有限合伙"
        else if (company_org_type.contains("合伙")) "普通合伙"
        else if (company_org_type.contains("国有") || company_org_type.contains("全民所有制") || company_org_type.contains("国资")) "国企"
        else if (company_org_type.contains("外商") || company_org_type.contains("中外") || company_org_type.contains("外国") || company_org_type.contains("外资")) "外商投资企业"
        else if (company_org_type.contains("联营")) "联营企业"
        else null
      }
    }

    spark.udf.register("get_company_org_type_std", get_company_org_type_std _)
  }
}
|
|
|
|
+
|