From 9812f733ed6092f50e53ef928a1cbc7a773c4e68 Mon Sep 17 00:00:00 2001
From: wsczm <646659000@qq.com>
Date: Mon, 4 Dec 2023 14:15:21 +0800
Subject: [PATCH 1/9] Flink SQL field-level lineage parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../common/bean/lineage/LineageResult.java    | 100 ++++
 .../fire/common/bean/lineage/SQLLineage.java  |  11 +-
 .../lineage/SQLTableColumnsRelations.java     |  45 ++
 .../com/zto/fire/common/pool/ClassEntity.java |  59 ++
 .../com/zto/fire/common/pool/ClassPool.java   |  59 ++
 .../com/zto/fire/common/util/Constant.java    |  38 ++
 .../common/lineage/SQLLineageManager.scala    |   7 +
 .../com/zto/fire/core/sql/SqlParser.scala     |   5 +-
 .../fire/flink/lineage/LineageContext.scala   | 108 ++++
 .../fire/flink/lineage/LineageContext.scala   | 104 ++++
 .../fire/flink/lineage/LineageContext.scala   | 103 ++++
 .../fire/flink/lineage/LineageContext.scala   | 104 ++++
 .../fire/flink/lineage/LineageContext.scala   | 104 ++++
 .../ext/stream/StreamExecutionEnvExt.scala    |   4 +-
 .../fire/flink/sql/FlinkSqlParserBase.scala   |  32 +-
 .../flink/util/FlinkSingletonFactory.scala    |   3 +
 .../calcite/rel/metadata/RelColumnOrigin.java | 125 ++++
 .../rel/metadata/RelMdColumnOrigins.java      | 551 ++++++++++++++++++
 .../calcite/rel/metadata/RelColumnOrigin.java | 125 ++++
 .../rel/metadata/RelMdColumnOrigins.java      | 520 +++++++++++++++++
 .../org/apache/calcite/sql/SqlSelect.java     | 355 +++++++++++
 .../calcite/rel/metadata/RelColumnOrigin.java | 124 ++++
 .../rel/metadata/RelMdColumnOrigins.java      | 550 +++++++++++++++++
 .../calcite/rel/metadata/RelColumnOrigin.java | 125 ++++
 .../rel/metadata/RelMdColumnOrigins.java      | 551 ++++++++++++++++++
 .../calcite/rel/metadata/RelColumnOrigin.java | 125 ++++
 .../rel/metadata/RelMdColumnOrigins.java      | 551 ++++++++++++++++++
 27 files changed, 4577 insertions(+), 11 deletions(-)
 create mode 100644 fire-common/src/main/java/com/zto/fire/common/bean/lineage/LineageResult.java
 create mode 100644 fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableColumnsRelations.java
 create mode 100644 fire-common/src/main/java/com/zto/fire/common/pool/ClassEntity.java
 create mode 100644 fire-common/src/main/java/com/zto/fire/common/pool/ClassPool.java
 create mode 100644 fire-common/src/main/java/com/zto/fire/common/util/Constant.java
 create mode 100644 fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala
 create mode 100644 fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala
 create mode 100644 fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala
 create mode 100644 fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala
 create mode 100644 fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala
 create mode 100644 fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelColumnOrigin.java
 create mode 100644 fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java
 create mode 100644 fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelColumnOrigin.java
 create mode 100644 fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java
 create mode 100644
fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/sql/SqlSelect.java create mode 100644 fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelColumnOrigin.java create mode 100644 fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java create mode 100644 fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelColumnOrigin.java create mode 100644 fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java create mode 100644 fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelColumnOrigin.java create mode 100644 fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java diff --git a/fire-common/src/main/java/com/zto/fire/common/bean/lineage/LineageResult.java b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/LineageResult.java new file mode 100644 index 00000000..4b7c707a --- /dev/null +++ b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/LineageResult.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package com.zto.fire.common.bean.lineage;
+
+import com.zto.fire.common.util.Constant;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * @description: Field-level lineage result
+ * @author: lmz
+ */
+@Data
+@Builder
+@AllArgsConstructor
+public class LineageResult {
+
+    private String sourceCatalog;
+
+    private String sourceDatabase;
+
+    private String sourceTable;
+
+    private String sourceColumn;
+
+    private String targetCatalog;
+
+    private String targetDatabase;
+
+    private String targetTable;
+
+    private String targetColumn;
+
+    /**
+     * Stores the data-conversion expression, i.e. by which expression the
+     * source table fields are transformed into the target field
+     */
+    private String transform;
+
+    public LineageResult(String sourceTablePath, String sourceColumn, String targetTablePath, String targetColumn,
+                         String transform) {
+        String[] sourceItems = sourceTablePath.split("\\" + Constant.DELIMITER);
+        String[] targetItems = targetTablePath.split("\\" + Constant.DELIMITER);
+
+        this.sourceCatalog = sourceItems[0];
+        this.sourceDatabase = sourceItems[1];
+        this.sourceTable = sourceItems[2];
+        this.sourceColumn = sourceColumn;
+        this.targetCatalog = targetItems[0];
+        this.targetDatabase = targetItems[1];
+        this.targetTable = targetItems[2];
+        this.targetColumn = targetColumn;
+        this.transform = transform;
+    }
+
+    public LineageResult(String catalog, String database, String sourceTable, String sourceColumn, String targetTable,
+                         String targetColumn) {
+        this.sourceCatalog = catalog;
+        this.sourceDatabase = database;
+        this.sourceTable = sourceTable;
+        this.sourceColumn = sourceColumn;
+        this.targetCatalog = catalog;
+        this.targetDatabase = database;
+        this.targetTable = targetTable;
+        this.targetColumn = targetColumn;
+    }
+
+    public static List<LineageResult> buildResult(String catalog, String database, String[][] expectedArray) {
+        return Stream.of(expectedArray)
+                .map(e -> {
+                    LineageResult result = new LineageResult(catalog, database, e[0], e[1], e[2], e[3]);
+                    // transform field is optional
+                    if (e.length == 5) {
+                        result.setTransform(e[4]);
+                    }
+                    return result;
+                }).collect(Collectors.toList());
+    }
+}
diff --git a/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLLineage.java b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLLineage.java
index bccdb4fe..720ef52d 100644
--- a/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLLineage.java
+++ b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLLineage.java
@@ -42,11 +42,12 @@ public class SQLLineage {
      * Describes the data lineage relationships between tables
      */
     private List<SQLTableRelations> relations;
-
+    private List<SQLTableColumnsRelations> colRelations;
     public SQLLineage() {
         this.statements = new LinkedList<>();
         this.tables = new LinkedList<>();
         this.relations = new LinkedList<>();
+        this.colRelations = new LinkedList<>();
     }
 
     public List<String> getStatements() {
@@ -73,4 +74,12 @@ public class SQLLineage {
         return relations;
     }
 
+    public List<SQLTableColumnsRelations> getColRelations() {
+        return colRelations;
+    }
+
+    public void setColRelations(List<SQLTableColumnsRelations> colRelations) {
+        this.colRelations = colRelations;
+    }
+
 }
\ No newline at end of file
diff --git a/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableColumnsRelations.java b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableColumnsRelations.java
new file mode 100644
index 00000000..33962f56
--- /dev/null
+++ b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableColumnsRelations.java
@@ -0,0
+1,45 @@ +package com.zto.fire.common.bean.lineage; + +import java.util.Objects; + +public class SQLTableColumnsRelations { + private String sourceColumn; + private String targetColumn; + + public SQLTableColumnsRelations(String sourceColumn, String targetColumn) { + this.sourceColumn = sourceColumn; + this.targetColumn = targetColumn; + } + + public String getSourceColumn() { + return sourceColumn; + } + + public void setSourceColumn(String sourceColumn) { + this.sourceColumn = sourceColumn; + } + + public String getTargetColumn() { + return targetColumn; + } + + + public void setTargetColumn(String targetColumn) { + this.targetColumn = targetColumn; + } + + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SQLTableColumnsRelations that = (SQLTableColumnsRelations) o; + return Objects.equals(sourceColumn, that.sourceColumn) && Objects.equals(targetColumn, that.targetColumn); + } + + @Override + public int hashCode() { + return Objects.hash(sourceColumn, targetColumn); + } + +} diff --git a/fire-common/src/main/java/com/zto/fire/common/pool/ClassEntity.java b/fire-common/src/main/java/com/zto/fire/common/pool/ClassEntity.java new file mode 100644 index 00000000..1b065f6e --- /dev/null +++ b/fire-common/src/main/java/com/zto/fire/common/pool/ClassEntity.java @@ -0,0 +1,59 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ */
+
+package com.zto.fire.common.pool;
+
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * ClassEntity
+ */
+@Getter
+@Setter
+public class ClassEntity {
+    private String name;
+    private String code;
+    private byte[] classByte;
+
+    public ClassEntity(String name, String code) {
+        this.name = name;
+        this.code = code;
+    }
+
+    public ClassEntity(String name, String code, byte[] classByte) {
+        this.name = name;
+        this.code = code;
+        this.classByte = classByte;
+    }
+
+    public static ClassEntity build(String name, String code) {
+        return new ClassEntity(name, code);
+    }
+
+    public boolean equals(ClassEntity entity) {
+        return name.equals(entity.getName()) && code.equals(entity.getCode());
+    }
+}
diff --git a/fire-common/src/main/java/com/zto/fire/common/pool/ClassPool.java b/fire-common/src/main/java/com/zto/fire/common/pool/ClassPool.java
new file mode 100644
index 00000000..a3b4d677
--- /dev/null
+++ b/fire-common/src/main/java/com/zto/fire/common/pool/ClassPool.java
@@ -0,0 +1,59 @@
+
+package com.zto.fire.common.pool;
+
+import java.util.List;
+import java.util.Vector;
+
+/**
+ * ClassPool
+ */
+public class ClassPool {
+
+    private static volatile List<ClassEntity> classList = new Vector<>();
+
+    public static boolean exist(String name) {
+        for (ClassEntity executorEntity : classList) {
+            if (executorEntity.getName().equals(name)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public static boolean exist(ClassEntity entity) {
+        for (ClassEntity executorEntity : classList) {
+            if (executorEntity.equals(entity)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public static Integer push(ClassEntity executorEntity) {
+        if (exist(executorEntity.getName())) {
+            remove(executorEntity.getName());
+        }
+        classList.add(executorEntity);
+        return classList.size();
+    }
+
+    public static Integer remove(String name) {
+        int count = classList.size();
+        for (int i = 0; i < classList.size(); i++) {
+            if (name.equals(classList.get(i).getName())) {
+                classList.remove(i);
+                break;
+            }
+        }
+        return count - classList.size();
+    }
+
+    public static ClassEntity get(String name) {
+        for (ClassEntity executorEntity : classList) {
+            if (executorEntity.getName().equals(name)) {
+                return executorEntity;
+            }
+        }
+        return null;
+    }
+}
diff --git a/fire-common/src/main/java/com/zto/fire/common/util/Constant.java b/fire-common/src/main/java/com/zto/fire/common/util/Constant.java
new file mode 100644
index 00000000..fc59e649
--- /dev/null
+++ b/fire-common/src/main/java/com/zto/fire/common/util/Constant.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package com.zto.fire.common.util; + +/** + * @description: Constant + * @author: lmz + */ +public class Constant { + + private Constant() { + throw new IllegalStateException("Utility class"); + } + + public static final String DELIMITER = "."; + + public static final int INITIAL_CAPACITY = 16; + + public static final String ILLEGAL_PARAM = "illegal param"; + + public static final Long DEFAULT_USER_ID = 0L; +} diff --git a/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala b/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala index 3710c133..7d0b3c9d 100644 --- a/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala +++ b/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala @@ -32,6 +32,7 @@ private[fire] object SQLLineageManager { private lazy val statementSet = new JHashSet[String]() private lazy val relationSet = new JHashSet[SQLTableRelations]() private lazy val tableLineageMap = new JConcurrentHashMap[String, SQLTable]() + private lazy val colRelationSet = new JHashSet[SQLTableColumnsRelations]() /** * 添加待执行的SQL语句 @@ -52,6 +53,10 @@ private[fire] object SQLLineageManager { this.relationSet.add(new SQLTableRelations(srcTableIdentifier.toString, sinkTableIdentifier.toString)) } + def addColRelation(sourceColumn: String, targetColumn: String): Unit = { + this.colRelationSet.add(new SQLTableColumnsRelations(sourceColumn, targetColumn)) + } + /** * 获取SQL血缘信息 */ @@ -60,6 +65,7 @@ private[fire] object SQLLineageManager { sqlLineage.setStatements(this.statementSet.toList) sqlLineage.setTables(this.tableLineageMap.values().toList) sqlLineage.setRelations(this.relationSet.toList) + sqlLineage.setColRelations(this.colRelationSet.toList) sqlLineage } @@ -247,4 +253,5 @@ private[fire] object SQLLineageManager { } } + } diff --git a/fire-core/src/main/scala/com/zto/fire/core/sql/SqlParser.scala b/fire-core/src/main/scala/com/zto/fire/core/sql/SqlParser.scala index 292d8786..f4a70eea 100644 --- a/fire-core/src/main/scala/com/zto/fire/core/sql/SqlParser.scala +++ b/fire-core/src/main/scala/com/zto/fire/core/sql/SqlParser.scala @@ -21,10 +21,10 @@ import com.zto.fire.common.anno.Internal import com.zto.fire.common.bean.TableIdentifier import com.zto.fire.common.conf.FireFrameworkConf._ import com.zto.fire.common.lineage.{LineageManager, SQLLineageManager} -import com.zto.fire.common.lineage.parser.connector._ import com.zto.fire.common.util.{Logging, SQLUtils, ThreadUtils} import com.zto.fire.predef._ + import java.util.concurrent.{CopyOnWriteArraySet, TimeUnit} /** @@ -46,7 +46,8 @@ private[fire] trait SqlParser extends Logging { protected def sqlParse: Unit = { if (lineageEnable) { ThreadUtils.scheduleWithFixedDelay({ - this.buffer.foreach(sql => this.sqlParser(sql)) + this.buffer.foreach( + sql => this.sqlParser(sql)) this.clear }, lineageRunInitialDelay, lineageRunPeriod, TimeUnit.SECONDS) } diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala new file mode 100644 index 00000000..c610e6ef --- /dev/null +++ b/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala @@ -0,0 +1,108 @@ +package com.zto.fire.flink.lineage + +import com.zto.fire.common.bean.lineage.LineageResult +import com.zto.fire.common.util.{Constant, Logging} +import org.apache.calcite.rel.RelNode +import 
org.apache.calcite.rel.metadata.{JaninoRelMetadataProvider, RelColumnOrigin, RelMetadataQueryBase} +import org.apache.commons.lang.StringUtils +import org.apache.flink.table.api.internal.TableEnvironmentImpl +import org.apache.flink.table.api.{TableException, ValidationException} +import org.apache.flink.table.operations.{CatalogSinkModifyOperation, Operation} +import org.apache.flink.table.planner.operations.PlannerQueryOperation +import org.apache.flink.table.planner.plan.metadata.FlinkDefaultRelMetadataProvider +import org.apache.flink.table.planner.plan.schema.TableSourceTable + +import java.util +import scala.collection.JavaConverters.{asScalaBufferConverter, asScalaSetConverter, seqAsJavaListConverter} +import scala.collection.mutable.ListBuffer +import scala.language.postfixOps + + +class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging { + + private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = { + val queryFieldList = relNode.getRowType.getFieldNames + if (queryFieldList.size() != sinkFieldList.size()) { + throw new ValidationException( + String.format( + "Column types of query result and sink for %s do not match.\n" + + "Query schema: %s\n" + + "Sink schema: %s", + sinkTable, queryFieldList, sinkFieldList)) + } + } + + def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode): ListBuffer[LineageResult] = { + val targetColumnList: util.List[String] = tableEnv.from(sinkTable) + .getSchema + .getTableColumns + .asScala + .map(tableColumn => tableColumn.getName) + .toList + .asJava + + validateSchema(sinkTable, optRelNode, targetColumnList) + val metadataQuery = optRelNode.getCluster.getMetadataQuery + val resultList = ListBuffer[LineageResult]() + + for (index <- 0 until targetColumnList.size) { + val targetColumn = targetColumnList.get(index) + + val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index).asScala + if (relColumnOriginSet.nonEmpty) { + for (rco: RelColumnOrigin <- relColumnOriginSet) { + // table + val table = rco.getOriginTable + val sourceTable = String.join(Constant.DELIMITER, table.getQualifiedName) + // field + val ordinal = rco.getOriginColumnOrdinal + val fieldNames = table.asInstanceOf[TableSourceTable].catalogTable.getSchema.getFieldNames + val sourceColumn = fieldNames(ordinal) + this.logger.info("----------------------------------------------------------") + this.logger.info("Source table: {}", sourceTable) + this.logger.info("Source column: {}", sourceColumn) + if (StringUtils.isNotEmpty(rco.getTransform)) { + this.logger.info("transform: {}", rco.getTransform) + } + // add record + resultList += new LineageResult(sourceTable, sourceColumn, sinkTable, targetColumn, rco.getTransform) + } + } + } + resultList + + + } + + def analyzeLineage(sql: String) = { + + RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) + val parsed = parseStatement(sql) + val sinkTable = parsed._1 + val oriRelNode = parsed._2 + buildFiledLineageResult(sinkTable, oriRelNode) + } + + private def parseStatement(singleSql: String): Tuple2[String, RelNode] = { + val operation = parseValidateConvert(singleSql) + operation match { + case sinkOperation: CatalogSinkModifyOperation => + val queryOperation = sinkOperation.getChild.asInstanceOf[PlannerQueryOperation] + val relNode = queryOperation.getCalciteTree + Tuple2(sinkOperation.getTableIdentifier.asSummaryString(), relNode) + case _ => + throw new TableException("Only insert is 
supported now.") + } + + + } + + private def parseValidateConvert(singleSql: String) = { + RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) + val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql) + if (operations.size() != 1) { + throw new TableException("Unsupported SQL query! only accepts a single SQL statement.") + } + operations.get(0) + } +} diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala new file mode 100644 index 00000000..af6371e9 --- /dev/null +++ b/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala @@ -0,0 +1,104 @@ +package com.zto.fire.flink.lineage + +import com.zto.fire.common.bean.lineage.LineageResult +import com.zto.fire.common.util.{Constant, Logging} +import org.apache.calcite.rel.RelNode +import org.apache.calcite.rel.metadata.{JaninoRelMetadataProvider, RelColumnOrigin, RelMetadataQueryBase} +import org.apache.commons.lang.StringUtils +import org.apache.flink.table.api.internal.TableEnvironmentImpl +import org.apache.flink.table.api.{TableException, ValidationException} +import org.apache.flink.table.operations.{CatalogSinkModifyOperation, Operation} +import org.apache.flink.table.planner.operations.PlannerQueryOperation +import org.apache.flink.table.planner.plan.metadata.FlinkDefaultRelMetadataProvider +import org.apache.flink.table.planner.plan.schema.TableSourceTable + +import java.util +import scala.collection.JavaConverters.asScalaSetConverter +import scala.collection.mutable.ListBuffer +import scala.language.postfixOps + + +class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging { + + private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = { + val queryFieldList = relNode.getRowType.getFieldNames + if (queryFieldList.size() != sinkFieldList.size()) { + throw new ValidationException( + String.format( + "Column types of query result and sink for %s do not match.\n" + + "Query schema: %s\n" + + "Sink schema: %s", + sinkTable, queryFieldList, sinkFieldList)) + } + } + + def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode): ListBuffer[LineageResult] = { + val targetColumnList = tableEnv.from(sinkTable) + .getResolvedSchema + .getColumnNames + validateSchema(sinkTable, optRelNode, targetColumnList) + val metadataQuery = optRelNode.getCluster.getMetadataQuery + val resultList = ListBuffer[LineageResult]() + + for (index <- 0 until targetColumnList.size) { + val targetColumn = targetColumnList.get(index) + + val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index).asScala + import scala.collection.JavaConverters + if (relColumnOriginSet.nonEmpty) { + for (rco: RelColumnOrigin <- relColumnOriginSet) { + // table + val table = rco.getOriginTable + val sourceTable = String.join(Constant.DELIMITER, table.getQualifiedName) + // field + val ordinal = rco.getOriginColumnOrdinal + val fieldNames = table.asInstanceOf[TableSourceTable].catalogTable.getResolvedSchema.getColumnNames + val sourceColumn = fieldNames.get(ordinal) + this.logger.info("----------------------------------------------------------") + this.logger.info("Source table: {}", sourceTable) + this.logger.info("Source column: {}", sourceColumn) + if (StringUtils.isNotEmpty(rco.getTransform)) { + this.logger.info("transform: {}", 
rco.getTransform) + } + // add record + resultList += new LineageResult(sourceTable, sourceColumn, sinkTable, targetColumn, rco.getTransform) + } + } + } + resultList + + + } + + def analyzeLineage(sql: String) = { + + RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) + val parsed = parseStatement(sql) + val sinkTable = parsed._1 + val oriRelNode = parsed._2 + buildFiledLineageResult(sinkTable, oriRelNode) + } + + private def parseStatement(singleSql: String): Tuple2[String, RelNode] = { + val operation = parseValidateConvert(singleSql) + operation match { + case sinkOperation: CatalogSinkModifyOperation => + val queryOperation = sinkOperation.getChild.asInstanceOf[PlannerQueryOperation] + val relNode = queryOperation.getCalciteTree + Tuple2(sinkOperation.getTableIdentifier.asSummaryString(), relNode) + case _ => + throw new TableException("Only insert is supported now.") + } + + + } + + private def parseValidateConvert(singleSql: String) = { + RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) + val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql) + if (operations.size() != 1) { + throw new TableException("Unsupported SQL query! only accepts a single SQL statement.") + } + operations.get(0) + } +} diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala new file mode 100644 index 00000000..914cb8d8 --- /dev/null +++ b/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala @@ -0,0 +1,103 @@ +package com.zto.fire.flink.lineage + +import com.zto.fire.common.bean.lineage.LineageResult +import com.zto.fire.common.util.Constant +import org.apache.calcite.rel.RelNode +import org.apache.calcite.rel.metadata.{JaninoRelMetadataProvider, RelColumnOrigin, RelMetadataQueryBase} +import org.apache.commons.lang3.StringUtils +import org.apache.flink.table.api.internal.TableEnvironmentImpl +import org.apache.flink.table.api.{TableException, ValidationException} +import org.apache.flink.table.operations.{CatalogSinkModifyOperation, Operation} +import org.apache.flink.table.planner.operations.PlannerQueryOperation +import org.apache.flink.table.planner.plan.metadata.FlinkDefaultRelMetadataProvider +import org.apache.flink.table.planner.plan.schema.TableSourceTable + +import java.util +import scala.collection.JavaConverters.asScalaSetConverter +import scala.collection.mutable.ListBuffer +import scala.language.postfixOps + + +class LineageContext(tableEnv: TableEnvironmentImpl) { + + private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = { + val queryFieldList = relNode.getRowType.getFieldNames + if (queryFieldList.size() != sinkFieldList.size()) { + throw new ValidationException( + String.format( + "Column types of query result and sink for %s do not match.\n" + + "Query schema: %s\n" + + "Sink schema: %s", + sinkTable, queryFieldList, sinkFieldList)) + } + } + + def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode):ListBuffer[LineageResult] = { + val targetColumnList = tableEnv.from(sinkTable) + .getResolvedSchema + .getColumnNames + validateSchema(sinkTable, optRelNode, targetColumnList) + val metadataQuery = optRelNode.getCluster.getMetadataQuery + val resultList = ListBuffer[LineageResult]() + + for 
(index <- 0 until targetColumnList.size) {
+      val targetColumn = targetColumnList.get(index)
+
+      val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index).asScala
+      if (relColumnOriginSet.nonEmpty) {
+        for (rco: RelColumnOrigin <- relColumnOriginSet) {
+          // table
+          val table = rco.getOriginTable
+          val sourceTable = String.join(Constant.DELIMITER, table.getQualifiedName)
+          // field
+          val ordinal = rco.getOriginColumnOrdinal
+          val fieldNames = table.asInstanceOf[TableSourceTable].catalogTable.getResolvedSchema.getColumnNames
+          val sourceColumn = fieldNames.get(ordinal)
+          println("----------------------------------------------------------")
+          println("Source table: {}", sourceTable)
+          println("Source column: {}", sourceColumn)
+          if (StringUtils.isNotEmpty(rco.getTransform)) {
+            println("transform: {}", rco.getTransform)
+          }
+          // add record
+          resultList += new LineageResult(sourceTable, sourceColumn, sinkTable, targetColumn, rco.getTransform)
+        }
+      }
+    }
+    resultList
+  }
+
+  def analyzeLineage(sql: String) = {
+    RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE))
+    val parsed = parseStatement(sql)
+    val sinkTable = parsed._1
+    val oriRelNode = parsed._2
+    buildFiledLineageResult(sinkTable, oriRelNode)
+  }
+
+  private def parseStatement(singleSql: String): Tuple2[String, RelNode] = {
+    val operation = parseValidateConvert(singleSql)
+    operation match {
+      case sinkOperation: CatalogSinkModifyOperation =>
+        val queryOperation = sinkOperation.getChild.asInstanceOf[PlannerQueryOperation]
+        val relNode = queryOperation.getCalciteTree
+        Tuple2(sinkOperation.getTableIdentifier.asSummaryString(), relNode)
+      case _ =>
+        throw new TableException("Only insert is supported now.")
+    }
+  }
+
+  private def parseValidateConvert(singleSql: String) = {
+    RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE))
+    val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql)
+    if (operations.size() != 1) {
+      throw new TableException("Unsupported SQL query! 
only accepts a single SQL statement.") + } + operations.get(0) + } +} diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala new file mode 100644 index 00000000..5ca78f46 --- /dev/null +++ b/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala @@ -0,0 +1,104 @@ +package com.zto.fire.flink.lineage + +import com.zto.fire.common.bean.lineage.LineageResult +import com.zto.fire.common.util.Constant +import org.apache.calcite.rel.RelNode +import org.apache.calcite.rel.metadata.{JaninoRelMetadataProvider, RelColumnOrigin, RelMetadataQueryBase} +import org.apache.commons.lang3.StringUtils +import org.apache.flink.table.api.{TableException, ValidationException} +import org.apache.flink.table.api.internal.TableEnvironmentImpl +import org.apache.flink.table.operations.{Operation, SinkModifyOperation} +import org.apache.flink.table.planner.operations.PlannerQueryOperation +import org.apache.flink.table.planner.plan.metadata.FlinkDefaultRelMetadataProvider +import org.apache.flink.table.planner.plan.schema.TableSourceTable + +import java.util +import scala.collection.mutable.ListBuffer +import scala.language.postfixOps + + +class LineageContext(tableEnv: TableEnvironmentImpl) { + + private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = { + val queryFieldList = relNode.getRowType.getFieldNames + if (queryFieldList.size() != sinkFieldList.size()) { + throw new ValidationException( + String.format( + "Column types of query result and sink for %s do not match.\n" + + "Query schema: %s\n" + + "Sink schema: %s", + sinkTable, queryFieldList, sinkFieldList)) + } + } + + def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode):ListBuffer[LineageResult] = { + val targetColumnList = tableEnv.from(sinkTable) + .getResolvedSchema + .getColumnNames + validateSchema(sinkTable, optRelNode, targetColumnList) + val metadataQuery = optRelNode.getCluster.getMetadataQuery + val resultList = ListBuffer[LineageResult]() + + for (index <- 0 until targetColumnList.size) { + val targetColumn = targetColumnList.get(index) + + val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index) + import scala.collection.JavaConversions._ + + if (!relColumnOriginSet.isEmpty) { + for (rco: RelColumnOrigin <- relColumnOriginSet) { + // table + val table = rco.getOriginTable + val sourceTable = String.join(Constant.DELIMITER, table.getQualifiedName) + // field + val ordinal = rco.getOriginColumnOrdinal + val fieldNames = table.asInstanceOf[TableSourceTable].contextResolvedTable.getResolvedSchema.getColumnNames + val sourceColumn = fieldNames.get(ordinal) + println("----------------------------------------------------------") + println("Source table: {}", sourceTable) + println("Source column: {}", sourceColumn) + if (StringUtils.isNotEmpty(rco.getTransform)) { + println("transform: {}", rco.getTransform) + } + // add record + resultList += new LineageResult(sourceTable, sourceColumn, sinkTable, targetColumn, rco.getTransform) + } + } + } + resultList + + + } + + def analyzeLineage(sql: String) = { + + RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) + val parsed = parseStatement(sql) + val sinkTable = parsed._1 + val oriRelNode = parsed._2 + buildFiledLineageResult(sinkTable, oriRelNode) + } + + private 
def parseStatement(singleSql: String): Tuple2[String, RelNode] = { + val operation = parseValidateConvert(singleSql) + operation match { + case sinkOperation: SinkModifyOperation => + val queryOperation = sinkOperation.getChild.asInstanceOf[PlannerQueryOperation] + val relNode = queryOperation.getCalciteTree + Tuple2(sinkOperation.getContextResolvedTable.getIdentifier.asSummaryString(), relNode) + case _ => + throw new TableException("Only insert is supported now.") + } + + + } + + private def parseValidateConvert(singleSql: String) = { + RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) + val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql) + if (operations.size() != 1) { + throw new TableException("Unsupported SQL query! only accepts a single SQL statement.") + } + operations.get(0) + } +} diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala new file mode 100644 index 00000000..5ca78f46 --- /dev/null +++ b/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala @@ -0,0 +1,104 @@ +package com.zto.fire.flink.lineage + +import com.zto.fire.common.bean.lineage.LineageResult +import com.zto.fire.common.util.Constant +import org.apache.calcite.rel.RelNode +import org.apache.calcite.rel.metadata.{JaninoRelMetadataProvider, RelColumnOrigin, RelMetadataQueryBase} +import org.apache.commons.lang3.StringUtils +import org.apache.flink.table.api.{TableException, ValidationException} +import org.apache.flink.table.api.internal.TableEnvironmentImpl +import org.apache.flink.table.operations.{Operation, SinkModifyOperation} +import org.apache.flink.table.planner.operations.PlannerQueryOperation +import org.apache.flink.table.planner.plan.metadata.FlinkDefaultRelMetadataProvider +import org.apache.flink.table.planner.plan.schema.TableSourceTable + +import java.util +import scala.collection.mutable.ListBuffer +import scala.language.postfixOps + + +class LineageContext(tableEnv: TableEnvironmentImpl) { + + private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = { + val queryFieldList = relNode.getRowType.getFieldNames + if (queryFieldList.size() != sinkFieldList.size()) { + throw new ValidationException( + String.format( + "Column types of query result and sink for %s do not match.\n" + + "Query schema: %s\n" + + "Sink schema: %s", + sinkTable, queryFieldList, sinkFieldList)) + } + } + + def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode):ListBuffer[LineageResult] = { + val targetColumnList = tableEnv.from(sinkTable) + .getResolvedSchema + .getColumnNames + validateSchema(sinkTable, optRelNode, targetColumnList) + val metadataQuery = optRelNode.getCluster.getMetadataQuery + val resultList = ListBuffer[LineageResult]() + + for (index <- 0 until targetColumnList.size) { + val targetColumn = targetColumnList.get(index) + + val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index) + import scala.collection.JavaConversions._ + + if (!relColumnOriginSet.isEmpty) { + for (rco: RelColumnOrigin <- relColumnOriginSet) { + // table + val table = rco.getOriginTable + val sourceTable = String.join(Constant.DELIMITER, table.getQualifiedName) + // field + val ordinal = rco.getOriginColumnOrdinal + val fieldNames = 
table.asInstanceOf[TableSourceTable].contextResolvedTable.getResolvedSchema.getColumnNames + val sourceColumn = fieldNames.get(ordinal) + println("----------------------------------------------------------") + println("Source table: {}", sourceTable) + println("Source column: {}", sourceColumn) + if (StringUtils.isNotEmpty(rco.getTransform)) { + println("transform: {}", rco.getTransform) + } + // add record + resultList += new LineageResult(sourceTable, sourceColumn, sinkTable, targetColumn, rco.getTransform) + } + } + } + resultList + + + } + + def analyzeLineage(sql: String) = { + + RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) + val parsed = parseStatement(sql) + val sinkTable = parsed._1 + val oriRelNode = parsed._2 + buildFiledLineageResult(sinkTable, oriRelNode) + } + + private def parseStatement(singleSql: String): Tuple2[String, RelNode] = { + val operation = parseValidateConvert(singleSql) + operation match { + case sinkOperation: SinkModifyOperation => + val queryOperation = sinkOperation.getChild.asInstanceOf[PlannerQueryOperation] + val relNode = queryOperation.getCalciteTree + Tuple2(sinkOperation.getContextResolvedTable.getIdentifier.asSummaryString(), relNode) + case _ => + throw new TableException("Only insert is supported now.") + } + + + } + + private def parseValidateConvert(singleSql: String) = { + RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) + val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql) + if (operations.size() != 1) { + throw new TableException("Unsupported SQL query! only accepts a single SQL statement.") + } + operations.get(0) + } +} diff --git a/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/ext/stream/StreamExecutionEnvExt.scala b/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/ext/stream/StreamExecutionEnvExt.scala index fb402937..10f07563 100644 --- a/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/ext/stream/StreamExecutionEnvExt.scala +++ b/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/ext/stream/StreamExecutionEnvExt.scala @@ -627,5 +627,7 @@ private[fire] object StreamExecutionEnvExt { /** * 创建并返回StatementSet对象实例 */ - def createStatementSet: StatementSet = FlinkSingletonFactory.getTableEnv.createStatementSet() + def createStatementSet: StatementSet = + + FlinkSingletonFactory.getTableEnv.createStatementSet() } \ No newline at end of file diff --git a/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala b/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala index 56dd2eeb..3296f722 100644 --- a/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala +++ b/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala @@ -27,14 +27,19 @@ import com.zto.fire.common.lineage.parser.ConnectorParserManager import com.zto.fire.common.util.{ReflectionUtils, RegularUtils} import com.zto.fire.core.sql.SqlParser import com.zto.fire.flink.conf.FireFlinkConf +import com.zto.fire.flink.lineage.LineageContext import com.zto.fire.flink.util.{FlinkSingletonFactory, FlinkUtils} import com.zto.fire.jdbc.conf.FireJdbcConf import org.apache.calcite.sql._ +import org.apache.flink.configuration.Configuration import org.apache.flink.sql.parser.SqlProperty import org.apache.flink.sql.parser.ddl._ import org.apache.flink.sql.parser.dml._ import 
org.apache.flink.sql.parser.hive.dml.RichSqlHiveInsert +import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment +import org.apache.flink.table.api.EnvironmentSettings import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment +import org.apache.flink.table.api.internal.TableEnvironmentImpl import org.apache.flink.table.catalog.ObjectPath import org.apache.flink.table.catalog.hive.HiveCatalog import org.apache.hadoop.hive.metastore.api.Table @@ -52,6 +57,13 @@ private[fire] trait FlinkSqlParserBase extends SqlParser { // calcite parser config protected lazy val tableEnv = FlinkSingletonFactory.getTableEnv.asInstanceOf[StreamTableEnvironment] protected lazy val hiveTableMetaDataMap = new JConcurrentHashMap[String, Table]() + private val env: StreamExecutionEnvironment = FlinkSingletonFactory.getStreamEnv + + private val configuration = new Configuration + configuration.setBoolean("table.dynamic-table-options.enabled", true) + private val settings = EnvironmentSettings.newInstance.inStreamingMode.build + private val stableEnv: TableEnvironmentImpl = StreamTableEnvironment.create(env, settings).asInstanceOf[TableEnvironmentImpl] + private val context = new LineageContext(stableEnv) /** * 用于解析给定的SQL语句 @@ -59,17 +71,21 @@ private[fire] trait FlinkSqlParserBase extends SqlParser { override def sqlParser(sql: String): Unit = { try { FlinkUtils.sqlNodeParser(sql) match { - case select: SqlSelect => this.parseSqlNode(select) + case select: SqlSelect => + this.parseSqlNode(select) case insert: RichSqlInsert => { - this.parseSqlNode(insert.getTargetTable, Operation.INSERT_INTO) - this.parsePartitions(insert.getTargetTable.asInstanceOf[SqlIdentifier], Seq(insert.getStaticPartitions)) - this.parseSqlNode(insert.getSource, Operation.SELECT, targetTable = Some(insert.getTargetTable)) + val results = context.analyzeLineage(sql) + for (x <- results) { + SQLLineageManager.addColRelation(x.getSourceColumn, x.getTargetColumn) + } } case createView: SqlCreateView => { this.parseSqlNode(createView.getViewName, Operation.CREATE_VIEW) this.parseSqlNode(createView.getQuery, Operation.SELECT) } - case createTable: SqlCreateTable => parseCreateTable(createTable) + case createTable: SqlCreateTable => + stableEnv.executeSql(sql) + parseCreateTable(createTable) case _ => this.hiveSqlParser(sql) } } catch { @@ -101,7 +117,7 @@ private[fire] trait FlinkSqlParserBase extends SqlParser { case sqlHiveInsert: RichSqlHiveInsert => this.parseHiveInsert(sqlHiveInsert) case _ => this.logger.info(s"可忽略异常:实时血缘解析SQL报错,SQL:\n$sql") } - } (this.logger, catchLog = s"可忽略异常:实时血缘解析SQL报错,SQL:\n$sql", isThrow = false, hook = false) + }(this.logger, catchLog = s"可忽略异常:实时血缘解析SQL报错,SQL:\n$sql", isThrow = false, hook = false) } /** @@ -231,7 +247,7 @@ private[fire] trait FlinkSqlParserBase extends SqlParser { hiveCatalog.tableExists(this.toFlinkTableIdentifier(tableIdentifier)) } } else false - } (this.logger, catchLog = s"判断${tableIdentifier}是否为hive表失败", hook = false) + }(this.logger, catchLog = s"判断${tableIdentifier}是否为hive表失败", hook = false) } } @@ -414,6 +430,7 @@ private[fire] trait FlinkSqlParserBase extends SqlParser { /** * 用于解析sql中的options + * * @param tableIdentifier * 表名 * @param options @@ -429,6 +446,7 @@ private[fire] trait FlinkSqlParserBase extends SqlParser { /** * 解析字段列表信息 + * * @param tableIdentifier * 表名 * @param columnList diff --git a/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/util/FlinkSingletonFactory.scala 
b/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/util/FlinkSingletonFactory.scala index a8f9a333..ace44706 100644 --- a/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/util/FlinkSingletonFactory.scala +++ b/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/util/FlinkSingletonFactory.scala @@ -72,4 +72,7 @@ object FlinkSingletonFactory extends SingletonFactory { * TableEnv实例 */ private[fire] def getTableEnv: TableEnvironment = this.tableEnv + + private [fire] def getStreamEnv: StreamExecutionEnvironment = this.streamEnv + } diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelColumnOrigin.java b/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelColumnOrigin.java new file mode 100644 index 00000000..358299d5 --- /dev/null +++ b/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelColumnOrigin.java @@ -0,0 +1,125 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptTable; + +/** + * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelColumnOrigin + *

+ * Modification point:
+ *     1. Add the transform field and related code.
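+ *
+ * Illustrative example (hypothetical query): for "SELECT UPPER(name) AS uname FROM t",
+ * the origin of output column uname is column name of table t, with isDerived = true
+ * and a transform such as "UPPER($0)".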
+ *
+ * @description: RelColumnOrigin is a data structure describing one of the origins of an
+ * output column produced by a relational expression.
+ * @author: HamaWhite
+ */
+public class RelColumnOrigin {
+    // ~ Instance fields --------------------------------------------------------
+
+    private final RelOptTable originTable;
+
+    private final int iOriginColumn;
+
+    private final boolean isDerived;
+
+    /**
+     * Stores the data-conversion expression, i.e. by which expression the
+     * source table fields are transformed into the target field
+     */
+    private String transform;
+
+    // ~ Constructors -----------------------------------------------------------
+
+    public RelColumnOrigin(
+            RelOptTable originTable,
+            int iOriginColumn,
+            boolean isDerived) {
+        this.originTable = originTable;
+        this.iOriginColumn = iOriginColumn;
+        this.isDerived = isDerived;
+    }
+
+    public RelColumnOrigin(
+            RelOptTable originTable,
+            int iOriginColumn,
+            boolean isDerived,
+            String transform) {
+        this.originTable = originTable;
+        this.iOriginColumn = iOriginColumn;
+        this.isDerived = isDerived;
+        this.transform = transform;
+    }
+
+    // ~ Methods ----------------------------------------------------------------
+
+    /**
+     * Returns table of origin.
+     */
+    public RelOptTable getOriginTable() {
+        return originTable;
+    }
+
+    /**
+     * Returns the 0-based index of column in origin table; whether this ordinal
+     * is flattened or unflattened depends on whether UDT flattening has already
+     * been performed on the relational expression which produced this
+     * description.
+     */
+    public int getOriginColumnOrdinal() {
+        return iOriginColumn;
+    }
+
+    /**
+     * Consider the query select a+b as c, d as e from t. The
+     * output column c has two origins (a and b), both of them derived. The
+     * output column e has one origin (d), which is not derived.
+     *
+     * @return false if value taken directly from column in origin table; true
+     * otherwise
+     */
+    public boolean isDerived() {
+        return isDerived;
+    }
+
+    public String getTransform() {
+        return transform;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (!(obj instanceof RelColumnOrigin)) {
+            return false;
+        }
+        RelColumnOrigin other = (RelColumnOrigin) obj;
+        return originTable.getQualifiedName().equals(
+                other.originTable.getQualifiedName())
+                && (iOriginColumn == other.iOriginColumn)
+                && (isDerived == other.isDerived);
+    }
+
+    @Override
+    public int hashCode() {
+        return originTable.getQualifiedName().hashCode()
+                + iOriginColumn + (isDerived ? 313 : 0);
+    }
+}
diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java b/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java
new file mode 100644
index 00000000..f3f0f57d
--- /dev/null
+++ b/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java
@@ -0,0 +1,551 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Calc; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.Exchange; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Match; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SetOp; +import org.apache.calcite.rel.core.Snapshot; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.TableFunctionScan; +import org.apache.calcite.rel.core.TableModify; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLocalRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexPatternFieldRef; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexVisitor; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.util.BuiltInMethod; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelMdColumnOrigins + * + *

Modification point:
+ *     1. Support lookup join, add method getColumnOrigins(Snapshot rel, RelMetadataQuery mq, int iOutputColumn)
+ *     2. Support watermark, add method getColumnOrigins(SingleRel rel, RelMetadataQuery mq, int iOutputColumn)
+ *     3. Support table function, add method getColumnOrigins(Correlate rel, RelMetadataQuery mq, int iOutputColumn)
+ *     4. Support CEP, add method getColumnOrigins(Match rel, RelMetadataQuery mq, int iOutputColumn)
+ *     5. Support transform, add method createDerivedColumnOrigins(Set<RelColumnOrigin> inputSet, String transform, boolean originTransform) and related code
+ *     6. Support field AS LOCALTIMESTAMP, modify method getColumnOrigins(Project rel, RelMetadataQuery mq, int iOutputColumn)
+ *     7. Support PROCTIME() as the first field, add method computeIndexWithOffset, used by getColumnOrigins(Project rel, RelMetadataQuery mq, int iOutputColumn)
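+ *
+ * Illustrative example (hypothetical tables): for "INSERT INTO sink SELECT concat(a, b) AS c FROM source",
+ * getColumnOrigins for output column c returns two derived origins, source.a and source.b, each
+ * carrying the transform expression of the concat call.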
+ *
+ * @description: RelMdColumnOrigins supplies a default implementation of
+ * {@link RelMetadataQuery#getColumnOrigins} for the standard logical algebra.
+ * @author: HamaWhite
+ */
+public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> {
+
+    private static final Logger LOG = LoggerFactory.getLogger(RelMdColumnOrigins.class);
+
+    public static final String DELIMITER = ".";
+
+    public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource(
+            BuiltInMethod.COLUMN_ORIGIN.method, new RelMdColumnOrigins());
+
+    // ~ Constructors -----------------------------------------------------------
+
+    private RelMdColumnOrigins() {
+    }
+
+    // ~ Methods ----------------------------------------------------------------
+
+    public MetadataDef<BuiltInMetadata.ColumnOrigin> getDef() {
+        return BuiltInMetadata.ColumnOrigin.DEF;
+    }
+
+    public Set<RelColumnOrigin> getColumnOrigins(Aggregate rel,
+            RelMetadataQuery mq, int iOutputColumn) {
+        if (iOutputColumn < rel.getGroupCount()) {
+            // get actual index of Group columns.
+            return mq.getColumnOrigins(rel.getInput(), rel.getGroupSet().asList().get(iOutputColumn));
+        }
+
+        // Aggregate columns are derived from input columns
+        AggregateCall call = rel.getAggCallList().get(iOutputColumn
+                - rel.getGroupCount());
+
+        final Set<RelColumnOrigin> set = new LinkedHashSet<>();
+        for (Integer iInput : call.getArgList()) {
+            Set<RelColumnOrigin> inputSet = mq.getColumnOrigins(rel.getInput(), iInput);
+            inputSet = createDerivedColumnOrigins(inputSet, call.toString(), true);
+            if (inputSet != null) {
+                set.addAll(inputSet);
+            }
+        }
+        return set;
+    }
+
+    public Set<RelColumnOrigin> getColumnOrigins(Join rel, RelMetadataQuery mq,
+            int iOutputColumn) {
+        int nLeftColumns = rel.getLeft().getRowType().getFieldList().size();
+        Set<RelColumnOrigin> set;
+        boolean derived = false;
+        if (iOutputColumn < nLeftColumns) {
+            set = mq.getColumnOrigins(rel.getLeft(), iOutputColumn);
+            if (rel.getJoinType().generatesNullsOnLeft()) {
+                derived = true;
+            }
+        } else {
+            set = mq.getColumnOrigins(rel.getRight(), iOutputColumn - nLeftColumns);
+            if (rel.getJoinType().generatesNullsOnRight()) {
+                derived = true;
+            }
+        }
+        if (derived) {
+            // nulls are generated due to outer join; that counts
+            // as derivation
+            set = createDerivedColumnOrigins(set);
+        }
+        return set;
+    }
+
+    /**
+     * Support field lineage for table functions
+     */
+    public Set<RelColumnOrigin> getColumnOrigins(Correlate rel, RelMetadataQuery mq, int iOutputColumn) {
+
+        List<RelDataTypeField> leftFieldList = rel.getLeft().getRowType().getFieldList();
+
+        int nLeftColumns = leftFieldList.size();
+        Set<RelColumnOrigin> set;
+        if (iOutputColumn < nLeftColumns) {
+            set = mq.getColumnOrigins(rel.getLeft(), iOutputColumn);
+        } else {
+            if (rel.getRight() instanceof TableFunctionScan) {
+                // get the field name of the left table configured in the Table Function on the right
+                TableFunctionScan tableFunctionScan = (TableFunctionScan) rel.getRight();
+                RexCall rexCall = (RexCall) tableFunctionScan.getCall();
+                // support only one field in table function
+                RexFieldAccess rexFieldAccess = (RexFieldAccess) rexCall.getOperands().get(0);
+                String fieldName = rexFieldAccess.getField().getName();
+
+                int leftFieldIndex = 0;
+                for (int i = 0; i < nLeftColumns; i++) {
+                    if (leftFieldList.get(i).getName().equalsIgnoreCase(fieldName)) {
+                        leftFieldIndex = i;
+                        break;
+                    }
+                }
+                /*
+                 * Get the fields from the left table, don't go to
+                 * getColumnOrigins(TableFunctionScan rel, RelMetadataQuery mq, int iOutputColumn),
+                 * otherwise the return is null, and the UDTF field origin cannot be parsed
+                 */
+                set = mq.getColumnOrigins(rel.getLeft(), leftFieldIndex);
+
+                
// process transform for udtf + String transform = rexCall.toString().replace(rexFieldAccess.toString(), fieldName) + + DELIMITER + + tableFunctionScan.getRowType().getFieldNames().get(iOutputColumn - nLeftColumns); + set = createDerivedColumnOrigins(set, transform, false); + } else { + set = mq.getColumnOrigins(rel.getRight(), iOutputColumn - nLeftColumns); + } + } + return set; + } + + public Set getColumnOrigins(SetOp rel, + RelMetadataQuery mq, int iOutputColumn) { + final Set set = new LinkedHashSet<>(); + for (RelNode input : rel.getInputs()) { + Set inputSet = mq.getColumnOrigins(input, iOutputColumn); + if (inputSet == null) { + return Collections.emptySet(); + } + set.addAll(inputSet); + } + return set; + } + + /** + * Support the field blood relationship of lookup join + */ + public Set getColumnOrigins(Snapshot rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + /** + * Support the field blood relationship of watermark + */ + public Set getColumnOrigins(SingleRel rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + /** + * Support for new fields in the source table similar to those created with the LOCALTIMESTAMP function + */ + public Set getColumnOrigins(Project rel, + final RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + RexNode rexNode = rel.getProjects().get(iOutputColumn); + + if (rexNode instanceof RexInputRef) { + // Direct reference: no derivation added. + RexInputRef inputRef = (RexInputRef) rexNode; + int index = inputRef.getIndex(); + if (input instanceof TableScan) { + index = computeIndexWithOffset(rel.getProjects(), inputRef.getIndex(), iOutputColumn); + } + return mq.getColumnOrigins(input, index); + } else if (input instanceof TableScan && rexNode.getClass().equals(RexCall.class) + && ((RexCall) rexNode).getOperands().isEmpty()) { + return mq.getColumnOrigins(input, iOutputColumn); + } + // Anything else is a derivation, possibly from multiple columns. + final Set set = getMultipleColumns(rexNode, input, mq); + return createDerivedColumnOrigins(set, rexNode.toString(), true); + } + + private int computeIndexWithOffset(List projects, int baseIndex, int iOutputColumn) { + int offset = 0; + for (int index = 0; index < iOutputColumn; index++) { + RexNode rexNode = projects.get(index); + if ((rexNode.getClass().equals(RexCall.class) && ((RexCall) rexNode).getOperands().isEmpty())) { + offset += 1; + } + } + return baseIndex + offset; + } + + /** + * Support field blood relationship of CEP. + * The first column is the field after PARTITION BY, and the other columns come from the measures in Match + */ + public Set getColumnOrigins(Match rel, RelMetadataQuery mq, int iOutputColumn) { + int orderCount = rel.getOrderKeys().getKeys().size(); + + if (iOutputColumn < orderCount) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + final RelNode input = rel.getInput(); + RexNode rexNode = rel.getMeasures().values().asList().get(iOutputColumn - orderCount); + + RexPatternFieldRef rexPatternFieldRef = searchRexPatternFieldRef(rexNode); + if (rexPatternFieldRef != null) { + final Set set = mq.getColumnOrigins(input, rexPatternFieldRef.getIndex()); + String originTransform = rexNode instanceof RexCall + ? 
((RexCall) rexNode).getOperands().get(0).toString() + : null; + return createDerivedColumnOrigins(set, originTransform, true); + } + return Collections.emptySet(); + } + + private RexPatternFieldRef searchRexPatternFieldRef(RexNode rexNode) { + if (rexNode instanceof RexCall) { + RexNode operand = ((RexCall) rexNode).getOperands().get(0); + if (operand instanceof RexPatternFieldRef) { + return (RexPatternFieldRef) operand; + } else { + // recursive search + return searchRexPatternFieldRef(operand); + } + } + return null; + } + + public Set getColumnOrigins(Calc rel, + final RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + final RexShuttle rexShuttle = new RexShuttle() { + + @Override + public RexNode visitLocalRef(RexLocalRef localRef) { + return rel.getProgram().expandLocalRef(localRef); + } + }; + final List projects = new ArrayList<>(); + for (RexNode rex : rexShuttle.apply(rel.getProgram().getProjectList())) { + projects.add(rex); + } + final RexNode rexNode = projects.get(iOutputColumn); + if (rexNode instanceof RexInputRef) { + // Direct reference: no derivation added. + RexInputRef inputRef = (RexInputRef) rexNode; + return mq.getColumnOrigins(input, inputRef.getIndex()); + } + // Anything else is a derivation, possibly from multiple columns. + final Set set = getMultipleColumns(rexNode, input, mq); + return createDerivedColumnOrigins(set); + } + + public Set getColumnOrigins(Filter rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(Sort rel, RelMetadataQuery mq, + int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(TableModify rel, RelMetadataQuery mq, + int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(Exchange rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(TableFunctionScan rel, + RelMetadataQuery mq, int iOutputColumn) { + final Set set = new LinkedHashSet<>(); + Set mappings = rel.getColumnMappings(); + if (mappings == null) { + if (!rel.getInputs().isEmpty()) { + // This is a non-leaf transformation: say we don't + // know about origins, because there are probably + // columns below. + return Collections.emptySet(); + } else { + // This is a leaf transformation: say there are fer sure no + // column origins. + return set; + } + } + for (RelColumnMapping mapping : mappings) { + if (mapping.iOutputColumn != iOutputColumn) { + continue; + } + final RelNode input = rel.getInputs().get(mapping.iInputRel); + final int column = mapping.iInputColumn; + Set origins = mq.getColumnOrigins(input, column); + if (origins == null) { + return Collections.emptySet(); + } + if (mapping.derived) { + origins = createDerivedColumnOrigins(origins); + } + set.addAll(origins); + } + return set; + } + + // Catch-all rule when none of the others apply. + @SuppressWarnings("squid:S1172") + public Set getColumnOrigins(RelNode rel, + RelMetadataQuery mq, int iOutputColumn) { + // NOTE jvs 28-Mar-2006: We may get this wrong for a physical table + // expression which supports projections. In that case, + // it's up to the plugin writer to override with the + // correct information. + + if (!rel.getInputs().isEmpty()) { + // No generic logic available for non-leaf rels. 
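+        // An empty set asserts "definitely no origins" to callers; by contrast, a null
+        // result in the RelMetadataQuery contract means the origins are simply unknown.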
+ return Collections.emptySet(); + } + + final Set set = new LinkedHashSet<>(); + + RelOptTable table = rel.getTable(); + if (table == null) { + // Somebody is making column values up out of thin air, like a + // VALUES clause, so we return an empty set. + return set; + } + + // Detect the case where a physical table expression is performing + // projection, and say we don't know instead of making any assumptions. + // (Theoretically we could try to map the projection using column + // names.) This detection assumes the table expression doesn't handle + // rename as well. + if (table.getRowType() != rel.getRowType()) { + return Collections.emptySet(); + } + + set.add(new RelColumnOrigin(table, iOutputColumn, false)); + return set; + } + + private Set createDerivedColumnOrigins( + Set inputSet) { + if (inputSet == null) { + return Collections.emptySet(); + } + final Set set = new LinkedHashSet<>(); + for (RelColumnOrigin rco : inputSet) { + RelColumnOrigin derived = new RelColumnOrigin( + rco.getOriginTable(), + rco.getOriginColumnOrdinal(), + true); + set.add(derived); + } + return set; + } + + private Set createDerivedColumnOrigins( + Set inputSet, String transform, boolean originTransform) { + if (inputSet == null || inputSet.isEmpty()) { + return Collections.emptySet(); + } + final Set set = new LinkedHashSet<>(); + + String finalTransform = originTransform ? computeTransform(inputSet, transform) : transform; + for (RelColumnOrigin rco : inputSet) { + RelColumnOrigin derived = new RelColumnOrigin( + rco.getOriginTable(), + rco.getOriginColumnOrdinal(), + true, + finalTransform); + set.add(derived); + } + return set; + } + + /** + * Replace the variable at the beginning of $ in input with the real field information + */ + private String computeTransform(Set inputSet, String transform) { + LOG.debug("origin transform: {}", transform); + Pattern pattern = Pattern.compile("\\$\\d+"); + Matcher matcher = pattern.matcher(transform); + + Set operandSet = new LinkedHashSet<>(); + while (matcher.find()) { + operandSet.add(matcher.group()); + } + + if (operandSet.isEmpty()) { + LOG.info("operandSet is empty"); + return null; + } + if (inputSet.size() != operandSet.size()) { + LOG.warn("The number [{}] of fields in the source tables are not equal to operands [{}]", inputSet.size(), + operandSet.size()); + return null; + } + + Map sourceColumnMap = new HashMap<>(); + Iterator iterator = optimizeSourceColumnSet(inputSet).iterator(); + operandSet.forEach(e -> sourceColumnMap.put(e, iterator.next())); + LOG.debug("sourceColumnMap: {}", sourceColumnMap); + + matcher = pattern.matcher(transform); + String temp; + while (matcher.find()) { + temp = matcher.group(); + transform = transform.replace(temp, sourceColumnMap.get(temp)); + } + + // temporary special treatment + transform = transform.replace("_UTF-16LE", ""); + LOG.debug("transform: {}", transform); + return transform; + } + + /** + * Increase the readability of transform. + * if catalog, database and table are the same, return field. + * If the catalog and database are the same, return the table and field. + * If the catalog is the same, return the database, table, field. 
+ * Otherwise, return all + */ + private Set optimizeSourceColumnSet(Set inputSet) { + Set catalogSet = new HashSet<>(); + Set databaseSet = new HashSet<>(); + Set tableSet = new HashSet<>(); + Set> qualifiedSet = new LinkedHashSet<>(); + for (RelColumnOrigin rco : inputSet) { + RelOptTable originTable = rco.getOriginTable(); + List qualifiedName = originTable.getQualifiedName(); + + // catalog,database,table,field + List qualifiedList = new ArrayList<>(qualifiedName); + catalogSet.add(qualifiedName.get(0)); + databaseSet.add(qualifiedName.get(1)); + tableSet.add(qualifiedName.get(2)); + + String field = rco.getTransform() != null + ? rco.getTransform() + : originTable.getRowType().getFieldNames().get(rco.getOriginColumnOrdinal()); + qualifiedList.add(field); + qualifiedSet.add(qualifiedList); + } + if (catalogSet.size() == 1 && databaseSet.size() == 1 && tableSet.size() == 1) { + return optimizeName(qualifiedSet, e -> e.get(3)); + } else if (catalogSet.size() == 1 && databaseSet.size() == 1) { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e.subList(2, 4))); + } else if (catalogSet.size() == 1) { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e.subList(1, 4))); + } else { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e)); + } + } + + private Set optimizeName(Set> qualifiedSet, Function, String> mapper) { + return qualifiedSet.stream().map(mapper).collect(Collectors.toSet()); + } + + private Set getMultipleColumns(RexNode rexNode, RelNode input, + final RelMetadataQuery mq) { + final Set set = new LinkedHashSet<>(); + final RexVisitor visitor = new RexVisitorImpl(true) { + + @Override + public Void visitInputRef(RexInputRef inputRef) { + Set inputSet = mq.getColumnOrigins(input, inputRef.getIndex()); + if (inputSet != null) { + set.addAll(inputSet); + } + return null; + } + }; + rexNode.accept(visitor); + return set; + } +} diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelColumnOrigin.java b/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelColumnOrigin.java new file mode 100644 index 00000000..358299d5 --- /dev/null +++ b/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelColumnOrigin.java @@ -0,0 +1,125 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptTable; + +/** + * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelColumnOrigin + *

+ * <p>Modification point:
+ * <ol>
+ *   <li>add transform field and related code.</li>
+ * </ol>
      + * + * @description: RelColumnOrigin is a data structure describing one of the origins of an + * output column produced by a relational expression. + * @author: HamaWhite + */ +public class RelColumnOrigin { + // ~ Instance fields -------------------------------------------------------- + + private final RelOptTable originTable; + + private final int iOriginColumn; + + private final boolean isDerived; + + /** + * Stores the expression for data conversion, + * which source table fields are transformed by which expression the target field + */ + private String transform; + + // ~ Constructors ----------------------------------------------------------- + + public RelColumnOrigin( + RelOptTable originTable, + int iOriginColumn, + boolean isDerived) { + this.originTable = originTable; + this.iOriginColumn = iOriginColumn; + this.isDerived = isDerived; + } + + public RelColumnOrigin( + RelOptTable originTable, + int iOriginColumn, + boolean isDerived, + String transform) { + this.originTable = originTable; + this.iOriginColumn = iOriginColumn; + this.isDerived = isDerived; + this.transform = transform; + } + + // ~ Methods ---------------------------------------------------------------- + + /** + * Returns table of origin. + */ + public RelOptTable getOriginTable() { + return originTable; + } + + /** + * Returns the 0-based index of column in origin table; whether this ordinal + * is flattened or unflattened depends on whether UDT flattening has already + * been performed on the relational expression which produced this + * description. + */ + public int getOriginColumnOrdinal() { + return iOriginColumn; + } + + /** + * Consider the query select a+b as c, d as e from t. The + * output column c has two origins (a and b), both of them derived. The + * output column d as one origin (c), which is not derived. + * + * @return false if value taken directly from column in origin table; true + * otherwise + */ + public boolean isDerived() { + return isDerived; + } + + public String getTransform() { + return transform; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof RelColumnOrigin)) { + return false; + } + RelColumnOrigin other = (RelColumnOrigin) obj; + return originTable.getQualifiedName().equals( + other.originTable.getQualifiedName()) + && (iOriginColumn == other.iOriginColumn) + && (isDerived == other.isDerived); + } + + @Override + public int hashCode() { + return originTable.getQualifiedName().hashCode() + + iOriginColumn + (isDerived ? 313 : 0); + } +} diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java b/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java new file mode 100644 index 00000000..3b50226b --- /dev/null +++ b/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java @@ -0,0 +1,520 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.core.*; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.*; +import org.apache.calcite.util.BuiltInMethod; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelMdColumnOrigins + * + *

+ * <p>Modification point:
+ * <ol>
+ *   <li>Support lookup join, add method getColumnOrigins(Snapshot rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support watermark, add method getColumnOrigins(SingleRel rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support table function, add method getColumnOrigins(Correlate rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support CEP, add method getColumnOrigins(Match rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support transform, add method createDerivedColumnOrigins(Set<RelColumnOrigin> inputSet, String transform, boolean originTransform), and related code</li>
+ *   <li>Support field AS LOCALTIMESTAMP, modify method getColumnOrigins(Project rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support PROCTIME() as the first field, add method computeIndexWithOffset, used by getColumnOrigins(Project rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ * </ol>
          + * + * @description: RelMdColumnOrigins supplies a default implementation of {@link RelMetadataQuery#getColumnOrigins} for the standard logical algebra. + * @author: HamaWhite + */ +public class RelMdColumnOrigins implements MetadataHandler { + + private static final Logger LOG = LoggerFactory.getLogger(RelMdColumnOrigins.class); + + public static final String DELIMITER = "."; + + public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.COLUMN_ORIGIN.method, new RelMdColumnOrigins()); + + // ~ Constructors ----------------------------------------------------------- + + private RelMdColumnOrigins() { + } + + // ~ Methods ---------------------------------------------------------------- + + public MetadataDef getDef() { + return BuiltInMetadata.ColumnOrigin.DEF; + } + + public Set getColumnOrigins(Aggregate rel, + RelMetadataQuery mq, int iOutputColumn) { + if (iOutputColumn < rel.getGroupCount()) { + // get actual index of Group columns. + return mq.getColumnOrigins(rel.getInput(), rel.getGroupSet().asList().get(iOutputColumn)); + } + + // Aggregate columns are derived from input columns + AggregateCall call = rel.getAggCallList().get(iOutputColumn + - rel.getGroupCount()); + + final Set set = new LinkedHashSet<>(); + for (Integer iInput : call.getArgList()) { + Set inputSet = mq.getColumnOrigins(rel.getInput(), iInput); + inputSet = createDerivedColumnOrigins(inputSet, call.toString(), true); + if (inputSet != null) { + set.addAll(inputSet); + } + } + return set; + } + + public Set getColumnOrigins(Join rel, RelMetadataQuery mq, + int iOutputColumn) { + int nLeftColumns = rel.getLeft().getRowType().getFieldList().size(); + Set set; + boolean derived = false; + if (iOutputColumn < nLeftColumns) { + set = mq.getColumnOrigins(rel.getLeft(), iOutputColumn); + if (rel.getJoinType().generatesNullsOnLeft()) { + derived = true; + } + } else { + set = mq.getColumnOrigins(rel.getRight(), iOutputColumn - nLeftColumns); + if (rel.getJoinType().generatesNullsOnRight()) { + derived = true; + } + } + if (derived) { + // nulls are generated due to outer join; that counts + // as derivation + set = createDerivedColumnOrigins(set); + } + return set; + } + + /** + * Support the field blood relationship of table function + */ + public Set getColumnOrigins(Correlate rel, RelMetadataQuery mq, int iOutputColumn) { + + List leftFieldList = rel.getLeft().getRowType().getFieldList(); + + int nLeftColumns = leftFieldList.size(); + Set set; + if (iOutputColumn < nLeftColumns) { + set = mq.getColumnOrigins(rel.getLeft(), iOutputColumn); + } else { + if (rel.getRight() instanceof TableFunctionScan) { + // get the field name of the left table configured in the Table Function on the right + TableFunctionScan tableFunctionScan = (TableFunctionScan) rel.getRight(); + RexCall rexCall = (RexCall) tableFunctionScan.getCall(); + // support only one field in table function + RexFieldAccess rexFieldAccess = (RexFieldAccess) rexCall.getOperands().get(0); + String fieldName = rexFieldAccess.getField().getName(); + + int leftFieldIndex = 0; + for (int i = 0; i < nLeftColumns; i++) { + if (leftFieldList.get(i).getName().equalsIgnoreCase(fieldName)) { + leftFieldIndex = i; + break; + } + } + /** + * Get the fields from the left table, don't go to + * getColumnOrigins(TableFunctionScan rel,RelMetadataQuery mq, int iOutputColumn), + * otherwise the return is null, and the UDTF field origin cannot be parsed + */ + set = mq.getColumnOrigins(rel.getLeft(), leftFieldIndex); 
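+                    // Note (added commentary): the loop above leaves leftFieldIndex at 0 when no
+                    // left field matches fieldName, so lineage would silently fall back to the
+                    // first left column in that (unexpected) case.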
+ + // process transform for udtf + String transform = rexCall.toString().replace(rexFieldAccess.toString(), fieldName) + + DELIMITER + + tableFunctionScan.getRowType().getFieldNames().get(iOutputColumn - nLeftColumns); + set = createDerivedColumnOrigins(set, transform, false); + } else { + set = mq.getColumnOrigins(rel.getRight(), iOutputColumn - nLeftColumns); + } + } + return set; + } + + public Set getColumnOrigins(SetOp rel, + RelMetadataQuery mq, int iOutputColumn) { + final Set set = new LinkedHashSet<>(); + for (RelNode input : rel.getInputs()) { + Set inputSet = mq.getColumnOrigins(input, iOutputColumn); + if (inputSet == null) { + return Collections.emptySet(); + } + set.addAll(inputSet); + } + return set; + } + + /** + * Support the field blood relationship of lookup join + */ + public Set getColumnOrigins(Snapshot rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + /** + * Support the field blood relationship of watermark + */ + public Set getColumnOrigins(SingleRel rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + /** + * Support for new fields in the source table similar to those created with the LOCALTIMESTAMP function + */ + public Set getColumnOrigins(Project rel, + final RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + RexNode rexNode = rel.getProjects().get(iOutputColumn); + + if (rexNode instanceof RexInputRef) { + // Direct reference: no derivation added. + RexInputRef inputRef = (RexInputRef) rexNode; + int index = inputRef.getIndex(); + if (input instanceof TableScan) { + index = computeIndexWithOffset(rel.getProjects(), inputRef.getIndex(), iOutputColumn); + } + return mq.getColumnOrigins(input, index); + } else if (input instanceof TableScan && rexNode.getClass().equals(RexCall.class) + && ((RexCall) rexNode).getOperands().isEmpty()) { + return mq.getColumnOrigins(input, iOutputColumn); + } + // Anything else is a derivation, possibly from multiple columns. + final Set set = getMultipleColumns(rexNode, input, mq); + return createDerivedColumnOrigins(set, rexNode.toString(), true); + } + + private int computeIndexWithOffset(List projects, int baseIndex, int iOutputColumn) { + int offset = 0; + for (int index = 0; index < iOutputColumn; index++) { + RexNode rexNode = projects.get(index); + if ((rexNode.getClass().equals(RexCall.class) && ((RexCall) rexNode).getOperands().isEmpty())) { + offset += 1; + } + } + return baseIndex + offset; + } + + /** + * Support field blood relationship of CEP. + * The first column is the field after PARTITION BY, and the other columns come from the measures in Match + */ + public Set getColumnOrigins(Match rel, RelMetadataQuery mq, int iOutputColumn) { + int orderCount = rel.getOrderKeys().getKeys().size(); + + if (iOutputColumn < orderCount) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + final RelNode input = rel.getInput(); + RexNode rexNode = rel.getMeasures().values().asList().get(iOutputColumn - orderCount); + + RexPatternFieldRef rexPatternFieldRef = searchRexPatternFieldRef(rexNode); + if (rexPatternFieldRef != null) { + final Set set = mq.getColumnOrigins(input, rexPatternFieldRef.getIndex()); + String originTransform = rexNode instanceof RexCall + ? 
((RexCall) rexNode).getOperands().get(0).toString() + : null; + return createDerivedColumnOrigins(set, originTransform, true); + } + return Collections.emptySet(); + } + + private RexPatternFieldRef searchRexPatternFieldRef(RexNode rexNode) { + if (rexNode instanceof RexCall) { + RexNode operand = ((RexCall) rexNode).getOperands().get(0); + if (operand instanceof RexPatternFieldRef) { + return (RexPatternFieldRef) operand; + } else { + // recursive search + return searchRexPatternFieldRef(operand); + } + } + return null; + } + + public Set getColumnOrigins(Calc rel, + final RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + final RexShuttle rexShuttle = new RexShuttle() { + + @Override + public RexNode visitLocalRef(RexLocalRef localRef) { + return rel.getProgram().expandLocalRef(localRef); + } + }; + final List projects = new ArrayList<>(); + for (RexNode rex : rexShuttle.apply(rel.getProgram().getProjectList())) { + projects.add(rex); + } + final RexNode rexNode = projects.get(iOutputColumn); + if (rexNode instanceof RexInputRef) { + // Direct reference: no derivation added. + RexInputRef inputRef = (RexInputRef) rexNode; + return mq.getColumnOrigins(input, inputRef.getIndex()); + } + // Anything else is a derivation, possibly from multiple columns. + final Set set = getMultipleColumns(rexNode, input, mq); + return createDerivedColumnOrigins(set); + } + + public Set getColumnOrigins(Filter rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(Sort rel, RelMetadataQuery mq, + int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(TableModify rel, RelMetadataQuery mq, + int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(Exchange rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(TableFunctionScan rel, + RelMetadataQuery mq, int iOutputColumn) { + final Set set = new LinkedHashSet<>(); + Set mappings = rel.getColumnMappings(); + if (mappings == null) { + if (!rel.getInputs().isEmpty()) { + // This is a non-leaf transformation: say we don't + // know about origins, because there are probably + // columns below. + return Collections.emptySet(); + } else { + // This is a leaf transformation: say there are fer sure no + // column origins. + return set; + } + } + for (RelColumnMapping mapping : mappings) { + if (mapping.iOutputColumn != iOutputColumn) { + continue; + } + final RelNode input = rel.getInputs().get(mapping.iInputRel); + final int column = mapping.iInputColumn; + Set origins = mq.getColumnOrigins(input, column); + if (origins == null) { + return Collections.emptySet(); + } + if (mapping.derived) { + origins = createDerivedColumnOrigins(origins); + } + set.addAll(origins); + } + return set; + } + + // Catch-all rule when none of the others apply. + @SuppressWarnings("squid:S1172") + public Set getColumnOrigins(RelNode rel, + RelMetadataQuery mq, int iOutputColumn) { + // NOTE jvs 28-Mar-2006: We may get this wrong for a physical table + // expression which supports projections. In that case, + // it's up to the plugin writer to override with the + // correct information. + + if (!rel.getInputs().isEmpty()) { + // No generic logic available for non-leaf rels. 
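+        // Guessing a column mapping for an unknown non-leaf rel could fabricate lineage,
+        // so the conservative answer here is "no origins".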
+ return Collections.emptySet(); + } + + final Set set = new LinkedHashSet<>(); + + RelOptTable table = rel.getTable(); + if (table == null) { + // Somebody is making column values up out of thin air, like a + // VALUES clause, so we return an empty set. + return set; + } + + // Detect the case where a physical table expression is performing + // projection, and say we don't know instead of making any assumptions. + // (Theoretically we could try to map the projection using column + // names.) This detection assumes the table expression doesn't handle + // rename as well. + if (table.getRowType() != rel.getRowType()) { + return Collections.emptySet(); + } + + set.add(new RelColumnOrigin(table, iOutputColumn, false)); + return set; + } + + private Set createDerivedColumnOrigins( + Set inputSet) { + if (inputSet == null) { + return Collections.emptySet(); + } + final Set set = new LinkedHashSet<>(); + for (RelColumnOrigin rco : inputSet) { + RelColumnOrigin derived = new RelColumnOrigin( + rco.getOriginTable(), + rco.getOriginColumnOrdinal(), + true); + set.add(derived); + } + return set; + } + + private Set createDerivedColumnOrigins( + Set inputSet, String transform, boolean originTransform) { + if (inputSet == null || inputSet.isEmpty()) { + return Collections.emptySet(); + } + final Set set = new LinkedHashSet<>(); + + String finalTransform = originTransform ? computeTransform(inputSet, transform) : transform; + for (RelColumnOrigin rco : inputSet) { + RelColumnOrigin derived = new RelColumnOrigin( + rco.getOriginTable(), + rco.getOriginColumnOrdinal(), + true, + finalTransform); + set.add(derived); + } + return set; + } + + /** + * Replace the variable at the beginning of $ in input with the real field information + */ + private String computeTransform(Set inputSet, String transform) { + LOG.debug("origin transform: {}", transform); + Pattern pattern = Pattern.compile("\\$\\d+"); + Matcher matcher = pattern.matcher(transform); + + Set operandSet = new LinkedHashSet<>(); + while (matcher.find()) { + operandSet.add(matcher.group()); + } + + if (operandSet.isEmpty()) { + LOG.info("operandSet is empty"); + return null; + } + if (inputSet.size() != operandSet.size()) { + LOG.warn("The number [{}] of fields in the source tables are not equal to operands [{}]", inputSet.size(), + operandSet.size()); + return null; + } + + Map sourceColumnMap = new HashMap<>(); + Iterator iterator = optimizeSourceColumnSet(inputSet).iterator(); + operandSet.forEach(e -> sourceColumnMap.put(e, iterator.next())); + LOG.debug("sourceColumnMap: {}", sourceColumnMap); + + matcher = pattern.matcher(transform); + String temp; + while (matcher.find()) { + temp = matcher.group(); + transform = transform.replace(temp, sourceColumnMap.get(temp)); + } + + // temporary special treatment + transform = transform.replace("_UTF-16LE", ""); + LOG.debug("transform: {}", transform); + return transform; + } + + /** + * Increase the readability of transform. + * if catalog, database and table are the same, return field. + * If the catalog and database are the same, return the table and field. + * If the catalog is the same, return the database, table, field. 
+ * Otherwise, return all + */ + private Set optimizeSourceColumnSet(Set inputSet) { + Set catalogSet = new HashSet<>(); + Set databaseSet = new HashSet<>(); + Set tableSet = new HashSet<>(); + Set> qualifiedSet = new LinkedHashSet<>(); + for (RelColumnOrigin rco : inputSet) { + RelOptTable originTable = rco.getOriginTable(); + List qualifiedName = originTable.getQualifiedName(); + + // catalog,database,table,field + List qualifiedList = new ArrayList<>(qualifiedName); + catalogSet.add(qualifiedName.get(0)); + databaseSet.add(qualifiedName.get(1)); + tableSet.add(qualifiedName.get(2)); + + String field = rco.getTransform() != null + ? rco.getTransform() + : originTable.getRowType().getFieldNames().get(rco.getOriginColumnOrdinal()); + qualifiedList.add(field); + qualifiedSet.add(qualifiedList); + } + if (catalogSet.size() == 1 && databaseSet.size() == 1 && tableSet.size() == 1) { + return optimizeName(qualifiedSet, e -> e.get(3)); + } else if (catalogSet.size() == 1 && databaseSet.size() == 1) { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e.subList(2, 4))); + } else if (catalogSet.size() == 1) { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e.subList(1, 4))); + } else { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e)); + } + } + + private Set optimizeName(Set> qualifiedSet, Function, String> mapper) { + return qualifiedSet.stream().map(mapper).collect(Collectors.toSet()); + } + + private Set getMultipleColumns(RexNode rexNode, RelNode input, + final RelMetadataQuery mq) { + final Set set = new LinkedHashSet<>(); + final RexVisitor visitor = new RexVisitorImpl(true) { + + @Override + public Void visitInputRef(RexInputRef inputRef) { + Set inputSet = mq.getColumnOrigins(input, inputRef.getIndex()); + if (inputSet != null) { + set.addAll(inputSet); + } + return null; + } + }; + rexNode.accept(visitor); + return set; + } +} diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/sql/SqlSelect.java b/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/sql/SqlSelect.java new file mode 100644 index 00000000..0cfaa758 --- /dev/null +++ b/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/sql/SqlSelect.java @@ -0,0 +1,355 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.calcite.sql; + + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.sql.validate.SqlValidatorScope; +import org.apache.calcite.util.ImmutableNullableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nonnull; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; + +/** + * A SqlSelect is a node of a parse tree which represents a select statement. It + * warrants its own node type just because we have a lot of methods to put somewhere. + * + * @description: Modify the value method of where to addCondition() to support row-level permission + * filtering + * @author: baisong + * @version: 1.0.0 + * @date: 2022/12/14 12:24 PM + */ +public class SqlSelect extends SqlCall { + + private static final Logger LOG = LoggerFactory.getLogger(SqlSelect.class); + + public static final int FROM_OPERAND = 2; + public static final int WHERE_OPERAND = 3; + public static final int HAVING_OPERAND = 5; + + SqlNodeList keywordList; + SqlNodeList selectList; + SqlNode from; + SqlNode where; + SqlNodeList groupBy; + SqlNode having; + SqlNodeList windowDecls; + SqlNodeList orderBy; + SqlNode offset; + SqlNode fetch; + SqlNodeList hints; + + public SqlSelect(SqlParserPos pos, + SqlNodeList keywordList, + SqlNodeList selectList, + SqlNode from, + SqlNode where, + SqlNodeList groupBy, + SqlNode having, + SqlNodeList windowDecls, + SqlNodeList orderBy, + SqlNode offset, + SqlNode fetch, + SqlNodeList hints) { + super(pos); + this.keywordList = Objects.requireNonNull(keywordList != null + ? keywordList + : new SqlNodeList(pos)); + this.selectList = selectList; + this.from = from; + this.groupBy = groupBy; + this.having = having; + this.windowDecls = Objects.requireNonNull(windowDecls != null + ? windowDecls + : new SqlNodeList(pos)); + this.orderBy = orderBy; + this.offset = offset; + this.fetch = fetch; + this.hints = hints; + + // add row level filter condition for where clause +// this.where = addCondition(from, where, false); + } + + /** + * The main process of controlling row-level permissions + */ +/* private SqlNode addCondition(SqlNode from, SqlNode where, boolean fromJoin) { + if (from instanceof SqlIdentifier) { + String tableName = from.toString(); + // the table name is used as an alias for join + String tableAlias = fromJoin ? tableName : null; + return addPermission(where, tableName, tableAlias); + } else if (from instanceof SqlJoin) { + SqlJoin sqlJoin = (SqlJoin) from; + // support recursive processing, such as join for three tables, process left sqlNode + where = addCondition(sqlJoin.getLeft(), where, true); + // process right sqlNode + return addCondition(sqlJoin.getRight(), where, true); + } else if (from instanceof SqlBasicCall) { + // Table has an alias or comes from a subquery + SqlNode[] tableNodes = ((SqlBasicCall) from).getOperands(); + *//** + * If there is a subquery in the Join, row-level filtering has been appended to the subquery. 
+ * What is returned here is the SqlSelect type, just return the original where directly + *//* + if (!(tableNodes[0] instanceof SqlIdentifier)) { + return where; + } + String tableName = tableNodes[0].toString(); + String tableAlias = tableNodes[1].toString(); + return addPermission(where, tableName, tableAlias); + } + return where; + }*/ + + /** + * Add row-level filtering based on user-configured permission points + */ +/* private SqlNode addPermission(SqlNode where, String tableName, String tableAlias) { + SqlBasicCall permissions = null; + ConcurrentHashMap permissionsMap = RowLevelPermissionsContext.get(); + if (permissionsMap != null) { + String permissionsStatement = permissionsMap.get(tableName); + if (permissionsStatement != null && !"".equals(permissionsStatement)) { + permissions = (SqlBasicCall) CustomTableEnvironmentContext.get().getParser() + .parseExpression(permissionsStatement); + } + } + + // add an alias + if (permissions != null && tableAlias != null) { + ImmutableList namesList = ImmutableList.of(tableAlias, permissions.getOperands()[0].toString()); + permissions.getOperands()[0] = new SqlIdentifier(namesList, null, new SqlParserPos(0, 0), null); + } + + return buildWhereClause(where, permissions); + }*/ + + /** + * Rebuild the where clause + */ + private SqlNode buildWhereClause(SqlNode where, SqlBasicCall permissions) { + if (permissions != null) { + if (where == null) { + return permissions; + } + SqlBinaryOperator sqlBinaryOperator = new SqlBinaryOperator(SqlKind.AND.name(), SqlKind.AND, 0, true, null, + null, null); + SqlNode[] operands = new SqlNode[2]; + operands[0] = where; + operands[1] = permissions; + SqlParserPos sqlParserPos = new SqlParserPos(0, 0); + return new SqlBasicCall(sqlBinaryOperator, operands, sqlParserPos); + } + return where; + } + + @Override + public SqlOperator getOperator() { + return SqlSelectOperator.INSTANCE; + } + + @Override + public SqlKind getKind() { + return SqlKind.SELECT; + } + + @Override + public List getOperandList() { + return ImmutableNullableList.of(keywordList, selectList, from, where, + groupBy, having, windowDecls, orderBy, offset, fetch, hints); + } + + @Override + public void setOperand(int i, SqlNode operand) { + switch (i) { + case 0: + keywordList = Objects.requireNonNull((SqlNodeList) operand); + break; + case 1: + selectList = (SqlNodeList) operand; + break; + case 2: + from = operand; + break; + case 3: + where = operand; + break; + case 4: + groupBy = (SqlNodeList) operand; + break; + case 5: + having = operand; + break; + case 6: + windowDecls = Objects.requireNonNull((SqlNodeList) operand); + break; + case 7: + orderBy = (SqlNodeList) operand; + break; + case 8: + offset = operand; + break; + case 9: + fetch = operand; + break; + default: + throw new AssertionError(i); + } + } + + public final boolean isDistinct() { + return getModifierNode(SqlSelectKeyword.DISTINCT) != null; + } + + public final SqlNode getModifierNode(SqlSelectKeyword modifier) { + for (SqlNode keyword : keywordList) { + SqlSelectKeyword keyword2 = ((SqlLiteral) keyword).symbolValue(SqlSelectKeyword.class); + if (keyword2 == modifier) { + return keyword; + } + } + return null; + } + + public final SqlNode getFrom() { + return from; + } + + public void setFrom(SqlNode from) { + this.from = from; + } + + public final SqlNodeList getGroup() { + return groupBy; + } + + public void setGroupBy(SqlNodeList groupBy) { + this.groupBy = groupBy; + } + + public final SqlNode getHaving() { + return having; + } + + public void setHaving(SqlNode having) 
{ + this.having = having; + } + + public final SqlNodeList getSelectList() { + return selectList; + } + + public void setSelectList(SqlNodeList selectList) { + this.selectList = selectList; + } + + public final SqlNode getWhere() { + return where; + } + + public void setWhere(SqlNode whereClause) { + this.where = whereClause; + } + + @Nonnull + public final SqlNodeList getWindowList() { + return windowDecls; + } + + public final SqlNodeList getOrderList() { + return orderBy; + } + + public void setOrderBy(SqlNodeList orderBy) { + this.orderBy = orderBy; + } + + public final SqlNode getOffset() { + return offset; + } + + public void setOffset(SqlNode offset) { + this.offset = offset; + } + + public final SqlNode getFetch() { + return fetch; + } + + public void setFetch(SqlNode fetch) { + this.fetch = fetch; + } + + public void setHints(SqlNodeList hints) { + this.hints = hints; + } + + public SqlNodeList getHints() { + return this.hints; + } + + public boolean hasHints() { + // The hints may be passed as null explicitly. + return this.hints != null && this.hints.size() > 0; + } + + @Override + public void validate(SqlValidator validator, SqlValidatorScope scope) { + validator.validateQuery(this, scope, validator.getUnknownType()); + } + + /** + * Override SqlCall, to introduce a sub-query frame. + */ + @Override + public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { + if (!writer.inQuery()) { + // If this SELECT is the topmost item in a sub-query, introduce a new + // frame. (The topmost item in the sub-query might be a UNION or + // ORDER. In this case, we don't need a wrapper frame.) + final SqlWriter.Frame frame = writer.startList(SqlWriter.FrameTypeEnum.SUB_QUERY, "(", ")"); + writer.getDialect().unparseCall(writer, this, 0, 0); + writer.endList(frame); + } else { + writer.getDialect().unparseCall(writer, this, leftPrec, rightPrec); + } + } + + public boolean hasOrderBy() { + return orderBy != null && orderBy.size() != 0; + } + + public boolean hasWhere() { + return where != null; + } + + public boolean isKeywordPresent(SqlSelectKeyword targetKeyWord) { + return getModifierNode(targetKeyWord) != null; + } +} diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelColumnOrigin.java b/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelColumnOrigin.java new file mode 100644 index 00000000..131d7894 --- /dev/null +++ b/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelColumnOrigin.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptTable; + +/** + * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelColumnOrigin + *

+ * <p>Modification point:
+ * <ol>
+ *   <li>add transform field and related code.</li>
+ * </ol>
          + * + * @description: RelColumnOrigin is a data structure describing one of the origins of an + * output column produced by a relational expression. + * @author: HamaWhite + */ +public class RelColumnOrigin { + // ~ Instance fields -------------------------------------------------------- + + private final RelOptTable originTable; + + private final int iOriginColumn; + + private final boolean isDerived; + + /** + * Stores the expression for data conversion, + * which source table fields are transformed by which expression the target field + */ + private String transform; + + // ~ Constructors ----------------------------------------------------------- + + public RelColumnOrigin( + RelOptTable originTable, + int iOriginColumn, + boolean isDerived) { + this.originTable = originTable; + this.iOriginColumn = iOriginColumn; + this.isDerived = isDerived; + } + + public RelColumnOrigin( + RelOptTable originTable, + int iOriginColumn, + boolean isDerived, + String transform) { + this.originTable = originTable; + this.iOriginColumn = iOriginColumn; + this.isDerived = isDerived; + this.transform = transform; + } + + // ~ Methods ---------------------------------------------------------------- + + /** + * Returns table of origin. + */ + public RelOptTable getOriginTable() { + return originTable; + } + + /** + * Returns the 0-based index of column in origin table; whether this ordinal + * is flattened or unflattened depends on whether UDT flattening has already + * been performed on the relational expression which produced this + * description. + */ + public int getOriginColumnOrdinal() { + return iOriginColumn; + } + + /** + * Consider the query select a+b as c, d as e from t. The + * output column c has two origins (a and b), both of them derived. The + * output column d as one origin (c), which is not derived. + * + * @return false if value taken directly from column in origin table; true + * otherwise + */ + public boolean isDerived() { + return isDerived; + } + + public String getTransform() { + return transform; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof RelColumnOrigin)) { + return false; + } + RelColumnOrigin other = (RelColumnOrigin) obj; + return originTable.getQualifiedName().equals( + other.originTable.getQualifiedName()) + && (iOriginColumn == other.iOriginColumn) + && (isDerived == other.isDerived); + } + + @Override + public int hashCode() { + return originTable.getQualifiedName().hashCode() + + iOriginColumn + (isDerived ? 313 : 0); + } +} diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java b/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java new file mode 100644 index 00000000..3fbffe06 --- /dev/null +++ b/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java @@ -0,0 +1,550 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.core.*; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.*; +import org.apache.calcite.util.BuiltInMethod; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static com.zto.fire.common.util.Constant.DELIMITER; +import static com.zto.fire.common.util.Constant.INITIAL_CAPACITY; + + +/** + * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelMdColumnOrigins + * + *

+ * <p>Modification point:
+ * <ol>
+ *   <li>Support lookup join, add method getColumnOrigins(Snapshot rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support watermark, add method getColumnOrigins(SingleRel rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support table function, add method getColumnOrigins(Correlate rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support CEP, add method getColumnOrigins(Match rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support transform, add method createDerivedColumnOrigins(Set<RelColumnOrigin> inputSet, String transform, boolean originTransform), and related code</li>
+ *   <li>Support field AS LOCALTIMESTAMP, modify method getColumnOrigins(Project rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support PROCTIME() as the first field, add method computeIndexWithOffset, used by getColumnOrigins(Project rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *   <li>Support TVF, modify method getColumnOrigins(TableFunctionScan rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ * </ol>
          + * + * @description: RelMdColumnOrigins supplies a default implementation of {@link RelMetadataQuery#getColumnOrigins} for the standard logical algebra. + * @author: HamaWhite + */ +public class RelMdColumnOrigins implements MetadataHandler { + + private static final Logger LOG = LoggerFactory.getLogger(RelMdColumnOrigins.class); + + private final Pattern pattern = Pattern.compile("\\$[\\w.]+"); + + public static final RelMetadataProvider SOURCE = + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.COLUMN_ORIGIN.method, new RelMdColumnOrigins()); + + // ~ Constructors ----------------------------------------------------------- + + private RelMdColumnOrigins() { + } + + // ~ Methods ---------------------------------------------------------------- + + public MetadataDef getDef() { + return BuiltInMetadata.ColumnOrigin.DEF; + } + + public Set getColumnOrigins(Aggregate rel, RelMetadataQuery mq, int iOutputColumn) { + if (iOutputColumn < rel.getGroupCount()) { + // get actual index of Group columns. + return mq.getColumnOrigins(rel.getInput(), rel.getGroupSet().asList().get(iOutputColumn)); + } + + // Aggregate columns are derived from input columns + AggregateCall call = rel.getAggCallList().get(iOutputColumn - rel.getGroupCount()); + final Set set = new LinkedHashSet<>(); + for (Integer iInput : call.getArgList()) { + set.addAll(mq.getColumnOrigins(rel.getInput(), iInput)); + } + return createDerivedColumnOrigins(set, call); + } + + public Set getColumnOrigins(Join rel, RelMetadataQuery mq, int iOutputColumn) { + int nLeftColumns = rel.getLeft().getRowType().getFieldList().size(); + Set set; + boolean derived = false; + if (iOutputColumn < nLeftColumns) { + set = mq.getColumnOrigins(rel.getLeft(), iOutputColumn); + if (rel.getJoinType().generatesNullsOnLeft()) { + derived = true; + } + } else { + set = mq.getColumnOrigins(rel.getRight(), iOutputColumn - nLeftColumns); + if (rel.getJoinType().generatesNullsOnRight()) { + derived = true; + } + } + if (derived) { + // nulls are generated due to outer join; that counts + // as derivation + set = createDerivedColumnOrigins(set); + } + return set; + } + + /** + * Support the field blood relationship of table function + */ + public Set getColumnOrigins(Correlate rel, RelMetadataQuery mq, int iOutputColumn) { + List fieldNameList = rel.getLeft().getRowType().getFieldNames(); + int nLeftColumns = fieldNameList.size(); + if (iOutputColumn < nLeftColumns) { + return mq.getColumnOrigins(rel.getLeft(), iOutputColumn); + } else { + if (rel.getRight() instanceof TableFunctionScan) { + final Set set = new LinkedHashSet<>(); + for (Integer iInput : rel.getRequiredColumns().asList()) { + set.addAll(mq.getColumnOrigins(rel.getLeft(), iInput)); + } + // get the field name of the left table configured in the Table Function on the right + TableFunctionScan tableFunctionScan = (TableFunctionScan) rel.getRight(); + String transform = computeTransform(set, tableFunctionScan.getCall()) + + DELIMITER + + tableFunctionScan.getRowType().getFieldNames().get(iOutputColumn - nLeftColumns); + return createDerivedColumnOrigins(set, transform); + } + return mq.getColumnOrigins(rel.getRight(), iOutputColumn - nLeftColumns); + } + } + + public Set getColumnOrigins(SetOp rel, RelMetadataQuery mq, int iOutputColumn) { + final Set set = new LinkedHashSet<>(); + for (RelNode input : rel.getInputs()) { + Set inputSet = mq.getColumnOrigins(input, iOutputColumn); + if (inputSet == null) { + return Collections.emptySet(); + } + set.addAll(inputSet); + } + 
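+        // All inputs contributed origins for this column: a UNION/INTERSECT output column's
+        // lineage is the union of the lineage of the same column in every branch.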
return set; + } + + /** + * Support the field blood relationship of lookup join + */ + public Set getColumnOrigins(Snapshot rel, RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + /** + * Support the field blood relationship of watermark + */ + public Set getColumnOrigins(SingleRel rel, RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + /** + * Support for new fields in the source table similar to those created with the LOCALTIMESTAMP function + */ + public Set getColumnOrigins(Project rel, final RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + RexNode rexNode = rel.getProjects().get(iOutputColumn); + + if (rexNode instanceof RexInputRef) { + // Direct reference: no derivation added. + RexInputRef inputRef = (RexInputRef) rexNode; + int index = inputRef.getIndex(); + if (input instanceof TableScan) { + index = computeIndexWithOffset(rel.getProjects(), inputRef.getIndex(), iOutputColumn); + } + return mq.getColumnOrigins(input, index); + } else if (input instanceof TableScan && rexNode.getClass().equals(RexCall.class) + && ((RexCall) rexNode).getOperands().isEmpty()) { + return mq.getColumnOrigins(input, iOutputColumn); + } + // Anything else is a derivation, possibly from multiple columns. + final Set set = getMultipleColumns(rexNode, input, mq); + return createDerivedColumnOrigins(set, rexNode); + } + + private int computeIndexWithOffset(List projects, int baseIndex, int iOutputColumn) { + int offset = 0; + for (int index = 0; index < iOutputColumn; index++) { + RexNode rexNode = projects.get(index); + if ((rexNode.getClass().equals(RexCall.class) && ((RexCall) rexNode).getOperands().isEmpty())) { + offset += 1; + } + } + return baseIndex + offset; + } + + /** + * Support field blood relationship of CEP. + * The first column is the field after PARTITION BY, and the other columns come from the measures in Match + */ + public Set getColumnOrigins(Match rel, RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + List fieldNameList = input.getRowType().getFieldNames(); + String fieldName = rel.getRowType().getFieldNames().get(iOutputColumn); + + // 1. get the column names of the partitioned keys. + Set partitionKeySet = rel.getPartitionKeys().toList() + .stream() + .map(fieldNameList::get) + .collect(Collectors.toSet()); + + // 2. get the lineage of these partitioned columns. + if (partitionKeySet.contains(fieldName)) { + return mq.getColumnOrigins(input, fieldNameList.indexOf(fieldName)); + } + + // 3. the rest of the iOutputColumn must be derived by `MEASURES`. + RexNode rexNode = rel.getMeasures().get(fieldName); + RexPatternFieldRef rexPatternFieldRef = searchRexPatternFieldRef(rexNode); + if (rexPatternFieldRef != null) { + final Set set = mq.getColumnOrigins(input, rexPatternFieldRef.getIndex()); + if (rexNode instanceof RexCall) { + return createDerivedColumnOrigins(set, ((RexCall) rexNode).getOperands().get(0)); + } else { + return createDerivedColumnOrigins(set); + } + } + // 4. something unsupported yet. 
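+        // (for example, a constant measure, where no RexPatternFieldRef exists; illustrative case)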
+ LOG.warn("Parse column lineage failed, rel:[{}], iOutputColumn:[{}]", rel, iOutputColumn); + return Collections.emptySet(); + } + + private RexPatternFieldRef searchRexPatternFieldRef(RexNode rexNode) { + if (rexNode instanceof RexCall) { + RexNode operand = ((RexCall) rexNode).getOperands().get(0); + if (operand instanceof RexPatternFieldRef) { + return (RexPatternFieldRef) operand; + } else { + // recursive search + return searchRexPatternFieldRef(operand); + } + } + return null; + } + + public Set getColumnOrigins(Calc rel, final RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + final RexShuttle rexShuttle = new RexShuttle() { + + @Override + public RexNode visitLocalRef(RexLocalRef localRef) { + return rel.getProgram().expandLocalRef(localRef); + } + }; + + List projects = new ArrayList<>(rexShuttle.apply(rel.getProgram().getProjectList())); + + final RexNode rexNode = projects.get(iOutputColumn); + if (rexNode instanceof RexInputRef) { + // Direct reference: no derivation added. + RexInputRef inputRef = (RexInputRef) rexNode; + return mq.getColumnOrigins(input, inputRef.getIndex()); + } + // Anything else is a derivation, possibly from multiple columns. + final Set set = getMultipleColumns(rexNode, input, mq); + return createDerivedColumnOrigins(set); + } + + public Set getColumnOrigins(Filter rel, RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(Sort rel, RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(TableModify rel, RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(Exchange rel, RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(TableFunctionScan rel, RelMetadataQuery mq, int iOutputColumn) { + Set set = new LinkedHashSet<>(); + Set mappings = rel.getColumnMappings(); + if (mappings == null) { + if (!rel.getInputs().isEmpty()) { + RelNode input = rel.getInput(0); + List inputFieldList = input.getRowType().getFieldList(); + int nInputColumns = inputFieldList.size(); + if (iOutputColumn < nInputColumns) { + return mq.getColumnOrigins(input, iOutputColumn); + } else { + RexCall rexCall = (RexCall) rel.getCall(); + List operands = rexCall.getOperands(); + RexInputRef rexInputRef = (RexInputRef) ((RexCall) operands.get(1)).getOperands().get(0); + set = mq.getColumnOrigins(input, rexInputRef.getIndex()); + + String transform = rexCall.op.getName() + + DELIMITER + + rexCall.getType().getFieldNames().get(iOutputColumn); + return createDerivedColumnOrigins(set, transform); + } + } else { + // This is a leaf transformation: say there are fer sure no + // column origins. + return set; + } + } + for (RelColumnMapping mapping : mappings) { + if (mapping.iOutputColumn != iOutputColumn) { + continue; + } + final RelNode input = rel.getInputs().get(mapping.iInputRel); + final int column = mapping.iInputColumn; + Set origins = mq.getColumnOrigins(input, column); + if (origins == null) { + return Collections.emptySet(); + } + if (mapping.derived) { + origins = createDerivedColumnOrigins(origins); + } + set.addAll(origins); + } + return set; + } + + /** + * Catch-all rule when none of the others apply. 
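+     * <p>For example, a bare TableScan resolves here: provided the row type is unchanged,
+     * output column i maps straight to column i of the underlying table (not derived).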
+ */ + @SuppressWarnings("squid:S1172") + public Set getColumnOrigins(RelNode rel, RelMetadataQuery mq, int iOutputColumn) { + // NOTE jvs 28-Mar-2006: We may get this wrong for a physical table + // expression which supports projections. In that case, + // it's up to the plugin writer to override with the + // correct information. + + if (!rel.getInputs().isEmpty()) { + // No generic logic available for non-leaf rels. + return Collections.emptySet(); + } + + final Set set = new LinkedHashSet<>(); + + RelOptTable table = rel.getTable(); + if (table == null) { + // Somebody is making column values up out of thin air, like a + // VALUES clause, so we return an empty set. + return set; + } + + // Detect the case where a physical table expression is performing + // projection, and say we don't know instead of making any assumptions. + // (Theoretically we could try to map the projection using column + // names.) This detection assumes the table expression doesn't handle + // rename as well. + if (table.getRowType() != rel.getRowType()) { + return Collections.emptySet(); + } + + set.add(new RelColumnOrigin(table, iOutputColumn, false)); + return set; + } + + private Set createDerivedColumnOrigins(Set inputSet) { + if (inputSet == null) { + return Collections.emptySet(); + } + final Set set = new LinkedHashSet<>(); + for (RelColumnOrigin rco : inputSet) { + RelColumnOrigin derived = + new RelColumnOrigin( + rco.getOriginTable(), + rco.getOriginColumnOrdinal(), + true); + set.add(derived); + } + return set; + } + + private Set createDerivedColumnOrigins(Set inputSet, Object transform) { + if (inputSet == null || inputSet.isEmpty()) { + return Collections.emptySet(); + } + final Set set = new LinkedHashSet<>(); + + String finalTransform = computeTransform(inputSet, transform); + for (RelColumnOrigin rco : inputSet) { + RelColumnOrigin derived = + new RelColumnOrigin( + rco.getOriginTable(), + rco.getOriginColumnOrdinal(), + true, + finalTransform); + set.add(derived); + } + return set; + } + + /** + * Replace the variable at the beginning of $ in input with the real field information + */ + private String computeTransform(Set inputSet, Object transform) { + LOG.debug("origin transform: {}, class: {}", transform, transform.getClass()); + String finalTransform = transform.toString(); + + Matcher matcher = pattern.matcher(finalTransform); + + Set operandSet = new LinkedHashSet<>(); + while (matcher.find()) { + operandSet.add(matcher.group()); + } + + if (operandSet.isEmpty()) { + return finalTransform; + } + if (inputSet.size() != operandSet.size()) { + LOG.warn("The number [{}] of fields in the source tables are not equal to operands [{}]", inputSet.size(), + operandSet.size()); + return null; + } + + Map sourceColumnMap = buildSourceColumnMap(inputSet, transform); + + matcher = pattern.matcher(finalTransform); + String temp; + while (matcher.find()) { + temp = matcher.group(); + finalTransform = finalTransform.replace(temp, sourceColumnMap.get(temp)); + } + // temporary special treatment + finalTransform = finalTransform.replace("_UTF-16LE", ""); + LOG.debug("final transform: {}", finalTransform); + return finalTransform; + } + + /** + * According to the order of generating inputSet, generate the corresponding index number. + * + *
+     * <p>
          + * for example: ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $3 DESC NULLS LAST), + * The order of inputSet is $3, $0, instead of $0, $3 obtained by traversing the above string normally + */ + private Map buildSourceColumnMap(Set inputSet, Object transform) { + Set traversalSet = new LinkedHashSet<>(); + if (transform instanceof AggregateCall) { + AggregateCall call = ((AggregateCall) transform); + traversalSet.addAll(call.getArgList()); + } else if (transform instanceof RexNode) { + RexNode rexNode = (RexNode) transform; + RexVisitor visitor = + new RexVisitorImpl(true) { + + @Override + public Void visitInputRef(RexInputRef inputRef) { + traversalSet.add(inputRef.getIndex()); + return null; + } + + @Override + public Void visitPatternFieldRef(RexPatternFieldRef fieldRef) { + traversalSet.add(fieldRef.getIndex()); + return null; + } + + @Override + public Void visitFieldAccess(RexFieldAccess fieldAccess) { + traversalSet.add(fieldAccess.toString().replace("$", "")); + return null; + } + }; + rexNode.accept(visitor); + } + Map sourceColumnMap = new HashMap<>(INITIAL_CAPACITY); + Iterator iterator = optimizeSourceColumnSet(inputSet).iterator(); + traversalSet.forEach(index -> sourceColumnMap.put("$" + index, iterator.next())); + LOG.debug("sourceColumnMap: {}", sourceColumnMap); + return sourceColumnMap; + } + + /** + * Increase the readability of transform. + * if catalog, database and table are the same, return field. + * If the catalog and database are the same, return the table and field. + * If the catalog is the same, return the database, table, field. + * Otherwise, return all + */ + private Set optimizeSourceColumnSet(Set inputSet) { + Set catalogSet = new HashSet<>(); + Set databaseSet = new HashSet<>(); + Set tableSet = new HashSet<>(); + Set> qualifiedSet = new LinkedHashSet<>(); + for (RelColumnOrigin rco : inputSet) { + RelOptTable originTable = rco.getOriginTable(); + List qualifiedName = originTable.getQualifiedName(); + + // catalog,database,table,field + List qualifiedList = new ArrayList<>(qualifiedName); + catalogSet.add(qualifiedName.get(0)); + databaseSet.add(qualifiedName.get(1)); + tableSet.add(qualifiedName.get(2)); + + String field = rco.getTransform() != null ? 
rco.getTransform() + : originTable.getRowType().getFieldNames().get(rco.getOriginColumnOrdinal()); + qualifiedList.add(field); + qualifiedSet.add(qualifiedList); + } + if (catalogSet.size() == 1 && databaseSet.size() == 1 && tableSet.size() == 1) { + return optimizeName(qualifiedSet, e -> e.get(3)); + } else if (catalogSet.size() == 1 && databaseSet.size() == 1) { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e.subList(2, 4))); + } else if (catalogSet.size() == 1) { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e.subList(1, 4))); + } else { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e)); + } + } + + private Set optimizeName(Set> qualifiedSet, Function, String> mapper) { + return qualifiedSet.stream().map(mapper).collect(Collectors.toCollection(LinkedHashSet::new)); + } + + private Set getMultipleColumns(RexNode rexNode, RelNode input, final RelMetadataQuery mq) { + final Set set = new LinkedHashSet<>(); + final RexVisitor visitor = + new RexVisitorImpl(true) { + + @Override + public Void visitInputRef(RexInputRef inputRef) { + Set inputSet = + mq.getColumnOrigins(input, inputRef.getIndex()); + if (inputSet != null) { + set.addAll(inputSet); + } + return null; + } + }; + rexNode.accept(visitor); + return set; + } +} diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelColumnOrigin.java b/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelColumnOrigin.java new file mode 100644 index 00000000..358299d5 --- /dev/null +++ b/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelColumnOrigin.java @@ -0,0 +1,125 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptTable; + +/** + * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelColumnOrigin + *

+ * <p>Modification point:
+ * <ol>
+ *     <li>add transform field and related code.</li>
+ * </ol>
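+ * <p>Illustrative sketch (table and field names are hypothetical): for
+ * "SELECT CONCAT(first_name, ' ', last_name) AS full_name FROM users", the origins of
+ * full_name would be users.first_name and users.last_name, with a transform along the
+ * lines of "CONCAT(first_name, ' ', last_name)"; the exact rendering depends on the planner.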
          + * + * @description: RelColumnOrigin is a data structure describing one of the origins of an + * output column produced by a relational expression. + * @author: HamaWhite + */ +public class RelColumnOrigin { + // ~ Instance fields -------------------------------------------------------- + + private final RelOptTable originTable; + + private final int iOriginColumn; + + private final boolean isDerived; + + /** + * Stores the expression for data conversion, + * which source table fields are transformed by which expression the target field + */ + private String transform; + + // ~ Constructors ----------------------------------------------------------- + + public RelColumnOrigin( + RelOptTable originTable, + int iOriginColumn, + boolean isDerived) { + this.originTable = originTable; + this.iOriginColumn = iOriginColumn; + this.isDerived = isDerived; + } + + public RelColumnOrigin( + RelOptTable originTable, + int iOriginColumn, + boolean isDerived, + String transform) { + this.originTable = originTable; + this.iOriginColumn = iOriginColumn; + this.isDerived = isDerived; + this.transform = transform; + } + + // ~ Methods ---------------------------------------------------------------- + + /** + * Returns table of origin. + */ + public RelOptTable getOriginTable() { + return originTable; + } + + /** + * Returns the 0-based index of column in origin table; whether this ordinal + * is flattened or unflattened depends on whether UDT flattening has already + * been performed on the relational expression which produced this + * description. + */ + public int getOriginColumnOrdinal() { + return iOriginColumn; + } + + /** + * Consider the query select a+b as c, d as e from t. The + * output column c has two origins (a and b), both of them derived. The + * output column d as one origin (c), which is not derived. + * + * @return false if value taken directly from column in origin table; true + * otherwise + */ + public boolean isDerived() { + return isDerived; + } + + public String getTransform() { + return transform; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof RelColumnOrigin)) { + return false; + } + RelColumnOrigin other = (RelColumnOrigin) obj; + return originTable.getQualifiedName().equals( + other.originTable.getQualifiedName()) + && (iOriginColumn == other.iOriginColumn) + && (isDerived == other.isDerived); + } + + @Override + public int hashCode() { + return originTable.getQualifiedName().hashCode() + + iOriginColumn + (isDerived ? 313 : 0); + } +} diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java b/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java new file mode 100644 index 00000000..f3f0f57d --- /dev/null +++ b/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java @@ -0,0 +1,551 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Calc; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.Exchange; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Match; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SetOp; +import org.apache.calcite.rel.core.Snapshot; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.TableFunctionScan; +import org.apache.calcite.rel.core.TableModify; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLocalRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexPatternFieldRef; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexVisitor; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.util.BuiltInMethod; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelMdColumnOrigins + * + *

+ * <p>Modification point:
+ * <ol>
+ *     <li>Support lookup join, add method getColumnOrigins(Snapshot rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *     <li>Support watermark, add method getColumnOrigins(SingleRel rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *     <li>Support table function, add method getColumnOrigins(Correlate rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *     <li>Support CEP, add method getColumnOrigins(Match rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *     <li>Support transform, add method createDerivedColumnOrigins(Set<RelColumnOrigin> inputSet, String transform, boolean originTransform), and related code</li>
+ *     <li>Support field AS LOCALTIMESTAMP, modify method getColumnOrigins(Project rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *     <li>Support PROCTIME() as the first field, add method computeIndexWithOffset, used by getColumnOrigins(Project rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ * </ol>
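+ * <p>For instance (hypothetical schema), in
+ * "INSERT INTO dst SELECT o.id, d.name FROM orders o
+ *  JOIN dim_user FOR SYSTEM_TIME AS OF o.proc_time AS d ON o.uid = d.id",
+ * the Snapshot handler below lets dst.name trace back to dim_user.name rather than being lost.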
              + * + * @description: RelMdColumnOrigins supplies a default implementation of {@link RelMetadataQuery#getColumnOrigins} for the standard logical algebra. + * @author: HamaWhite + */ +public class RelMdColumnOrigins implements MetadataHandler { + + private static final Logger LOG = LoggerFactory.getLogger(RelMdColumnOrigins.class); + + public static final String DELIMITER = "."; + + public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.COLUMN_ORIGIN.method, new RelMdColumnOrigins()); + + // ~ Constructors ----------------------------------------------------------- + + private RelMdColumnOrigins() { + } + + // ~ Methods ---------------------------------------------------------------- + + public MetadataDef getDef() { + return BuiltInMetadata.ColumnOrigin.DEF; + } + + public Set getColumnOrigins(Aggregate rel, + RelMetadataQuery mq, int iOutputColumn) { + if (iOutputColumn < rel.getGroupCount()) { + // get actual index of Group columns. + return mq.getColumnOrigins(rel.getInput(), rel.getGroupSet().asList().get(iOutputColumn)); + } + + // Aggregate columns are derived from input columns + AggregateCall call = rel.getAggCallList().get(iOutputColumn + - rel.getGroupCount()); + + final Set set = new LinkedHashSet<>(); + for (Integer iInput : call.getArgList()) { + Set inputSet = mq.getColumnOrigins(rel.getInput(), iInput); + inputSet = createDerivedColumnOrigins(inputSet, call.toString(), true); + if (inputSet != null) { + set.addAll(inputSet); + } + } + return set; + } + + public Set getColumnOrigins(Join rel, RelMetadataQuery mq, + int iOutputColumn) { + int nLeftColumns = rel.getLeft().getRowType().getFieldList().size(); + Set set; + boolean derived = false; + if (iOutputColumn < nLeftColumns) { + set = mq.getColumnOrigins(rel.getLeft(), iOutputColumn); + if (rel.getJoinType().generatesNullsOnLeft()) { + derived = true; + } + } else { + set = mq.getColumnOrigins(rel.getRight(), iOutputColumn - nLeftColumns); + if (rel.getJoinType().generatesNullsOnRight()) { + derived = true; + } + } + if (derived) { + // nulls are generated due to outer join; that counts + // as derivation + set = createDerivedColumnOrigins(set); + } + return set; + } + + /** + * Support the field blood relationship of table function + */ + public Set getColumnOrigins(Correlate rel, RelMetadataQuery mq, int iOutputColumn) { + + List leftFieldList = rel.getLeft().getRowType().getFieldList(); + + int nLeftColumns = leftFieldList.size(); + Set set; + if (iOutputColumn < nLeftColumns) { + set = mq.getColumnOrigins(rel.getLeft(), iOutputColumn); + } else { + if (rel.getRight() instanceof TableFunctionScan) { + // get the field name of the left table configured in the Table Function on the right + TableFunctionScan tableFunctionScan = (TableFunctionScan) rel.getRight(); + RexCall rexCall = (RexCall) tableFunctionScan.getCall(); + // support only one field in table function + RexFieldAccess rexFieldAccess = (RexFieldAccess) rexCall.getOperands().get(0); + String fieldName = rexFieldAccess.getField().getName(); + + int leftFieldIndex = 0; + for (int i = 0; i < nLeftColumns; i++) { + if (leftFieldList.get(i).getName().equalsIgnoreCase(fieldName)) { + leftFieldIndex = i; + break; + } + } + /** + * Get the fields from the left table, don't go to + * getColumnOrigins(TableFunctionScan rel,RelMetadataQuery mq, int iOutputColumn), + * otherwise the return is null, and the UDTF field origin cannot be parsed + */ + set = mq.getColumnOrigins(rel.getLeft(), 
leftFieldIndex); + + // process transform for udtf + String transform = rexCall.toString().replace(rexFieldAccess.toString(), fieldName) + + DELIMITER + + tableFunctionScan.getRowType().getFieldNames().get(iOutputColumn - nLeftColumns); + set = createDerivedColumnOrigins(set, transform, false); + } else { + set = mq.getColumnOrigins(rel.getRight(), iOutputColumn - nLeftColumns); + } + } + return set; + } + + public Set getColumnOrigins(SetOp rel, + RelMetadataQuery mq, int iOutputColumn) { + final Set set = new LinkedHashSet<>(); + for (RelNode input : rel.getInputs()) { + Set inputSet = mq.getColumnOrigins(input, iOutputColumn); + if (inputSet == null) { + return Collections.emptySet(); + } + set.addAll(inputSet); + } + return set; + } + + /** + * Support the field blood relationship of lookup join + */ + public Set getColumnOrigins(Snapshot rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + /** + * Support the field blood relationship of watermark + */ + public Set getColumnOrigins(SingleRel rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + /** + * Support for new fields in the source table similar to those created with the LOCALTIMESTAMP function + */ + public Set getColumnOrigins(Project rel, + final RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + RexNode rexNode = rel.getProjects().get(iOutputColumn); + + if (rexNode instanceof RexInputRef) { + // Direct reference: no derivation added. + RexInputRef inputRef = (RexInputRef) rexNode; + int index = inputRef.getIndex(); + if (input instanceof TableScan) { + index = computeIndexWithOffset(rel.getProjects(), inputRef.getIndex(), iOutputColumn); + } + return mq.getColumnOrigins(input, index); + } else if (input instanceof TableScan && rexNode.getClass().equals(RexCall.class) + && ((RexCall) rexNode).getOperands().isEmpty()) { + return mq.getColumnOrigins(input, iOutputColumn); + } + // Anything else is a derivation, possibly from multiple columns. + final Set set = getMultipleColumns(rexNode, input, mq); + return createDerivedColumnOrigins(set, rexNode.toString(), true); + } + + private int computeIndexWithOffset(List projects, int baseIndex, int iOutputColumn) { + int offset = 0; + for (int index = 0; index < iOutputColumn; index++) { + RexNode rexNode = projects.get(index); + if ((rexNode.getClass().equals(RexCall.class) && ((RexCall) rexNode).getOperands().isEmpty())) { + offset += 1; + } + } + return baseIndex + offset; + } + + /** + * Support field blood relationship of CEP. + * The first column is the field after PARTITION BY, and the other columns come from the measures in Match + */ + public Set getColumnOrigins(Match rel, RelMetadataQuery mq, int iOutputColumn) { + int orderCount = rel.getOrderKeys().getKeys().size(); + + if (iOutputColumn < orderCount) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + final RelNode input = rel.getInput(); + RexNode rexNode = rel.getMeasures().values().asList().get(iOutputColumn - orderCount); + + RexPatternFieldRef rexPatternFieldRef = searchRexPatternFieldRef(rexNode); + if (rexPatternFieldRef != null) { + final Set set = mq.getColumnOrigins(input, rexPatternFieldRef.getIndex()); + String originTransform = rexNode instanceof RexCall + ? 
((RexCall) rexNode).getOperands().get(0).toString() + : null; + return createDerivedColumnOrigins(set, originTransform, true); + } + return Collections.emptySet(); + } + + private RexPatternFieldRef searchRexPatternFieldRef(RexNode rexNode) { + if (rexNode instanceof RexCall) { + RexNode operand = ((RexCall) rexNode).getOperands().get(0); + if (operand instanceof RexPatternFieldRef) { + return (RexPatternFieldRef) operand; + } else { + // recursive search + return searchRexPatternFieldRef(operand); + } + } + return null; + } + + public Set getColumnOrigins(Calc rel, + final RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + final RexShuttle rexShuttle = new RexShuttle() { + + @Override + public RexNode visitLocalRef(RexLocalRef localRef) { + return rel.getProgram().expandLocalRef(localRef); + } + }; + final List projects = new ArrayList<>(); + for (RexNode rex : rexShuttle.apply(rel.getProgram().getProjectList())) { + projects.add(rex); + } + final RexNode rexNode = projects.get(iOutputColumn); + if (rexNode instanceof RexInputRef) { + // Direct reference: no derivation added. + RexInputRef inputRef = (RexInputRef) rexNode; + return mq.getColumnOrigins(input, inputRef.getIndex()); + } + // Anything else is a derivation, possibly from multiple columns. + final Set set = getMultipleColumns(rexNode, input, mq); + return createDerivedColumnOrigins(set); + } + + public Set getColumnOrigins(Filter rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(Sort rel, RelMetadataQuery mq, + int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(TableModify rel, RelMetadataQuery mq, + int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(Exchange rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(TableFunctionScan rel, + RelMetadataQuery mq, int iOutputColumn) { + final Set set = new LinkedHashSet<>(); + Set mappings = rel.getColumnMappings(); + if (mappings == null) { + if (!rel.getInputs().isEmpty()) { + // This is a non-leaf transformation: say we don't + // know about origins, because there are probably + // columns below. + return Collections.emptySet(); + } else { + // This is a leaf transformation: say there are fer sure no + // column origins. + return set; + } + } + for (RelColumnMapping mapping : mappings) { + if (mapping.iOutputColumn != iOutputColumn) { + continue; + } + final RelNode input = rel.getInputs().get(mapping.iInputRel); + final int column = mapping.iInputColumn; + Set origins = mq.getColumnOrigins(input, column); + if (origins == null) { + return Collections.emptySet(); + } + if (mapping.derived) { + origins = createDerivedColumnOrigins(origins); + } + set.addAll(origins); + } + return set; + } + + // Catch-all rule when none of the others apply. + @SuppressWarnings("squid:S1172") + public Set getColumnOrigins(RelNode rel, + RelMetadataQuery mq, int iOutputColumn) { + // NOTE jvs 28-Mar-2006: We may get this wrong for a physical table + // expression which supports projections. In that case, + // it's up to the plugin writer to override with the + // correct information. + + if (!rel.getInputs().isEmpty()) { + // No generic logic available for non-leaf rels. 
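+            // e.g. a custom physical rel with inputs that none of the typed overloads matched;
+            // rather than guess, report unknown origins and let the caller treat it as a dead end.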
+ return Collections.emptySet(); + } + + final Set set = new LinkedHashSet<>(); + + RelOptTable table = rel.getTable(); + if (table == null) { + // Somebody is making column values up out of thin air, like a + // VALUES clause, so we return an empty set. + return set; + } + + // Detect the case where a physical table expression is performing + // projection, and say we don't know instead of making any assumptions. + // (Theoretically we could try to map the projection using column + // names.) This detection assumes the table expression doesn't handle + // rename as well. + if (table.getRowType() != rel.getRowType()) { + return Collections.emptySet(); + } + + set.add(new RelColumnOrigin(table, iOutputColumn, false)); + return set; + } + + private Set createDerivedColumnOrigins( + Set inputSet) { + if (inputSet == null) { + return Collections.emptySet(); + } + final Set set = new LinkedHashSet<>(); + for (RelColumnOrigin rco : inputSet) { + RelColumnOrigin derived = new RelColumnOrigin( + rco.getOriginTable(), + rco.getOriginColumnOrdinal(), + true); + set.add(derived); + } + return set; + } + + private Set createDerivedColumnOrigins( + Set inputSet, String transform, boolean originTransform) { + if (inputSet == null || inputSet.isEmpty()) { + return Collections.emptySet(); + } + final Set set = new LinkedHashSet<>(); + + String finalTransform = originTransform ? computeTransform(inputSet, transform) : transform; + for (RelColumnOrigin rco : inputSet) { + RelColumnOrigin derived = new RelColumnOrigin( + rco.getOriginTable(), + rco.getOriginColumnOrdinal(), + true, + finalTransform); + set.add(derived); + } + return set; + } + + /** + * Replace the variable at the beginning of $ in input with the real field information + */ + private String computeTransform(Set inputSet, String transform) { + LOG.debug("origin transform: {}", transform); + Pattern pattern = Pattern.compile("\\$\\d+"); + Matcher matcher = pattern.matcher(transform); + + Set operandSet = new LinkedHashSet<>(); + while (matcher.find()) { + operandSet.add(matcher.group()); + } + + if (operandSet.isEmpty()) { + LOG.info("operandSet is empty"); + return null; + } + if (inputSet.size() != operandSet.size()) { + LOG.warn("The number [{}] of fields in the source tables are not equal to operands [{}]", inputSet.size(), + operandSet.size()); + return null; + } + + Map sourceColumnMap = new HashMap<>(); + Iterator iterator = optimizeSourceColumnSet(inputSet).iterator(); + operandSet.forEach(e -> sourceColumnMap.put(e, iterator.next())); + LOG.debug("sourceColumnMap: {}", sourceColumnMap); + + matcher = pattern.matcher(transform); + String temp; + while (matcher.find()) { + temp = matcher.group(); + transform = transform.replace(temp, sourceColumnMap.get(temp)); + } + + // temporary special treatment + transform = transform.replace("_UTF-16LE", ""); + LOG.debug("transform: {}", transform); + return transform; + } + + /** + * Increase the readability of transform. + * if catalog, database and table are the same, return field. + * If the catalog and database are the same, return the table and field. + * If the catalog is the same, return the database, table, field. 
+ * Otherwise, return all + */ + private Set optimizeSourceColumnSet(Set inputSet) { + Set catalogSet = new HashSet<>(); + Set databaseSet = new HashSet<>(); + Set tableSet = new HashSet<>(); + Set> qualifiedSet = new LinkedHashSet<>(); + for (RelColumnOrigin rco : inputSet) { + RelOptTable originTable = rco.getOriginTable(); + List qualifiedName = originTable.getQualifiedName(); + + // catalog,database,table,field + List qualifiedList = new ArrayList<>(qualifiedName); + catalogSet.add(qualifiedName.get(0)); + databaseSet.add(qualifiedName.get(1)); + tableSet.add(qualifiedName.get(2)); + + String field = rco.getTransform() != null + ? rco.getTransform() + : originTable.getRowType().getFieldNames().get(rco.getOriginColumnOrdinal()); + qualifiedList.add(field); + qualifiedSet.add(qualifiedList); + } + if (catalogSet.size() == 1 && databaseSet.size() == 1 && tableSet.size() == 1) { + return optimizeName(qualifiedSet, e -> e.get(3)); + } else if (catalogSet.size() == 1 && databaseSet.size() == 1) { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e.subList(2, 4))); + } else if (catalogSet.size() == 1) { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e.subList(1, 4))); + } else { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e)); + } + } + + private Set optimizeName(Set> qualifiedSet, Function, String> mapper) { + return qualifiedSet.stream().map(mapper).collect(Collectors.toSet()); + } + + private Set getMultipleColumns(RexNode rexNode, RelNode input, + final RelMetadataQuery mq) { + final Set set = new LinkedHashSet<>(); + final RexVisitor visitor = new RexVisitorImpl(true) { + + @Override + public Void visitInputRef(RexInputRef inputRef) { + Set inputSet = mq.getColumnOrigins(input, inputRef.getIndex()); + if (inputSet != null) { + set.addAll(inputSet); + } + return null; + } + }; + rexNode.accept(visitor); + return set; + } +} diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelColumnOrigin.java b/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelColumnOrigin.java new file mode 100644 index 00000000..358299d5 --- /dev/null +++ b/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelColumnOrigin.java @@ -0,0 +1,125 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptTable; + +/** + * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelColumnOrigin + *

+ * <p>Modification point:
+ * <ol>
+ *     <li>add transform field and related code.</li>
+ * </ol>
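+ * <p>Note that equals() and hashCode() below do not take transform into account, so two
+ * origins differing only in their transform string collapse into a single set entry.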
              + * + * @description: RelColumnOrigin is a data structure describing one of the origins of an + * output column produced by a relational expression. + * @author: HamaWhite + */ +public class RelColumnOrigin { + // ~ Instance fields -------------------------------------------------------- + + private final RelOptTable originTable; + + private final int iOriginColumn; + + private final boolean isDerived; + + /** + * Stores the expression for data conversion, + * which source table fields are transformed by which expression the target field + */ + private String transform; + + // ~ Constructors ----------------------------------------------------------- + + public RelColumnOrigin( + RelOptTable originTable, + int iOriginColumn, + boolean isDerived) { + this.originTable = originTable; + this.iOriginColumn = iOriginColumn; + this.isDerived = isDerived; + } + + public RelColumnOrigin( + RelOptTable originTable, + int iOriginColumn, + boolean isDerived, + String transform) { + this.originTable = originTable; + this.iOriginColumn = iOriginColumn; + this.isDerived = isDerived; + this.transform = transform; + } + + // ~ Methods ---------------------------------------------------------------- + + /** + * Returns table of origin. + */ + public RelOptTable getOriginTable() { + return originTable; + } + + /** + * Returns the 0-based index of column in origin table; whether this ordinal + * is flattened or unflattened depends on whether UDT flattening has already + * been performed on the relational expression which produced this + * description. + */ + public int getOriginColumnOrdinal() { + return iOriginColumn; + } + + /** + * Consider the query select a+b as c, d as e from t. The + * output column c has two origins (a and b), both of them derived. The + * output column d as one origin (c), which is not derived. + * + * @return false if value taken directly from column in origin table; true + * otherwise + */ + public boolean isDerived() { + return isDerived; + } + + public String getTransform() { + return transform; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof RelColumnOrigin)) { + return false; + } + RelColumnOrigin other = (RelColumnOrigin) obj; + return originTable.getQualifiedName().equals( + other.originTable.getQualifiedName()) + && (iOriginColumn == other.iOriginColumn) + && (isDerived == other.isDerived); + } + + @Override + public int hashCode() { + return originTable.getQualifiedName().hashCode() + + iOriginColumn + (isDerived ? 313 : 0); + } +} diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java b/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java new file mode 100644 index 00000000..f3f0f57d --- /dev/null +++ b/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java @@ -0,0 +1,551 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Calc; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.Exchange; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Match; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SetOp; +import org.apache.calcite.rel.core.Snapshot; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.TableFunctionScan; +import org.apache.calcite.rel.core.TableModify; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLocalRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexPatternFieldRef; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexVisitor; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.util.BuiltInMethod; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelMdColumnOrigins + * + *

+ * <p>Modification point:
+ * <ol>
+ *     <li>Support lookup join, add method getColumnOrigins(Snapshot rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *     <li>Support watermark, add method getColumnOrigins(SingleRel rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *     <li>Support table function, add method getColumnOrigins(Correlate rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *     <li>Support CEP, add method getColumnOrigins(Match rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *     <li>Support transform, add method createDerivedColumnOrigins(Set<RelColumnOrigin> inputSet, String transform, boolean originTransform), and related code</li>
+ *     <li>Support field AS LOCALTIMESTAMP, modify method getColumnOrigins(Project rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ *     <li>Support PROCTIME() as the first field, add method computeIndexWithOffset, used by getColumnOrigins(Project rel, RelMetadataQuery mq, int iOutputColumn)</li>
+ * </ol>
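+ * <p>Example of the transform rewrite (field names are hypothetical): computeTransform turns
+ * "CONCAT($0, $1)" into "CONCAT(first_name, last_name)" by pairing each $N placeholder with a
+ * readable source column from optimizeSourceColumnSet, and strips the "_UTF-16LE" charset noise.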
                  + * + * @description: RelMdColumnOrigins supplies a default implementation of {@link RelMetadataQuery#getColumnOrigins} for the standard logical algebra. + * @author: HamaWhite + */ +public class RelMdColumnOrigins implements MetadataHandler { + + private static final Logger LOG = LoggerFactory.getLogger(RelMdColumnOrigins.class); + + public static final String DELIMITER = "."; + + public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.COLUMN_ORIGIN.method, new RelMdColumnOrigins()); + + // ~ Constructors ----------------------------------------------------------- + + private RelMdColumnOrigins() { + } + + // ~ Methods ---------------------------------------------------------------- + + public MetadataDef getDef() { + return BuiltInMetadata.ColumnOrigin.DEF; + } + + public Set getColumnOrigins(Aggregate rel, + RelMetadataQuery mq, int iOutputColumn) { + if (iOutputColumn < rel.getGroupCount()) { + // get actual index of Group columns. + return mq.getColumnOrigins(rel.getInput(), rel.getGroupSet().asList().get(iOutputColumn)); + } + + // Aggregate columns are derived from input columns + AggregateCall call = rel.getAggCallList().get(iOutputColumn + - rel.getGroupCount()); + + final Set set = new LinkedHashSet<>(); + for (Integer iInput : call.getArgList()) { + Set inputSet = mq.getColumnOrigins(rel.getInput(), iInput); + inputSet = createDerivedColumnOrigins(inputSet, call.toString(), true); + if (inputSet != null) { + set.addAll(inputSet); + } + } + return set; + } + + public Set getColumnOrigins(Join rel, RelMetadataQuery mq, + int iOutputColumn) { + int nLeftColumns = rel.getLeft().getRowType().getFieldList().size(); + Set set; + boolean derived = false; + if (iOutputColumn < nLeftColumns) { + set = mq.getColumnOrigins(rel.getLeft(), iOutputColumn); + if (rel.getJoinType().generatesNullsOnLeft()) { + derived = true; + } + } else { + set = mq.getColumnOrigins(rel.getRight(), iOutputColumn - nLeftColumns); + if (rel.getJoinType().generatesNullsOnRight()) { + derived = true; + } + } + if (derived) { + // nulls are generated due to outer join; that counts + // as derivation + set = createDerivedColumnOrigins(set); + } + return set; + } + + /** + * Support the field blood relationship of table function + */ + public Set getColumnOrigins(Correlate rel, RelMetadataQuery mq, int iOutputColumn) { + + List leftFieldList = rel.getLeft().getRowType().getFieldList(); + + int nLeftColumns = leftFieldList.size(); + Set set; + if (iOutputColumn < nLeftColumns) { + set = mq.getColumnOrigins(rel.getLeft(), iOutputColumn); + } else { + if (rel.getRight() instanceof TableFunctionScan) { + // get the field name of the left table configured in the Table Function on the right + TableFunctionScan tableFunctionScan = (TableFunctionScan) rel.getRight(); + RexCall rexCall = (RexCall) tableFunctionScan.getCall(); + // support only one field in table function + RexFieldAccess rexFieldAccess = (RexFieldAccess) rexCall.getOperands().get(0); + String fieldName = rexFieldAccess.getField().getName(); + + int leftFieldIndex = 0; + for (int i = 0; i < nLeftColumns; i++) { + if (leftFieldList.get(i).getName().equalsIgnoreCase(fieldName)) { + leftFieldIndex = i; + break; + } + } + /** + * Get the fields from the left table, don't go to + * getColumnOrigins(TableFunctionScan rel,RelMetadataQuery mq, int iOutputColumn), + * otherwise the return is null, and the UDTF field origin cannot be parsed + */ + set = mq.getColumnOrigins(rel.getLeft(), 
leftFieldIndex); + + // process transform for udtf + String transform = rexCall.toString().replace(rexFieldAccess.toString(), fieldName) + + DELIMITER + + tableFunctionScan.getRowType().getFieldNames().get(iOutputColumn - nLeftColumns); + set = createDerivedColumnOrigins(set, transform, false); + } else { + set = mq.getColumnOrigins(rel.getRight(), iOutputColumn - nLeftColumns); + } + } + return set; + } + + public Set getColumnOrigins(SetOp rel, + RelMetadataQuery mq, int iOutputColumn) { + final Set set = new LinkedHashSet<>(); + for (RelNode input : rel.getInputs()) { + Set inputSet = mq.getColumnOrigins(input, iOutputColumn); + if (inputSet == null) { + return Collections.emptySet(); + } + set.addAll(inputSet); + } + return set; + } + + /** + * Support the field blood relationship of lookup join + */ + public Set getColumnOrigins(Snapshot rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + /** + * Support the field blood relationship of watermark + */ + public Set getColumnOrigins(SingleRel rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + /** + * Support for new fields in the source table similar to those created with the LOCALTIMESTAMP function + */ + public Set getColumnOrigins(Project rel, + final RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + RexNode rexNode = rel.getProjects().get(iOutputColumn); + + if (rexNode instanceof RexInputRef) { + // Direct reference: no derivation added. + RexInputRef inputRef = (RexInputRef) rexNode; + int index = inputRef.getIndex(); + if (input instanceof TableScan) { + index = computeIndexWithOffset(rel.getProjects(), inputRef.getIndex(), iOutputColumn); + } + return mq.getColumnOrigins(input, index); + } else if (input instanceof TableScan && rexNode.getClass().equals(RexCall.class) + && ((RexCall) rexNode).getOperands().isEmpty()) { + return mq.getColumnOrigins(input, iOutputColumn); + } + // Anything else is a derivation, possibly from multiple columns. + final Set set = getMultipleColumns(rexNode, input, mq); + return createDerivedColumnOrigins(set, rexNode.toString(), true); + } + + private int computeIndexWithOffset(List projects, int baseIndex, int iOutputColumn) { + int offset = 0; + for (int index = 0; index < iOutputColumn; index++) { + RexNode rexNode = projects.get(index); + if ((rexNode.getClass().equals(RexCall.class) && ((RexCall) rexNode).getOperands().isEmpty())) { + offset += 1; + } + } + return baseIndex + offset; + } + + /** + * Support field blood relationship of CEP. + * The first column is the field after PARTITION BY, and the other columns come from the measures in Match + */ + public Set getColumnOrigins(Match rel, RelMetadataQuery mq, int iOutputColumn) { + int orderCount = rel.getOrderKeys().getKeys().size(); + + if (iOutputColumn < orderCount) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + final RelNode input = rel.getInput(); + RexNode rexNode = rel.getMeasures().values().asList().get(iOutputColumn - orderCount); + + RexPatternFieldRef rexPatternFieldRef = searchRexPatternFieldRef(rexNode); + if (rexPatternFieldRef != null) { + final Set set = mq.getColumnOrigins(input, rexPatternFieldRef.getIndex()); + String originTransform = rexNode instanceof RexCall + ? 
((RexCall) rexNode).getOperands().get(0).toString() + : null; + return createDerivedColumnOrigins(set, originTransform, true); + } + return Collections.emptySet(); + } + + private RexPatternFieldRef searchRexPatternFieldRef(RexNode rexNode) { + if (rexNode instanceof RexCall) { + RexNode operand = ((RexCall) rexNode).getOperands().get(0); + if (operand instanceof RexPatternFieldRef) { + return (RexPatternFieldRef) operand; + } else { + // recursive search + return searchRexPatternFieldRef(operand); + } + } + return null; + } + + public Set getColumnOrigins(Calc rel, + final RelMetadataQuery mq, int iOutputColumn) { + final RelNode input = rel.getInput(); + final RexShuttle rexShuttle = new RexShuttle() { + + @Override + public RexNode visitLocalRef(RexLocalRef localRef) { + return rel.getProgram().expandLocalRef(localRef); + } + }; + final List projects = new ArrayList<>(); + for (RexNode rex : rexShuttle.apply(rel.getProgram().getProjectList())) { + projects.add(rex); + } + final RexNode rexNode = projects.get(iOutputColumn); + if (rexNode instanceof RexInputRef) { + // Direct reference: no derivation added. + RexInputRef inputRef = (RexInputRef) rexNode; + return mq.getColumnOrigins(input, inputRef.getIndex()); + } + // Anything else is a derivation, possibly from multiple columns. + final Set set = getMultipleColumns(rexNode, input, mq); + return createDerivedColumnOrigins(set); + } + + public Set getColumnOrigins(Filter rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(Sort rel, RelMetadataQuery mq, + int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(TableModify rel, RelMetadataQuery mq, + int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(Exchange rel, + RelMetadataQuery mq, int iOutputColumn) { + return mq.getColumnOrigins(rel.getInput(), iOutputColumn); + } + + public Set getColumnOrigins(TableFunctionScan rel, + RelMetadataQuery mq, int iOutputColumn) { + final Set set = new LinkedHashSet<>(); + Set mappings = rel.getColumnMappings(); + if (mappings == null) { + if (!rel.getInputs().isEmpty()) { + // This is a non-leaf transformation: say we don't + // know about origins, because there are probably + // columns below. + return Collections.emptySet(); + } else { + // This is a leaf transformation: say there are fer sure no + // column origins. + return set; + } + } + for (RelColumnMapping mapping : mappings) { + if (mapping.iOutputColumn != iOutputColumn) { + continue; + } + final RelNode input = rel.getInputs().get(mapping.iInputRel); + final int column = mapping.iInputColumn; + Set origins = mq.getColumnOrigins(input, column); + if (origins == null) { + return Collections.emptySet(); + } + if (mapping.derived) { + origins = createDerivedColumnOrigins(origins); + } + set.addAll(origins); + } + return set; + } + + // Catch-all rule when none of the others apply. + @SuppressWarnings("squid:S1172") + public Set getColumnOrigins(RelNode rel, + RelMetadataQuery mq, int iOutputColumn) { + // NOTE jvs 28-Mar-2006: We may get this wrong for a physical table + // expression which supports projections. In that case, + // it's up to the plugin writer to override with the + // correct information. + + if (!rel.getInputs().isEmpty()) { + // No generic logic available for non-leaf rels. 
+ return Collections.emptySet(); + } + + final Set set = new LinkedHashSet<>(); + + RelOptTable table = rel.getTable(); + if (table == null) { + // Somebody is making column values up out of thin air, like a + // VALUES clause, so we return an empty set. + return set; + } + + // Detect the case where a physical table expression is performing + // projection, and say we don't know instead of making any assumptions. + // (Theoretically we could try to map the projection using column + // names.) This detection assumes the table expression doesn't handle + // rename as well. + if (table.getRowType() != rel.getRowType()) { + return Collections.emptySet(); + } + + set.add(new RelColumnOrigin(table, iOutputColumn, false)); + return set; + } + + private Set createDerivedColumnOrigins( + Set inputSet) { + if (inputSet == null) { + return Collections.emptySet(); + } + final Set set = new LinkedHashSet<>(); + for (RelColumnOrigin rco : inputSet) { + RelColumnOrigin derived = new RelColumnOrigin( + rco.getOriginTable(), + rco.getOriginColumnOrdinal(), + true); + set.add(derived); + } + return set; + } + + private Set createDerivedColumnOrigins( + Set inputSet, String transform, boolean originTransform) { + if (inputSet == null || inputSet.isEmpty()) { + return Collections.emptySet(); + } + final Set set = new LinkedHashSet<>(); + + String finalTransform = originTransform ? computeTransform(inputSet, transform) : transform; + for (RelColumnOrigin rco : inputSet) { + RelColumnOrigin derived = new RelColumnOrigin( + rco.getOriginTable(), + rco.getOriginColumnOrdinal(), + true, + finalTransform); + set.add(derived); + } + return set; + } + + /** + * Replace the variable at the beginning of $ in input with the real field information + */ + private String computeTransform(Set inputSet, String transform) { + LOG.debug("origin transform: {}", transform); + Pattern pattern = Pattern.compile("\\$\\d+"); + Matcher matcher = pattern.matcher(transform); + + Set operandSet = new LinkedHashSet<>(); + while (matcher.find()) { + operandSet.add(matcher.group()); + } + + if (operandSet.isEmpty()) { + LOG.info("operandSet is empty"); + return null; + } + if (inputSet.size() != operandSet.size()) { + LOG.warn("The number [{}] of fields in the source tables are not equal to operands [{}]", inputSet.size(), + operandSet.size()); + return null; + } + + Map sourceColumnMap = new HashMap<>(); + Iterator iterator = optimizeSourceColumnSet(inputSet).iterator(); + operandSet.forEach(e -> sourceColumnMap.put(e, iterator.next())); + LOG.debug("sourceColumnMap: {}", sourceColumnMap); + + matcher = pattern.matcher(transform); + String temp; + while (matcher.find()) { + temp = matcher.group(); + transform = transform.replace(temp, sourceColumnMap.get(temp)); + } + + // temporary special treatment + transform = transform.replace("_UTF-16LE", ""); + LOG.debug("transform: {}", transform); + return transform; + } + + /** + * Increase the readability of transform. + * if catalog, database and table are the same, return field. + * If the catalog and database are the same, return the table and field. + * If the catalog is the same, return the database, table, field. 
+ * Otherwise, return all + */ + private Set optimizeSourceColumnSet(Set inputSet) { + Set catalogSet = new HashSet<>(); + Set databaseSet = new HashSet<>(); + Set tableSet = new HashSet<>(); + Set> qualifiedSet = new LinkedHashSet<>(); + for (RelColumnOrigin rco : inputSet) { + RelOptTable originTable = rco.getOriginTable(); + List qualifiedName = originTable.getQualifiedName(); + + // catalog,database,table,field + List qualifiedList = new ArrayList<>(qualifiedName); + catalogSet.add(qualifiedName.get(0)); + databaseSet.add(qualifiedName.get(1)); + tableSet.add(qualifiedName.get(2)); + + String field = rco.getTransform() != null + ? rco.getTransform() + : originTable.getRowType().getFieldNames().get(rco.getOriginColumnOrdinal()); + qualifiedList.add(field); + qualifiedSet.add(qualifiedList); + } + if (catalogSet.size() == 1 && databaseSet.size() == 1 && tableSet.size() == 1) { + return optimizeName(qualifiedSet, e -> e.get(3)); + } else if (catalogSet.size() == 1 && databaseSet.size() == 1) { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e.subList(2, 4))); + } else if (catalogSet.size() == 1) { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e.subList(1, 4))); + } else { + return optimizeName(qualifiedSet, e -> String.join(DELIMITER, e)); + } + } + + private Set optimizeName(Set> qualifiedSet, Function, String> mapper) { + return qualifiedSet.stream().map(mapper).collect(Collectors.toSet()); + } + + private Set getMultipleColumns(RexNode rexNode, RelNode input, + final RelMetadataQuery mq) { + final Set set = new LinkedHashSet<>(); + final RexVisitor visitor = new RexVisitorImpl(true) { + + @Override + public Void visitInputRef(RexInputRef inputRef) { + Set inputSet = mq.getColumnOrigins(input, inputRef.getIndex()); + if (inputSet != null) { + set.addAll(inputSet); + } + return null; + } + }; + rexNode.accept(visitor); + return set; + } +} -- Gitee From 05f4def91005d074ab5f488ba0689ede35f582a8 Mon Sep 17 00:00:00 2001 From: wsczm <646659000@qq.com> Date: Mon, 4 Dec 2023 14:55:06 +0800 Subject: [PATCH 2/9] =?UTF-8?q?flink=20sql=E5=AD=97=E6=AE=B5=E8=A1=80?= =?UTF-8?q?=E7=BC=98=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fire-bundle/fire-bundle-spark/pom.xml | 4 ++-- fire-common/pom.xml | 6 ++++++ fire-engines/fire-flink/pom.xml | 10 ++++++++++ fire-enhance/apache-flink/pom.xml | 10 ++++++++-- fire-examples/flink-examples/pom.xml | 15 +++++++++++++++ pom.xml | 4 ++++ 6 files changed, 45 insertions(+), 4 deletions(-) diff --git a/fire-bundle/fire-bundle-spark/pom.xml b/fire-bundle/fire-bundle-spark/pom.xml index f2154d21..607c1d67 100644 --- a/fire-bundle/fire-bundle-spark/pom.xml +++ b/fire-bundle/fire-bundle-spark/pom.xml @@ -13,11 +13,11 @@ - + io.netty diff --git a/fire-common/pom.xml b/fire-common/pom.xml index e021e969..fd76428f 100644 --- a/fire-common/pom.xml +++ b/fire-common/pom.xml @@ -102,6 +102,12 @@ jsqlparser ${jsqlparser.version} + + org.projectlombok + lombok + 1.18.26 + compile + diff --git a/fire-engines/fire-flink/pom.xml b/fire-engines/fire-flink/pom.xml index bc67087c..73671775 100644 --- a/fire-engines/fire-flink/pom.xml +++ b/fire-engines/fire-flink/pom.xml @@ -170,6 +170,16 @@ org.apache.hive hive-exec ${hive.flink.version} + + + org.apache.calcite + calcite + + + org.apache.calcite + calcite-core + + ${maven.scope} diff --git a/fire-enhance/apache-flink/pom.xml b/fire-enhance/apache-flink/pom.xml index 4504e8d1..606da220 100644 --- 
From 05f4def91005d074ab5f488ba0689ede35f582a8 Mon Sep 17 00:00:00 2001
From: wsczm <646659000@qq.com>
Date: Mon, 4 Dec 2023 14:55:06 +0800
Subject: [PATCH 2/9] =?UTF-8?q?flink=20sql=E5=AD=97=E6=AE=B5=E8=A1=80?=
 =?UTF-8?q?=E7=BC=98=E8=A7=A3=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fire-bundle/fire-bundle-spark/pom.xml |  4 ++--
 fire-common/pom.xml                   |  6 ++++++
 fire-engines/fire-flink/pom.xml       | 10 ++++++++++
 fire-enhance/apache-flink/pom.xml     | 10 ++++++--
 fire-examples/flink-examples/pom.xml  | 15 +++++++++++++++
 pom.xml                               |  4 ++++
 6 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/fire-bundle/fire-bundle-spark/pom.xml b/fire-bundle/fire-bundle-spark/pom.xml
index f2154d21..607c1d67 100644
--- a/fire-bundle/fire-bundle-spark/pom.xml
+++ b/fire-bundle/fire-bundle-spark/pom.xml
@@ -13,11 +13,11 @@
-
+
             io.netty

diff --git a/fire-common/pom.xml b/fire-common/pom.xml
index e021e969..fd76428f 100644
--- a/fire-common/pom.xml
+++ b/fire-common/pom.xml
@@ -102,6 +102,12 @@
             <artifactId>jsqlparser</artifactId>
             <version>${jsqlparser.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.26</version>
+            <scope>compile</scope>
+        </dependency>
     </dependencies>

diff --git a/fire-engines/fire-flink/pom.xml b/fire-engines/fire-flink/pom.xml
index bc67087c..73671775 100644
--- a/fire-engines/fire-flink/pom.xml
+++ b/fire-engines/fire-flink/pom.xml
@@ -170,6 +170,16 @@
             <groupId>org.apache.hive</groupId>
             <artifactId>hive-exec</artifactId>
             <version>${hive.flink.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.calcite</groupId>
+                    <artifactId>calcite</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.calcite</groupId>
+                    <artifactId>calcite-core</artifactId>
+                </exclusion>
+            </exclusions>
             <scope>${maven.scope}</scope>

diff --git a/fire-enhance/apache-flink/pom.xml b/fire-enhance/apache-flink/pom.xml
index 4504e8d1..606da220 100644
--- a/fire-enhance/apache-flink/pom.xml
+++ b/fire-enhance/apache-flink/pom.xml
@@ -162,6 +162,12 @@
             <groupId>org.apache.hive</groupId>
             <artifactId>hive-exec</artifactId>
             <version>${hive.apache.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.calcite</groupId>
+                    <artifactId>calcite-core</artifactId>
+                </exclusion>
+            </exclusions>
             <scope>${maven.scope}</scope>
@@ -218,12 +224,12 @@
             ${curator.verrsion}
-
+

diff --git a/fire-examples/flink-examples/pom.xml b/fire-examples/flink-examples/pom.xml
index 0f9f1ead..1932aaf0 100644
--- a/fire-examples/flink-examples/pom.xml
+++ b/fire-examples/flink-examples/pom.xml
@@ -260,6 +260,21 @@
             <version>11.2.0.3</version>
             <scope>${maven.scope}</scope>
         </dependency>
+        <dependency>
+            <groupId>com.ververica</groupId>
+            <artifactId>flink-sql-connector-sqlserver-cdc</artifactId>
+            <version>2.2.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.doris</groupId>
+            <artifactId>flink-doris-connector-1.15</artifactId>
+            <version>1.2.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.twitter</groupId>
+            <artifactId>chill_2.12</artifactId>
+            <version>0.10.0</version>
+        </dependency>

diff --git a/pom.xml b/pom.xml
index 804e09ee..cb6f9db8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -469,6 +469,10 @@
                 <groupId>org.apache.spark</groupId>
                 <artifactId>spark-core_2.10</artifactId>
             </exclusion>
+            <exclusion>
+                <groupId>org.apache.calcite</groupId>
+                <artifactId>calcite-core</artifactId>
+            </exclusion>
--
Gitee

From 1b2ae89671d5631f76a73e7e538ff2675e90b767 Mon Sep 17 00:00:00 2001
From: wsczm <646659000@qq.com>
Date: Mon, 4 Dec 2023 16:20:13 +0800
Subject: [PATCH 3/9] =?UTF-8?q?flink=20sql=E5=AD=97=E6=AE=B5=E8=A1=80?=
 =?UTF-8?q?=E7=BC=98=E8=A7=A3=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../com/zto/fire/flink/lineage/LineageContext.scala | 2 +-
 .../com/zto/fire/flink/lineage/LineageContext.scala | 2 +-
 .../com/zto/fire/flink/lineage/LineageContext.scala | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala
index 914cb8d8..a2db2e53 100644
--- a/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala
+++ b/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala
@@ -44,7 +44,7 @@ class LineageContext(tableEnv: TableEnvironmentImpl) {
       val targetColumn = targetColumnList.get(index)
       val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index).asScala
-      if (relColumnOriginSet.isEmpty) {
+      if (relColumnOriginSet.nonEmpty) {
         for (rco: RelColumnOrigin <- relColumnOriginSet) {
           // table
           val table = rco.getOriginTable

diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala
index 5ca78f46..53e1718c 100644
--- a/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala
+++ b/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala
@@ -45,7 +45,7 @@ class LineageContext(tableEnv: TableEnvironmentImpl) {
       val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index)
       import scala.collection.JavaConversions._
-      if (!relColumnOriginSet.isEmpty) {
+      if (relColumnOriginSet.nonEmpty) {
         for (rco: RelColumnOrigin <- relColumnOriginSet) {
           // table
           val table = rco.getOriginTable

diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala
index 5ca78f46..53e1718c 100644
--- a/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala
+++ b/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala
@@ -45,7 +45,7 @@ class LineageContext(tableEnv: TableEnvironmentImpl) {
       val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index)
       import scala.collection.JavaConversions._
-      if (!relColumnOriginSet.isEmpty) {
+      if (relColumnOriginSet.nonEmpty) {
         for (rco: RelColumnOrigin <- relColumnOriginSet) {
           // table
           val table = rco.getOriginTable
--
Gitee
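One detail worth spelling out about the guards fixed in PATCH 3: in Scala, nonEmpty is defined as the negation of isEmpty, so the loop over column origins should run exactly when origins were found; a guard written as !nonEmpty would silently skip it. A tiny self-contained illustration (the values are hypothetical):

    val origins = Set("hive.default.t_source.id")  // pretend one column origin was found
    assert(origins.nonEmpty == !origins.isEmpty)   // the two predicates are always equivalent
    if (origins.nonEmpty) origins.foreach(println) // enter the loop only when origins exist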
From ac3292463e63dc18dd81baaee632881d228cba87 Mon Sep 17 00:00:00 2001
From: wsczm <646659000@qq.com>
Date: Mon, 4 Dec 2023 16:32:45 +0800
Subject: [PATCH 4/9] =?UTF-8?q?flink=20sql=E5=AD=97=E6=AE=B5=E8=A1=80?=
 =?UTF-8?q?=E7=BC=98=E8=A7=A3=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../com/zto/fire/common/lineage/SQLLineageManager.scala   | 8 ++++++++
 .../scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala b/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala
index 7d0b3c9d..371d6f10 100644
--- a/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala
+++ b/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala
@@ -53,6 +53,14 @@ private[fire] object SQLLineageManager {
     this.relationSet.add(new SQLTableRelations(srcTableIdentifier.toString, sinkTableIdentifier.toString))
   }

+  /**
+   * Maintains the relationships between tables and their columns
+   *
+   * @param sourceColumn
+   *            the source column
+   * @param targetColumn
+   *            the target column
+   */
   def addColRelation(sourceColumn: String, targetColumn: String): Unit = {
     this.colRelationSet.add(new SQLTableColumnsRelations(sourceColumn, targetColumn))
   }

diff --git a/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala b/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala
index 3296f722..488b3485 100644
--- a/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala
+++ b/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala
@@ -78,14 +78,15 @@ private[fire] trait FlinkSqlParserBase extends SqlParser {
         for (x <- results) {
           SQLLineageManager.addColRelation(x.getSourceColumn, x.getTargetColumn)
         }
+        SQLLineageManager.addRelation(TableIdentifier(results.last.getSourceTable), TableIdentifier(results.last.getTargetTable))
       }
       case createView: SqlCreateView => {
         this.parseSqlNode(createView.getViewName, Operation.CREATE_VIEW)
         this.parseSqlNode(createView.getQuery, Operation.SELECT)
       }
       case createTable: SqlCreateTable =>
-        stableEnv.executeSql(sql)
         parseCreateTable(createTable)
+        stableEnv.executeSql(sql)
       case _ => this.hiveSqlParser(sql)
     }
   } catch {
--
Gitee
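To make the wiring added in FlinkSqlParserBase concrete: for a statement such as INSERT INTO t_sink SELECT id, UPPER(name) FROM t_source, the analyzer is expected to yield one (sourceColumn, targetColumn) pair per sink field, which the parser then registers together with one table-level edge. The identifiers below are illustrative values, not actual output:

    // Illustrative pairs produced by LineageContext.analyzeLineage and
    // registered through the manager, mirroring the loop in the diff above:
    SQLLineageManager.addColRelation("hive.default.t_source.id", "hive.default.t_sink.id")
    SQLLineageManager.addColRelation("UPPER(hive.default.t_source.name)", "hive.default.t_sink.name")
    SQLLineageManager.addRelation(
      TableIdentifier("hive.default.t_source"),
      TableIdentifier("hive.default.t_sink"))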
From 013fd75040bb32660e2c6793db3730ce7041c5b6 Mon Sep 17 00:00:00 2001
From: wsczm <646659000@qq.com>
Date: Mon, 4 Dec 2023 17:59:37 +0800
Subject: [PATCH 5/9] =?UTF-8?q?flink=20sql=E5=AD=97=E6=AE=B5=E8=A1=80?=
 =?UTF-8?q?=E7=BC=98=E8=A7=A3=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fire-examples/flink-examples/pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fire-examples/flink-examples/pom.xml b/fire-examples/flink-examples/pom.xml
index 1932aaf0..8d4e1741 100644
--- a/fire-examples/flink-examples/pom.xml
+++ b/fire-examples/flink-examples/pom.xml
@@ -248,11 +248,11 @@
-
+
             com.oracle
--
Gitee

From 8c17c0cabe914e0fa82cfc5c7d5b40c19e7c2004 Mon Sep 17 00:00:00 2001
From: wsczm <646659000@qq.com>
Date: Fri, 15 Dec 2023 16:45:32 +0800
Subject: [PATCH 6/9] =?UTF-8?q?flink=20sql=E5=AD=97=E6=AE=B5=E8=A1=80?=
 =?UTF-8?q?=E7=BC=98=E8=A7=A3=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../fire/common/bean/lineage/SQLLineage.java  | 11 +----
 .../lineage/SQLTableColumnsRelations.java     | 23 ++++++++++
 .../bean/lineage/SQLTableRelations.java       | 27 +++++++----
 .../common/lineage/SQLLineageManager.scala    | 10 ++--
 fire-core/src/main/resources/fire.properties  |  4 +-
 .../fire/flink/lineage/LineageContext.scala   | 34 ++++++++----
 .../fire/flink/lineage/LineageContext.scala   | 17 +++++++
 .../fire/flink/lineage/LineageContext.scala   | 17 +++++++
 .../fire/flink/lineage/LineageContext.scala   | 17 +++++++
 .../fire/flink/lineage/LineageContext.scala   | 18 +++++++-
 .../fire/flink/sql/FlinkSqlParserBase.scala   |  9 ++--
 .../zto/fire/examples/flink/FlinkDemo.scala   | 12 +++--
 .../fire/examples/flink/FlinkSQLDemo.scala    | 46 ++++++++++++++++++-
 13 files changed, 200 insertions(+), 45 deletions(-)

diff --git a/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLLineage.java b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLLineage.java
index 720ef52d..bccdb4fe 100644
--- a/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLLineage.java
+++ b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLLineage.java
@@ -42,12 +42,11 @@ public class SQLLineage {
      * Describes the data lineage relationships between tables
      */
     private List<SQLTableRelations> relations;
-    private List<SQLTableColumnsRelations> colRelations;
+
     public SQLLineage() {
         this.statements = new LinkedList<>();
         this.tables = new LinkedList<>();
         this.relations = new LinkedList<>();
-        this.colRelations = new LinkedList<>();
     }

     public List<String> getStatements() {
@@ -74,12 +73,4 @@ public class SQLLineage {
         return relations;
     }
-    public List<SQLTableColumnsRelations> getColRelations() {
-        return colRelations;
-    }
-
-    public void setColRelations(List<SQLTableColumnsRelations> colRelations) {
-        this.colRelations = colRelations;
-    }
-
 }
\ No newline at end of file

diff --git a/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableColumnsRelations.java b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableColumnsRelations.java
index 33962f56..c82aad79 100644
--- a/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableColumnsRelations.java
+++ b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableColumnsRelations.java
@@ -1,7 +1,30 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
 package com.zto.fire.common.bean.lineage;

 import java.util.Objects;

+/**
+ * Encapsulates the real-time lineage information collected from SQL: column-level lineage
+ * @author wsczm
+ */
 public class SQLTableColumnsRelations {
     private String sourceColumn;
     private String targetColumn;

diff --git a/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableRelations.java b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableRelations.java
index 65cdb564..6f3d2399 100644
--- a/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableRelations.java
+++ b/fire-common/src/main/java/com/zto/fire/common/bean/lineage/SQLTableRelations.java
@@ -17,6 +17,7 @@

 package com.zto.fire.common.bean.lineage;

+import java.util.HashSet;
 import java.util.Objects;

 /**
@@ -37,12 +38,16 @@ public class SQLTableRelations {
      */
     private String sinkTable;

+    private HashSet<SQLTableColumnsRelations> sqlTableColumnsRelations;
+
     public SQLTableRelations() {
     }

-    public SQLTableRelations(String srcTable, String sinkTable) {
+
+    public SQLTableRelations(String srcTable, String sinkTable, HashSet<SQLTableColumnsRelations> sqlTableColumnsRelations) {
         this.srcTable = srcTable;
         this.sinkTable = sinkTable;
+        this.sqlTableColumnsRelations = sqlTableColumnsRelations;
     }

     public void setSrcTable(String srcTable) {
@@ -61,20 +66,24 @@ public class SQLTableRelations {
         return sinkTable;
     }

+    public HashSet<SQLTableColumnsRelations> getSqlTableColumnsRelations() {
+        return sqlTableColumnsRelations;
+    }
+
+    public void setSqlTableColumnsRelations(HashSet<SQLTableColumnsRelations> sqlTableColumnsRelations) {
+        this.sqlTableColumnsRelations = sqlTableColumnsRelations;
+    }
+
     @Override
     public boolean equals(Object o) {
-        if (this == o) {
-            return true;
-        }
-        if (o == null || getClass() != o.getClass()) {
-            return false;
-        }
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
         SQLTableRelations that = (SQLTableRelations) o;
-        return Objects.equals(srcTable, that.srcTable) && Objects.equals(sinkTable, that.sinkTable);
+        return Objects.equals(srcTable, that.srcTable) && Objects.equals(sinkTable, that.sinkTable) && Objects.equals(sqlTableColumnsRelations, that.sqlTableColumnsRelations);
     }

     @Override
     public int hashCode() {
-        return Objects.hash(srcTable, sinkTable);
+        return Objects.hash(srcTable, sinkTable, sqlTableColumnsRelations);
     }
 }
\ No newline at end of file
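After this change a table-level relation owns its column-level pairs, instead of the two living in unrelated sets. A small sketch of building one relation under the new shape (the table and column names are hypothetical):

    import java.util.{HashSet => JHashSet}
    import com.zto.fire.common.bean.lineage.{SQLTableColumnsRelations, SQLTableRelations}

    val columnPairs = new JHashSet[SQLTableColumnsRelations]()
    columnPairs.add(new SQLTableColumnsRelations("t_source.id", "t_sink.id"))
    // the table edge now carries its column edges
    val relation = new SQLTableRelations("hive.default.t_source", "hive.default.t_sink", columnPairs)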
diff --git a/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala b/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala
index 371d6f10..a08e6efa 100644
--- a/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala
+++ b/fire-common/src/main/scala/com/zto/fire/common/lineage/SQLLineageManager.scala
@@ -32,7 +32,6 @@ private[fire] object SQLLineageManager {
   private lazy val statementSet = new JHashSet[String]()
   private lazy val relationSet = new JHashSet[SQLTableRelations]()
   private lazy val tableLineageMap = new JConcurrentHashMap[String, SQLTable]()
-  private lazy val colRelationSet = new JHashSet[SQLTableColumnsRelations]()

   /**
    * Adds a SQL statement to be executed
@@ -49,8 +48,8 @@ private[fire] object SQLLineageManager {
    * @param sinkTable
    *            the sink table
    */
-  def addRelation(srcTableIdentifier: TableIdentifier, sinkTableIdentifier: TableIdentifier): Unit = {
-    this.relationSet.add(new SQLTableRelations(srcTableIdentifier.toString, sinkTableIdentifier.toString))
+  def addRelation(srcTableIdentifier: TableIdentifier, sinkTableIdentifier: TableIdentifier, relationSet: JHashSet[SQLTableColumnsRelations]): Unit = {
+    this.relationSet.add(new SQLTableRelations(srcTableIdentifier.toString, sinkTableIdentifier.toString, relationSet))
   }

   /**
@@ -61,9 +60,7 @@ private[fire] object SQLLineageManager {
    * @param targetColumn
    *            the target column
    */
-  def addColRelation(sourceColumn: String, targetColumn: String): Unit = {
-    this.colRelationSet.add(new SQLTableColumnsRelations(sourceColumn, targetColumn))
-  }
+

   /**
    * Gets the SQL lineage information
    */
@@ -73,7 +70,6 @@ private[fire] object SQLLineageManager {
     sqlLineage.setStatements(this.statementSet.toList)
     sqlLineage.setTables(this.tableLineageMap.values().toList)
     sqlLineage.setRelations(this.relationSet.toList)
-    sqlLineage.setColRelations(this.colRelationSet.toList)
     sqlLineage
   }

diff --git a/fire-core/src/main/resources/fire.properties b/fire-core/src/main/resources/fire.properties
index ff10838d..0b15c8cd 100644
--- a/fire-core/src/main/resources/fire.properties
+++ b/fire-core/src/main/resources/fire.properties
@@ -99,7 +99,9 @@ fire.lineage.debug.enable =
 # \u662F\u5426\u91C7\u96C6\u539F\u59CB\u7684SQL\u8BED\u53E5
 fire.lineage.collect_sql.enable = true
 # \u662F\u5426\u5F00\u542F\u5C06\u8840\u7F18\u4FE1\u606F\u53D1\u9001\u5230\u6D88\u606F\u961F\u5217
-fire.lineage.send.mq.enable = false
+fire.lineage.send.mq.enable = true
+fire.lineage.send.mq.url = 10.50.0.35:9092
+fire.lineage.send.mq.topic = fire_lineage
 # \u8840\u7F18\u89E3\u6790\u5F02\u6B65\u7EBF\u7A0B\u6267\u884C\u7684\u6B21\u6570
 fire.lineage.run.count = 360
 # \u8840\u7F18\u89E3\u6790\u5B58\u653E\u7684\u961F\u5217\u6570\u6700\u5927\u503C
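Before the per-version LineageContext diffs that follow, a short usage sketch of the class they all implement (assumes an existing TableEnvironmentImpl named tableEnv; the SQL and printed format are illustrative):

    import com.zto.fire.flink.lineage.LineageContext

    val context = new LineageContext(tableEnv)
    // Only INSERT statements are supported; anything else throws a TableException.
    val results = context.analyzeLineage("INSERT INTO t_sink SELECT id, name FROM t_source")
    results.foreach(r => println(s"${r.getSourceColumn} -> ${r.getTargetColumn}"))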
diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala
index c610e6ef..4c4082aa 100644
--- a/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala
+++ b/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package com.zto.fire.flink.lineage

 import com.zto.fire.common.bean.lineage.LineageResult
@@ -17,7 +34,11 @@
 import scala.collection.JavaConverters.{asScalaBufferConverter, asScalaSetConverter, seqAsJavaListConverter}
 import scala.collection.mutable.ListBuffer
 import scala.language.postfixOps

-
+/**
+ * Entry point for Flink SQL lineage parsing
+ *
+ * @author wsczm
+ */
 class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {

   private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = {
@@ -40,14 +61,11 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
       .map(tableColumn => tableColumn.getName)
       .toList
       .asJava
-
     validateSchema(sinkTable, optRelNode, targetColumnList)
     val metadataQuery = optRelNode.getCluster.getMetadataQuery
     val resultList = ListBuffer[LineageResult]()
-
     for (index <- 0 until targetColumnList.size) {
       val targetColumn = targetColumnList.get(index)
-
       val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index).asScala
       if (relColumnOriginSet.nonEmpty) {
         for (rco: RelColumnOrigin <- relColumnOriginSet) {
@@ -74,8 +92,11 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
   }

+  /**
+   * @param sql
+   *            insert into sql
+   */
   def analyzeLineage(sql: String) = {
-
     RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE))
     val parsed = parseStatement(sql)
     val sinkTable = parsed._1
@@ -93,10 +114,7 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
     case _ =>
       throw new TableException("Only insert is supported now.")
   }
-
-
   }
-
   private def parseValidateConvert(singleSql: String) = {
     RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE))
     val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql)

diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala
index af6371e9..bbe87de2 100644
--- a/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala
+++ b/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + package com.zto.fire.flink.lineage import com.zto.fire.common.bean.lineage.LineageResult diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala index a2db2e53..79478a47 100644 --- a/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala +++ b/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.zto.fire.flink.lineage import com.zto.fire.common.bean.lineage.LineageResult diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala index 53e1718c..04da7698 100644 --- a/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala +++ b/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.zto.fire.flink.lineage import com.zto.fire.common.bean.lineage.LineageResult diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala index 53e1718c..2a69b98c 100644 --- a/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala +++ b/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.zto.fire.flink.lineage import com.zto.fire.common.bean.lineage.LineageResult @@ -31,7 +47,7 @@ class LineageContext(tableEnv: TableEnvironmentImpl) { } } - def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode):ListBuffer[LineageResult] = { + def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode): ListBuffer[LineageResult] = { val targetColumnList = tableEnv.from(sinkTable) .getResolvedSchema .getColumnNames diff --git a/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala b/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala index 488b3485..f2b3225b 100644 --- a/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala +++ b/fire-engines/fire-flink/src/main/scala/com/zto/fire/flink/sql/FlinkSqlParserBase.scala @@ -20,6 +20,7 @@ package com.zto.fire.flink.sql import com.zto.fire._ import com.zto.fire.common.anno.Internal import com.zto.fire.common.bean.TableIdentifier +import com.zto.fire.common.bean.lineage.SQLTableColumnsRelations import com.zto.fire.common.conf.FireHiveConf import com.zto.fire.common.enu.{Datasource, Operation} import com.zto.fire.common.lineage.SQLLineageManager @@ -30,6 +31,7 @@ import com.zto.fire.flink.conf.FireFlinkConf import com.zto.fire.flink.lineage.LineageContext import com.zto.fire.flink.util.{FlinkSingletonFactory, FlinkUtils} import com.zto.fire.jdbc.conf.FireJdbcConf +import com.zto.fire.predef.JHashSet import org.apache.calcite.sql._ import org.apache.flink.configuration.Configuration import org.apache.flink.sql.parser.SqlProperty @@ -75,10 +77,11 @@ private[fire] trait FlinkSqlParserBase extends SqlParser { this.parseSqlNode(select) case insert: RichSqlInsert => { val results = context.analyzeLineage(sql) + val relationses = new JHashSet[SQLTableColumnsRelations]() for (x <- results) { - SQLLineageManager.addColRelation(x.getSourceColumn, x.getTargetColumn) + relationses.add(new SQLTableColumnsRelations(x.getSourceColumn, x.getTargetColumn)) } - SQLLineageManager.addRelation(TableIdentifier(results.last.getSourceTable), TableIdentifier(results.last.getTargetTable)) + SQLLineageManager.addRelation(TableIdentifier(results.last.getSourceTable), TableIdentifier(results.last.getTargetTable), relationses) } case createView: SqlCreateView => { this.parseSqlNode(createView.getViewName, Operation.CREATE_VIEW) @@ -150,7 +153,7 @@ private[fire] trait FlinkSqlParserBase extends SqlParser { val tableIdentifier = toFireTableIdentifier(sqlIdentifier, isHive) this.addCatalog(tableIdentifier, operation) if (targetTable.isDefined) { - SQLLineageManager.addRelation(tableIdentifier, TableIdentifier(targetTable.get.toString)) + SQLLineageManager.addRelation(tableIdentifier, TableIdentifier(targetTable.get.toString),null) } } case sqlNodeList: SqlNodeList => JavaConversions.asScalaBuffer(sqlNodeList.getList).foreach(this.parseSqlNode(_)) diff --git 
a/fire-examples/flink-examples/src/main/scala/com/zto/fire/examples/flink/FlinkDemo.scala b/fire-examples/flink-examples/src/main/scala/com/zto/fire/examples/flink/FlinkDemo.scala
index 6598d771..ab46495f 100644
--- a/fire-examples/flink-examples/src/main/scala/com/zto/fire/examples/flink/FlinkDemo.scala
+++ b/fire-examples/flink-examples/src/main/scala/com/zto/fire/examples/flink/FlinkDemo.scala
@@ -29,22 +29,24 @@ import com.zto.fire.flink.anno.Streaming
  *
  * @contact Fire framework technical exchange group (DingTalk): 35373471
  */
-@Config(
+/*@Config(
   """
     |# Supports Flink tuning parameters, Fire framework parameters, user-defined parameters, etc.
    |state.checkpoints.num-retained=30
    |state.checkpoints.dir=hdfs:///user/flink/checkpoint
    |""")
-@Hive("thrift://localhost:9083") // connect to the specified hive
+@Hive("thrift://localhost:9083") // connect to the specified hive*/
 @Streaming(interval = 100, unaligned = true, parallelism = 4) // checkpoint every 100s, with unaligned checkpoints enabled
-@Kafka(brokers = "localhost:9092", topics = "fire", groupId = "fire")
+@Kafka(brokers = "10.50.0.35:9092", topics = "mo_alarm_event", groupId = "fire1")
 object FlinkDemo extends FlinkStreaming {

   @Process
   def kafkaSource: Unit = {
     val dstream = this.fire.createKafkaDirectStream() // consume kafka through the API
-    sql("""create table statement ...""")
-    sql("""insert into statement ...""")
+/*    sql("""create table statement ...""")
+    sql("""insert into statement ...""")*/
+    dstream.print()
+//    this.fire.start
   }
 }

diff --git a/fire-examples/flink-examples/src/main/scala/com/zto/fire/examples/flink/FlinkSQLDemo.scala b/fire-examples/flink-examples/src/main/scala/com/zto/fire/examples/flink/FlinkSQLDemo.scala
index 9b264002..e0e62e8b 100644
--- a/fire-examples/flink-examples/src/main/scala/com/zto/fire/examples/flink/FlinkSQLDemo.scala
+++ b/fire-examples/flink-examples/src/main/scala/com/zto/fire/examples/flink/FlinkSQLDemo.scala
@@ -17,7 +17,7 @@

 package com.zto.fire.examples.flink

-import com.zto.fire.core.anno.lifecycle.{Step1, Step2, Step3}
+import com.zto.fire.core.anno.lifecycle.{Step1, Step2, Step3, Step4, Step5, Step6}
 import com.zto.fire.flink.FlinkStreaming
 import com.zto.fire.flink.anno.Streaming

@@ -81,4 +81,48 @@ object FlinkSQLDemo extends FlinkStreaming {
       |group by id, name, age, createTime, sex
       |""".stripMargin)
   }
+
+  @Step4("define the source table schema")
+  def sourceTable1: Unit = {
+    sql(
+      s"""
+         | CREATE TABLE t_student1 (
+         |   id BIGINT,
+         |   name STRING,
+         |   age INT,
+         |   createTime TIMESTAMP(13),
+         |   sex Boolean
+         |) WITH (
+         |   'connector' = 'datagen',
+         |   'rows-per-second'='100',   -- 100 rows/s
+         |   'fields.id.min'='1',       -- id field, between 1 and 1000
+         |   'fields.id.max'='1000',
+         |   'fields.name.length'='5',  -- name field, 5 characters long
+         |   'fields.age.min'='1',      -- age field, 1 to 120 years old
+         |   'fields.age.max'='120'
+         |)
+         |""".stripMargin)
+  }
+
+  @Step5("define the sink table schema")
+  def destTable1: Unit = {
+    sql(
+      s"""
+         |CREATE TABLE t_print_table1 WITH ('connector' = 'print')
+         |LIKE t_student (EXCLUDING ALL)
+         |""".stripMargin)
+  }
+
+  @Step6("execute the insert statement")
+  def insertStatement1: Unit = {
+    sql(
+      s"""
+         |insert into t_print_table1
+         |select
+         |  id, name, age, createTime, sex
+         |from t_student1
+         |group by id, name, age, createTime, sex
+         |""".stripMargin)
+  }
+
 }
--
Gitee

From f55aa6d9e635c60fbd72d4dd571afe79f662a1ac Mon Sep 17 00:00:00 2001
From: wsczm <646659000@qq.com>
Date: Tue, 19 Dec 2023 10:35:05 +0800
Subject: [PATCH 7/9] =?UTF-8?q?flink=20sql=E5=AD=97=E6=AE=B5=E8=A1=80?=
 =?UTF-8?q?=E7=BC=98=E6=A0=BC=E5=BC=8F=E8=B0=83=E6=95=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fire-core/src/main/resources/fire.properties | 175 +++++++++---------
.../fire/flink/lineage/LineageContext.scala | 30 ++- .../fire/flink/lineage/LineageContext.scala | 40 ++-- .../fire/flink/lineage/LineageContext.scala | 32 +++- .../fire/flink/lineage/LineageContext.scala | 33 +++- .../fire/flink/lineage/LineageContext.scala | 26 ++- .../calcite/rel/metadata/RelColumnOrigin.java | 15 +- .../rel/metadata/RelMdColumnOrigins.java | 72 +++---- .../calcite/rel/metadata/RelColumnOrigin.java | 5 +- .../rel/metadata/RelMdColumnOrigins.java | 30 +-- .../calcite/rel/metadata/RelColumnOrigin.java | 3 +- .../rel/metadata/RelMdColumnOrigins.java | 4 +- .../calcite/rel/metadata/RelColumnOrigin.java | 3 +- .../rel/metadata/RelMdColumnOrigins.java | 4 +- .../calcite/rel/metadata/RelColumnOrigin.java | 3 +- .../rel/metadata/RelMdColumnOrigins.java | 3 +- 16 files changed, 266 insertions(+), 212 deletions(-) diff --git a/fire-core/src/main/resources/fire.properties b/fire-core/src/main/resources/fire.properties index 0b15c8cd..9b675e4e 100644 --- a/fire-core/src/main/resources/fire.properties +++ b/fire-core/src/main/resources/fire.properties @@ -14,183 +14,176 @@ # See the License for the specific language governing permissions and # limitations under the License. # - # ----------------------------------------------- < fire \u914D\u7F6E > ------------------------------------------------ # # \u5F53\u524Dfire\u6846\u67B6\u7684\u7248\u672C\u53F7 -fire.version = ${project.version} +fire.version=${project.version} # fire\u5185\u7F6E\u7EBF\u7A0B\u6C60\u5927\u5C0F -fire.thread.pool.size = 3 +fire.thread.pool.size=3 # fire\u5185\u7F6E\u5B9A\u65F6\u4EFB\u52A1\u7EBF\u7A0B\u6C60\u5927\u5C0F -fire.thread.pool.schedule.size = 3 +fire.thread.pool.schedule.size=3 # \u662F\u5426\u542F\u7528restful\u670D\u52A1 -fire.rest.enable = true +fire.rest.enable=true # \u7528\u4E8E\u8BBE\u7F6E\u662F\u5426\u505A\u63A5\u53E3\u6821\u9A8C -fire.rest.filter.enable = true +fire.rest.filter.enable=true # \u662F\u5426\u6253\u5370\u914D\u7F6E\u4FE1\u606F -fire.conf.show.enable = false +fire.conf.show.enable=false # fire.conf.deploy.engine=className\uFF0C\u5728\u4E0D\u540C\u5F15\u64CE\u5B9E\u73B0\u6A21\u5757\u4E2D\uFF0C\u6307\u5B9A\u5177\u4F53\u53EF\u83B7\u53D6\u914D\u7F6E\u4FE1\u606F\u7684EngineConf\u5B50\u7C7B\u5B9E\u73B0\uFF0C\u7528\u4E8E\u540C\u6B65\u914D\u7F6E\u5230\u5404container\u8282\u70B9 # \u662F\u5426\u6253\u5370restful\u5730\u5740 -fire.rest.url.show.enable = false +fire.rest.url.show.enable=false # \u662F\u5426\u542F\u7528hostname\u4F5C\u4E3Arest\u670D\u52A1\u7684\u8BBF\u95EE\u5730\u5740 -fire.rest.url.hostname = false +fire.rest.url.hostname=false # \u662F\u5426\u5173\u95EDfire\u5185\u7F6E\u7684\u6240\u6709\u7D2F\u52A0\u5668 -fire.acc.enable = true +fire.acc.enable=true # \u65E5\u5FD7\u7D2F\u52A0\u5668\u5F00\u5173 -fire.acc.log.enable = true +fire.acc.log.enable=true # \u591A\u503C\u7D2F\u52A0\u5668\u5F00\u5173 -fire.acc.multi.counter.enable = true +fire.acc.multi.counter.enable=true # \u591A\u65F6\u95F4\u7EF4\u5EA6\u7D2F\u52A0\u5668\u5F00\u5173 -fire.acc.multi.timer.enable = true +fire.acc.multi.timer.enable=true # fire\u6846\u67B6\u57CB\u70B9\u65E5\u5FD7\u5F00\u5173\uFF0C\u5173\u95ED\u4EE5\u540E\u5C06\u4E0D\u518D\u6253\u5370\u57CB\u70B9\u65E5\u5FD7 -fire.log.enable = true +fire.log.enable=true # \u7528\u4E8E\u9650\u5B9Afire\u6846\u67B6\u4E2Dsql\u65E5\u5FD7\u7684\u5B57\u7B26\u4E32\u957F\u5EA6 -fire.log.sql.length = 100 +fire.log.sql.length=100 # \u662F\u5426\u542F\u7528\u4E3Aconnector\u6CE8\u518Cshutdown hook\uFF0C\u5F53jvm\u9000\u51FA\u524Dclose -fire.connector.shutdown_hook.enable = false 
+fire.connector.shutdown_hook.enable=false # fire\u6846\u67B6\u9488\u5BF9jdbc\u64CD\u4F5C\u540E\u6570\u636E\u96C6\u7684\u7F13\u5B58\u7B56\u7565 -fire.jdbc.storage.level = memory_and_disk_ser +fire.jdbc.storage.level=memory_and_disk_ser # \u901A\u8FC7JdbcConnector\u67E5\u8BE2\u540E\u5C06\u6570\u636E\u96C6\u653E\u5230\u591A\u5C11\u4E2A\u5206\u533A\u4E2D\uFF0C\u9700\u6839\u636E\u5B9E\u9645\u7684\u7ED3\u679C\u96C6\u505A\u914D\u7F6E -fire.jdbc.query.partitions = 10 +fire.jdbc.query.partitions=10 # \u662F\u5426\u542F\u7528\u5B9A\u65F6\u8C03\u5EA6 -fire.task.schedule.enable = true +fire.task.schedule.enable=true # \u662F\u5426\u542F\u7528\u52A8\u6001\u914D\u7F6E -fire.dynamic.conf.enable = true +fire.dynamic.conf.enable=true # fire\u6846\u67B6rest\u63A5\u53E3\u670D\u52A1\u6700\u5927\u7EBF\u7A0B\u6570 -fire.restful.max.thread = 8 +fire.restful.max.thread=8 # quartz\u6700\u5927\u7EBF\u7A0B\u6C60\u5927\u5C0F -fire.quartz.max.thread = 5 +fire.quartz.max.thread=5 # fire\u6536\u96C6\u65E5\u5FD7\u4FDD\u7559\u7684\u6700\u5C11\u8BB0\u5F55\u6570 -fire.acc.log.min.size = 500 +fire.acc.log.min.size=500 # fire\u6536\u96C6\u65E5\u5FD7\u4FDD\u7559\u7684\u6700\u591A\u8BB0\u5F55\u6570 -fire.acc.log.max.size = 1000 +fire.acc.log.max.size=1000 # timer\u7D2F\u52A0\u5668\u4FDD\u7559\u6700\u5927\u7684\u8BB0\u5F55\u6570 -fire.acc.timer.max.size = 1000 +fire.acc.timer.max.size=1000 # timer\u7D2F\u52A0\u5668\u6E05\u7406\u51E0\u5C0F\u65F6\u4E4B\u524D\u7684\u8BB0\u5F55 -fire.acc.timer.max.hour = 12 +fire.acc.timer.max.hour=12 # env\u7D2F\u52A0\u5668\u5F00\u5173 -fire.acc.env.enable = true +fire.acc.env.enable=true # env\u7D2F\u52A0\u5668\u4FDD\u7559\u6700\u591A\u7684\u8BB0\u5F55\u6570 -fire.acc.env.max.size = 500 +fire.acc.env.max.size=500 # env\u7D2F\u52A0\u5668\u4FDD\u7559\u6700\u5C11\u7684\u8BB0\u5F55\u6570 -fire.acc.env.min.size = 100 +fire.acc.env.min.size=100 # \u5B9A\u65F6\u8C03\u5EA6\u4EFB\u52A1\u9ED1\u540D\u5355\uFF0C\u914D\u7F6E\u7684value\u4E3A\u5B9A\u65F6\u4EFB\u52A1\u65B9\u6CD5\u540D\uFF0C\u591A\u4E2A\u4EE5\u9017\u53F7\u5206\u9694 -fire.scheduler.blacklist = +fire.scheduler.blacklist= # \u914D\u7F6E\u6253\u5370\u9ED1\u540D\u5355\uFF0C\u5305\u542B\u8BE5\u914D\u7F6E\u5C06\u4E0D\u88AB\u6253\u5370 -fire.conf.print.blacklist = .map.,pass,secret,zrc,connection,hdfs.ha,print.blacklist,yarn,namenode,metastore,address,redaction +fire.conf.print.blacklist=.map.,pass,secret,zrc,connection,hdfs.ha,print.blacklist,yarn,namenode,metastore,address,redaction # fire\u6846\u67B6restful\u7AEF\u53E3\u51B2\u7A81\u91CD\u8BD5\u6B21\u6570 -fire.restful.port.retry_num = 3 +fire.restful.port.retry_num=3 # fire\u6846\u67B6restful\u7AEF\u53E3\u51B2\u7A81\u91CD\u8BD5\u65F6\u95F4\uFF08ms\uFF09 -fire.restful.port.retry_duration = 1000 +fire.restful.port.retry_duration=1000 # \u65E5\u5FD7\u7684\u7EA7\u522B\uFF0C\u7EDF\u4E00\u524D\u7F00\u4E3A\uFF1Afire.log.level.conf. 
-fire.log.level.conf.org.apache.spark = INFO -fire.log.level.conf.org.spark_project = INFO -fire.log.level.conf.org.apache.kafka = WARN -fire.log.level.conf.org.apache.zookeeper = WARN -fire.log.level.conf.com.zto.fire = INFO -fire.log.level.conf.org.eclipse.jetty.server = ERROR +fire.log.level.conf.org.apache.spark=INFO +fire.log.level.conf.org.spark_project=INFO +fire.log.level.conf.org.apache.kafka=WARN +fire.log.level.conf.org.apache.zookeeper=WARN +fire.log.level.conf.com.zto.fire=INFO +fire.log.level.conf.org.eclipse.jetty.server=ERROR # \u662F\u5426\u5C06\u914D\u7F6E\u540C\u6B65\u5230executor\u3001taskmanager\u7AEF -fire.deploy_conf.enable = true +fire.deploy_conf.enable=true # \u6BCF\u4E2Ajvm\u5B9E\u4F8B\u5185\u90E8queue\u7528\u4E8E\u5B58\u653E\u5F02\u5E38\u5BF9\u8C61\u6570\u6700\u5927\u5927\u5C0F\uFF0C\u907F\u514D\u961F\u5217\u8FC7\u5927\u9020\u6210\u5185\u5B58\u6EA2\u51FA -fire.exception_bus.size = 1000 +fire.exception_bus.size=1000 # \u662F\u5426\u5F00\u542F\u5B9E\u65F6\u8840\u7F18\u91C7\u96C6 -fire.lineage.enable = true +fire.lineage.enable=true # \u662F\u5426\u5F00\u542F\u8840\u7F18\u7684debug\u6A21\u5F0F\u534F\u52A9\u6392\u67E5\u95EE\u9898 -fire.lineage.debug.enable = false +fire.lineage.debug.enable=false # \u662F\u5426\u91C7\u96C6\u539F\u59CB\u7684SQL\u8BED\u53E5 -fire.lineage.collect_sql.enable = true +fire.lineage.collect_sql.enable=true # \u662F\u5426\u5F00\u542F\u5C06\u8840\u7F18\u4FE1\u606F\u53D1\u9001\u5230\u6D88\u606F\u961F\u5217 -fire.lineage.send.mq.enable = true -fire.lineage.send.mq.url = 10.50.0.35:9092 -fire.lineage.send.mq.topic = fire_lineage +# fire.lineage.send.mq.enable = true +# fire.lineage.send.mq.url = node01:9092 +# fire.lineage.send.mq.topic = fire_lineage # \u8840\u7F18\u89E3\u6790\u5F02\u6B65\u7EBF\u7A0B\u6267\u884C\u7684\u6B21\u6570 -fire.lineage.run.count = 360 +fire.lineage.run.count=360 # \u8840\u7F18\u89E3\u6790\u5B58\u653E\u7684\u961F\u5217\u6570\u6700\u5927\u503C -fire.lineage.max.size = 500 +fire.lineage.max.size=500 # \u5B9A\u65F6\u89E3\u6790\u57CB\u70B9SQL\u7684\u521D\u59CB\u5EF6\u8FDF\uFF08s\uFF09 -fire.lineage.run.initialDelay = 60 +fire.lineage.run.initialDelay=60 # \u5B9A\u65F6\u89E3\u6790\u57CB\u70B9SQL\u7684\u6267\u884C\u9891\u7387\uFF08s\uFF09 -fire.lineage.run.period = 120 +fire.lineage.run.period=120 # \u7528\u4E8Ejdbc url\u7684\u8BC6\u522B\uFF0C\u5F53\u65E0\u6CD5\u901A\u8FC7driver class\u8BC6\u522B\u6570\u636E\u6E90\u65F6\uFF0C\u5C06\u4ECEurl\u4E2D\u7684\u7AEF\u53E3\u53F7\u8FDB\u884C\u533A\u5206\uFF0C\u4E0D\u540C\u6570\u636E\u914D\u7F6E\u4F7F\u7528\u7EDF\u4E00\u7684\u524D\u7F00\uFF1Afire.lineage.datasource.map. 
-fire.lineage.datasource.map.tidb = 4000 -fire.lineage.datasource.map.doris = 9030 -fire.lineage.datasource.map.presto = 7890 +fire.lineage.datasource.map.tidb=4000 +fire.lineage.datasource.map.doris=9030 +fire.lineage.datasource.map.presto=7890 # \u662F\u5426\u5F00\u542F\u914D\u7F6E\u81EA\u9002\u5E94\u524D\u7F00\uFF0C\u81EA\u52A8\u4E3A\u914D\u7F6E\u52A0\u4E0A\u5F15\u64CE\u524D\u7F00\uFF08spark.|flink.\uFF09 -fire.conf.adaptive.prefix = true +fire.conf.adaptive.prefix=true # \u7528\u6237\u7EDF\u4E00\u914D\u7F6E\u6587\u4EF6\uFF0C\u5141\u8BB8\u7528\u6237\u5728\u8BE5\u914D\u7F6E\u6587\u4EF6\u4E2D\u5B58\u653E\u516C\u5171\u7684\u914D\u7F6E\u4FE1\u606F\uFF0C\u4F18\u5148\u7EA7\u4F4E\u4E8E\u4EFB\u52A1\u914D\u7F6E\u6587\u4EF6\uFF08\u591A\u4E2A\u4EE5\u9017\u53F7\u5206\u9694\uFF09 -fire.user.common.conf = common.properties,conf-test.properties,conf-dev.properties,conf-prod.properties +fire.user.common.conf=common.properties,conf-test.properties,conf-dev.properties,conf-prod.properties # fire\u63A5\u53E3\u8BA4\u8BC1\u79D8\u94A5 -fire.rest.server.secret = fire +fire.rest.server.secret=fire # \u662F\u5426\u5728\u8C03\u7528shutdown\u65B9\u6CD5\u65F6\u4E3B\u52A8\u9000\u51FAjvm\u8FDB\u7A0B -fire.shutdown.auto.exit = true +fire.shutdown.auto.exit=true # \u8C03\u7528print\u6253\u5370\u65E5\u5FD7\u81F3\u591A\u591A\u5C11\u6761\uFF0C\u7528\u4E8E\u9650\u5236\u6BCF\u4E2Acontainer\u6253\u5370\u7684\u6570\u636E\u91CF\uFF0C\u907F\u514D\u5927\u65E5\u5FD7\u6587\u4EF6\u5360\u6EE1\u78C1\u76D8\uFF0C\u5F53\u503C\u5C0F\u4E8E\u7B49\u4E8E\u96F6\u65F6\u8868\u793A\u4E0D\u9650\u5236 -fire.print.limit = 1000000 +fire.print.limit=1000000 # \u7528\u4E8E\u6307\u5B9A\u5F53\u524D\u8FD0\u884C\u73AF\u5883\u662F\u5426\u4E3Alocal\u6A21\u5F0F\uFF08\u4E3B\u8981\u7528\u4E8Eflink-shell\u7684\u672C\u5730\u914D\u7F6E\u6587\u4EF6\u52A0\u8F7D\uFF09 # fire.env.local = false # \u662F\u5426\u542F\u7528\u57FA\u4E8E\u6CE8\u89E3\u8FDB\u884C\u914D\u7F6E\uFF08@Kafka\u3001@RocketMQ\u3001@Hive\u3001@HBase\u7B49\uFF09 -fire.conf.annotation.enable = true +fire.conf.annotation.enable=true # \u662F\u5426\u542F\u7528\u5F02\u5E38\u5806\u6808\u65E5\u5FD7\u7684\u91C7\u96C6\uFF0C\u4E00\u65E6\u5F00\u542F\uFF0C\u5C06\u81EA\u52A8\u91C7\u96C6sql+api\u7B49\u5F02\u5E38\u5806\u6808\uFF0C\u5E76\u53D1\u9001\u5230\u6307\u5B9A\u7684kafka topic\u4E2D -fire.analysis.log.exception.stack.enable = false +fire.analysis.log.exception.stack.enable=false # \u5F02\u5E38\u4FE1\u606F\u53D1\u9001MQ\u5931\u8D25\u65F6\u6700\u5927\u91CD\u8BD5\u6B21\u6570 -fire.analysis.log.exception.send.maxRetires = 10 +fire.analysis.log.exception.send.maxRetires=10 # \u5F02\u5E38\u6D88\u606F\u53D1\u9001MQ\u8D85\u65F6\u65F6\u95F4 -fire.analysis.log.exception.send.timeout = 3000 +fire.analysis.log.exception.send.timeout=3000 # \u662F\u5426\u81EA\u52A8\u63D0\u4EA4\u4EFB\u52A1 -fire.job.autoStart = true +fire.job.autoStart=true # fire\u6846\u67B6\u540C\u6B65\u7D2F\u52A0\u5668\u5728\u6BCF\u4E2Acontainer\u7AEF\u5B58\u653E\u5B57\u7B26\u4E32\u7684\u6700\u5927\u8BB0\u5F55\u6570 -fire.acc.sync.max.size = 100 - +fire.acc.sync.max.size=100 # ----------------------------------------------- < kafka \u914D\u7F6E > ----------------------------------------------- # # kafka\u96C6\u7FA4\u540D\u79F0\u4E0E\u96C6\u7FA4\u5730\u5740\u6620\u5C04\uFF0C\u4EFB\u52A1\u4E2D\u901A\u8FC7kafka.brokers.name=local\u5373\u53EF\u8FDE\u5230\u4EE5\u4E0B\u914D\u7F6E\u7684broker\u5730\u5740 # fire.kafka.cluster.map.local = localhost:9092,localhost02:9092 - # ----------------------------------------------- < hive \u914D\u7F6E > 
------------------------------------------------ # # \u9ED8\u8BA4\u7684hive\u6570\u636E\u5E93 -fire.hive.default.database.name = tmp +fire.hive.default.database.name=tmp # \u9ED8\u8BA4\u7684hive\u5206\u533A\u5B57\u6BB5\u540D\u79F0 -fire.hive.table.default.partition.name = ds +fire.hive.table.default.partition.name=ds # \u79BB\u7EBF\u96C6\u7FA4hive metastore\u5730\u5740\uFF08\u522B\u540D\uFF1Alocal\uFF09\uFF0C\u4EFB\u52A1\u4E2D\u901A\u8FC7fire.hive.cluster=local\u5373\u53EF\u8FDE\u5230\u4E00\u4E0B\u914D\u7F6E\u7684thrift\u5730\u5740 # fire.hive.cluster.map.local = thrift://localhost:9083,thrift://localhost02:9083 # \u662F\u5426\u542F\u7528hive metastore url\u7684\u968F\u673A\u9009\u62E9 -fire.hive.metastore.url.random.enable = true - +fire.hive.metastore.url.random.enable=true # ----------------------------------------------- < HBase \u914D\u7F6E > ----------------------------------------------- # # \u4E00\u6B21\u8BFB\u5199HBase\u7684\u6570\u636E\u91CF -fire.hbase.batch.size = 10000 +fire.hbase.batch.size=10000 # fire\u6846\u67B6\u9488\u5BF9hbase\u64CD\u4F5C\u540E\u6570\u636E\u96C6\u7684\u7F13\u5B58\u7B56\u7565 -fire.hbase.storage.level = memory_and_disk_ser +fire.hbase.storage.level=memory_and_disk_ser # \u901A\u8FC7HBase scan\u540Erepartition\u7684\u5206\u533A\u6570\uFF0C\u9700\u6839\u636Escan\u540E\u7684\u6570\u636E\u91CF\u505A\u914D\u7F6E -fire.hbase.scan.partitions = -1 +fire.hbase.scan.partitions=-1 # \u540E\u7EED\u7248\u672C\u4F1A\u5E9F\u5F03\uFF0C\u5E9F\u5F03\u540Efire.hbase.scan.partitions\u9ED8\u8BA4\u503C\u6539\u4E3A1200 -fire.hbase.scan.repartitions = 1200 +fire.hbase.scan.repartitions=1200 # \u662F\u5426\u5F00\u542FHBase\u8868\u5B58\u5728\u5224\u65AD\u7684\u7F13\u5B58\uFF0C\u5F00\u542F\u540E\u8868\u5B58\u5728\u5224\u65AD\u5C06\u907F\u514D\u5927\u91CF\u7684connection\u6D88\u8017 -fire.hbase.table.exists.cache.enable = true +fire.hbase.table.exists.cache.enable=true # \u662F\u5426\u5F00\u542FHBase\u8868\u5B58\u5728\u5217\u8868\u7F13\u5B58\u7684\u5B9A\u65F6\u66F4\u65B0\u4EFB\u52A1 -fire.hbase.table.exists.cache.reload.enable = true +fire.hbase.table.exists.cache.reload.enable=true # \u5B9A\u65F6\u5237\u65B0\u7F13\u5B58HBase\u8868\u4EFB\u52A1\u7684\u521D\u59CB\u5EF6\u8FDF\uFF08s\uFF09 -fire.hbase.table.exists.cache.initialDelay = 60 +fire.hbase.table.exists.cache.initialDelay=60 # \u5B9A\u65F6\u5237\u65B0\u7F13\u5B58HBase\u8868\u4EFB\u52A1\u7684\u6267\u884C\u9891\u7387\uFF08s\uFF09 -fire.hbase.table.exists.cache.period = 600 +fire.hbase.table.exists.cache.period=600 # hbase\u96C6\u7FA4\u7684zk\u5730\u5740\uFF08\u522B\u540D\uFF1Alocal\uFF09\uFF0C\u4EFB\u52A1\u4E2D\u901A\u8FC7hbase.cluster=local\u5373\u53EF\u8FDE\u5230\u5BF9\u5E94\u7684hbase\u96C6\u7FA4 # fire.hbase.cluster.map.local = localhost:2181,localhost02:2181 - # hbase connection \u914D\u7F6E\uFF0C\u7EA6\u5B9A\u4EE5\uFF1Afire.hbase.conf.\u5F00\u5934\uFF0C\u6BD4\u5982\uFF1Afire.hbase.conf.hbase.rpc.timeout\u5BF9\u5E94hbase\u4E2D\u7684\u914D\u7F6E\u4E3Ahbase.rpc.timeout -fire.hbase.conf.hbase.zookeeper.property.clientPort = 2181 -fire.hbase.conf.zookeeper.znode.parent = /hbase -fire.hbase.conf.hbase.rpc.timeout = 600000 -fire.hbase.conf.hbase.snapshot.master.timeoutMillis = 600000 -fire.hbase.conf.hbase.snapshot.region.timeout = 600000 - +fire.hbase.conf.hbase.zookeeper.property.clientPort=2181 +fire.hbase.conf.zookeeper.znode.parent=/hbase +fire.hbase.conf.hbase.rpc.timeout=600000 +fire.hbase.conf.hbase.snapshot.master.timeoutMillis=600000 +fire.hbase.conf.hbase.snapshot.region.timeout=600000 # 
---------------------------------------------- < Arths \u914D\u7F6E > ----------------------------------------------- #
 # \u662F\u5426\u542F\u7528arthas\u4EE5\u4FBF\u4E8E\u5206\u6790\u4EFB\u52A1\u7684\u6027\u80FD
-fire.analysis.arthas.enable = false
+fire.analysis.arthas.enable=false
 # \u662F\u5426\u5728container\u7AEF\u542F\u52A8arthas
-fire.analysis.arthas.container.enable = false
+fire.analysis.arthas.container.enable=false
 # \u4EE5fire.analysis.arthas.conf.\u4E3A\u524D\u7F00\u7684\u914D\u7F6E\u652F\u6301arthas\u5168\u90E8\u7684\u53C2\u6570
-
 # --------------------------------------------- < \u914D\u7F6E\u4E2D\u5FC3\u914D\u7F6E > --------------------------------------------- #
 # \u6CE8\uFF1A\u914D\u7F6E\u4E2D\u5FC3\u7CFB\u7EDF\u5F02\u5E38\u65F6\u53EF\u8BBE\u7F6E\u4E3Afalse\uFF0C\u4E0D\u53D7\u914D\u7F6E\u4E2D\u5FC3\u5F71\u54CD\uFF0C\u53EF\u6B63\u5E38\u53D1\u5E03\u548C\u8FD0\u884C\uFF0C\u4EFB\u52A1\u7EA7\u522B\u914D\u7F6E\u4E0D\u751F\u6548
-fire.config_center.enable = false
+fire.config_center.enable=false
 # \u672C\u5730\u8FD0\u884C\u73AF\u5883\u4E0B\uFF08Windows\u3001Mac\uFF09\u662F\u5426\u8C03\u7528\u914D\u7F6E\u4E2D\u5FC3\u63A5\u53E3\u83B7\u53D6\u914D\u7F6E\u4FE1\u606F\uFF0C\u4EFB\u52A1\u7EA7\u522B\u914D\u7F6E\u4E0D\u751F\u6548
-fire.config_center.local.enable = false
+fire.config_center.local.enable=false
 # \u5E73\u53F0\u5C42\u4E3A\u6BCF\u4E2A\u4EFB\u52A1\u5206\u914D\u7684\u552F\u4E00id\u6807\u8BC6\uFF0C\u7528\u4E8E\u533A\u5206\u4E0D\u540C\u4EFB\u52A1\u7684\u914D\u7F6E\u6587\u4EF6\u83B7\u53D6
 # fire.config_center.app.id
\ No newline at end of file
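The key/value respacing in the properties hunks above is purely cosmetic: java.util.Properties trims whitespace around the separator, so `key = value` and `key=value` load to the same string. A self-contained check:

    import java.io.StringReader
    import java.util.Properties

    val spaced = new Properties(); spaced.load(new StringReader("fire.rest.enable = true"))
    val tight = new Properties(); tight.load(new StringReader("fire.rest.enable=true"))
    assert(spaced.getProperty("fire.rest.enable") == tight.getProperty("fire.rest.enable"))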
diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala
index 4c4082aa..5742e5c7 100644
--- a/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala
+++ b/fire-engines/fire-flink/src/main/scala-flink-1.12/com/zto/fire/flink/lineage/LineageContext.scala
@@ -28,19 +28,20 @@ import org.apache.flink.table.operations.{CatalogSinkModifyOperation, Operation}
 import org.apache.flink.table.planner.operations.PlannerQueryOperation
 import org.apache.flink.table.planner.plan.metadata.FlinkDefaultRelMetadataProvider
 import org.apache.flink.table.planner.plan.schema.TableSourceTable
-
 import java.util
 import scala.collection.JavaConverters.{asScalaBufferConverter, asScalaSetConverter, seqAsJavaListConverter}
 import scala.collection.mutable.ListBuffer
 import scala.language.postfixOps

 /**
- * Entry point for Flink SQL lineage parsing
- *
+ * Flink SQL parser that resolves table column lineage
  * @author wsczm
  */
 class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {

+  /**
+   * Validates the SQL fields
+   */
   private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = {
@@ -53,6 +54,11 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
     }
   }

+  /**
+   * Builds the lineage relationships
+   * @param sinkTable
+   * @param optRelNode
+   */
   def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode): ListBuffer[LineageResult] = {
     val targetColumnList: util.List[String] = tableEnv.from(sinkTable)
       .getSchema
@@ -88,13 +94,13 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
       }
     }
     resultList
-
-
   }

   /**
-   * @param sql
-   *            insert into sql
+   * Builds the lineage relationships
+   * @param sql INSERT INTO
+   *        1. Obtain the RelNode
+   *        2. Build the lineage from the RelNode
    */
   def analyzeLineage(sql: String) = {
     RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE))
@@ -104,6 +110,10 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
     buildFiledLineageResult(sinkTable, oriRelNode)
   }

+  /**
+   * Builds the RelNode relational expression tree from the SqlNode and metadata
+   * @param singleSql INSERT INTO
+   */
   private def parseStatement(singleSql: String): Tuple2[String, RelNode] = {
     val operation = parseValidateConvert(singleSql)
     operation match {
@@ -115,6 +125,12 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
       throw new TableException("Only insert is supported now.")
     }
   }
+
+  /**
+   * Gets the Operation type of the SQL
+   *
+   * @param singleSql INSERT INTO
+   */
   private def parseValidateConvert(singleSql: String) = {
     RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE))
     val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql)

diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala
index bbe87de2..86c2f65d 100644
--- a/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala
+++ b/fire-engines/fire-flink/src/main/scala-flink-1.13/com/zto/fire/flink/lineage/LineageContext.scala
@@ -28,15 +28,21 @@ import org.apache.flink.table.operations.{CatalogSinkModifyOperation, Operation}
 import org.apache.flink.table.planner.operations.PlannerQueryOperation
 import org.apache.flink.table.planner.plan.metadata.FlinkDefaultRelMetadataProvider
 import org.apache.flink.table.planner.plan.schema.TableSourceTable
-
 import java.util
 import scala.collection.JavaConverters.asScalaSetConverter
 import scala.collection.mutable.ListBuffer
 import scala.language.postfixOps
-
+/**
+ * Flink SQL parser that resolves table column lineage
+ *
+ * @author wsczm
+ */
 class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {

+  /**
+   * Validates the SQL fields
+   */
   private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = {
     val queryFieldList = relNode.getRowType.getFieldNames
     if (queryFieldList.size() != sinkFieldList.size()) {
@@ -49,17 +55,18 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
     }
   }

+  /**
+   * Builds the lineage relationships
+   * @param sinkTable
+   * @param optRelNode
+   */
   def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode): ListBuffer[LineageResult] = {
-    val targetColumnList = tableEnv.from(sinkTable)
-      .getResolvedSchema
-      .getColumnNames
+    val targetColumnList = tableEnv.from(sinkTable).getResolvedSchema.getColumnNames
     validateSchema(sinkTable, optRelNode, targetColumnList)
     val metadataQuery = optRelNode.getCluster.getMetadataQuery
     val resultList = ListBuffer[LineageResult]()
-
     for (index <- 0 until targetColumnList.size) {
       val targetColumn = targetColumnList.get(index)
-
       val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index).asScala
       import scala.collection.JavaConverters
       if (relColumnOriginSet.nonEmpty) {
@@ -83,12 +90,15 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
       }
     }
     resultList
-
-
   }

+  /**
+   * Builds the lineage relationships
+   * @param sql INSERT INTO
+   *        1. Obtain the RelNode
+   *        2. Build the lineage from the RelNode
+   */
   def analyzeLineage(sql: String) = {
-
     RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE))
     val parsed = parseStatement(sql)
     val sinkTable = parsed._1
@@ -96,6 +106,10 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
     buildFiledLineageResult(sinkTable, oriRelNode)
   }

+  /**
+   * Builds the RelNode relational expression tree from the SqlNode and metadata
+   * @param singleSql INSERT INTO
+   */
   private def parseStatement(singleSql: String): Tuple2[String, RelNode] = {
     val operation = parseValidateConvert(singleSql)
     operation match {
@@ -106,10 +120,12 @@ class LineageContext(tableEnv: TableEnvironmentImpl) extends Logging {
     case _ =>
       throw new TableException("Only insert is supported now.")
   }
-
-
   }

+  /**
+   * Gets the Operation type of the SQL
+   * @param singleSql INSERT INTO
+   */
   private def parseValidateConvert(singleSql: String) = {
     RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE))
     val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql)

diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala
index 79478a47..50d72f0c 100644
--- a/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala
+++ b/fire-engines/fire-flink/src/main/scala-flink-1.14/com/zto/fire/flink/lineage/LineageContext.scala
@@ -28,15 +28,21 @@ import org.apache.flink.table.operations.{CatalogSinkModifyOperation, Operation}
 import org.apache.flink.table.planner.operations.PlannerQueryOperation
 import org.apache.flink.table.planner.plan.metadata.FlinkDefaultRelMetadataProvider
 import org.apache.flink.table.planner.plan.schema.TableSourceTable
-
 import java.util
 import scala.collection.JavaConverters.asScalaSetConverter
 import scala.collection.mutable.ListBuffer
 import scala.language.postfixOps
-
+/**
+ * Flink SQL parser that resolves table column lineage
+ *
+ * @author wsczm
+ */
 class LineageContext(tableEnv: TableEnvironmentImpl) {

+  /**
+   * Validates the SQL fields
+   */
   private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = {
     val queryFieldList = relNode.getRowType.getFieldNames
     if (queryFieldList.size() != sinkFieldList.size()) {
@@ -49,6 +55,9 @@ class LineageContext(tableEnv: TableEnvironmentImpl) {
     }
   }

+  /**
+   * Builds the lineage relationships
+   */
   def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode):ListBuffer[LineageResult] = {
     val targetColumnList = tableEnv.from(sinkTable)
       .getResolvedSchema
@@ -82,12 +91,15 @@ class LineageContext(tableEnv: TableEnvironmentImpl) {
       }
     }
     resultList
-
-
   }

+  /**
+   * Builds the lineage relationships
+   * @param sql INSERT INTO
+   *        1. Obtain the RelNode
+   *        2. Build the lineage from the RelNode
+   */
   def analyzeLineage(sql: String) = {
-
     RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE))
     val parsed = parseStatement(sql)
     val sinkTable = parsed._1
@@ -95,6 +107,10 @@ class LineageContext(tableEnv: TableEnvironmentImpl) {
     buildFiledLineageResult(sinkTable, oriRelNode)
   }

+  /**
+   * Builds the RelNode relational expression tree from the SqlNode and metadata
+   * @param singleSql INSERT INTO
+   */
   private def parseStatement(singleSql: String): Tuple2[String, RelNode] = {
     val operation = parseValidateConvert(singleSql)
     operation match {
@@ -105,10 +121,12 @@ class LineageContext(tableEnv: TableEnvironmentImpl) {
     case _ =>
       throw new TableException("Only insert is supported now.")
   }
-
-
   }

+  /**
+   * Gets the Operation type of the SQL
+   * @param singleSql INSERT INTO
+   */
   private def parseValidateConvert(singleSql: String) = {
     RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE))
     val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql)
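The 1.15 variant below promotes `import scala.collection.JavaConversions._` to the top of the file; JavaConversions has been deprecated since Scala 2.12, and the explicit JavaConverters style used by the 1.12/1.13 variants avoids its implicit conversions. A minimal illustration of the explicit form (with a hypothetical set):

    import java.util.{HashSet => JHashSet}
    import scala.collection.JavaConverters._

    val javaSet = new JHashSet[String]()
    javaSet.add("t_source.id")
    val scalaSet = javaSet.asScala // explicit, deprecation-free view of the Java set
    if (scalaSet.nonEmpty) scalaSet.foreach(println)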
--git a/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala index 04da7698..e25be5d3 100644 --- a/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala +++ b/fire-engines/fire-flink/src/main/scala-flink-1.15/com/zto/fire/flink/lineage/LineageContext.scala @@ -28,14 +28,17 @@ import org.apache.flink.table.operations.{Operation, SinkModifyOperation} import org.apache.flink.table.planner.operations.PlannerQueryOperation import org.apache.flink.table.planner.plan.metadata.FlinkDefaultRelMetadataProvider import org.apache.flink.table.planner.plan.schema.TableSourceTable - import java.util import scala.collection.mutable.ListBuffer import scala.language.postfixOps +import scala.collection.JavaConversions._ class LineageContext(tableEnv: TableEnvironmentImpl) { + /** + * SQL field validation + */ private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = { val queryFieldList = relNode.getRowType.getFieldNames if (queryFieldList.size() != sinkFieldList.size()) { @@ -48,6 +51,11 @@ class LineageContext(tableEnv: TableEnvironmentImpl) { } } + /** + * Build the lineage relationships + * @param sinkTable the sink table name + * @param optRelNode the optimized RelNode of the query + */ def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode): ListBuffer[LineageResult] = { val targetColumnList = tableEnv.from(sinkTable) .getResolvedSchema @@ -55,13 +63,9 @@ class LineageContext(tableEnv: TableEnvironmentImpl) { validateSchema(sinkTable, optRelNode, targetColumnList) val metadataQuery = optRelNode.getCluster.getMetadataQuery val resultList = ListBuffer[LineageResult]() - for (index <- 0 until targetColumnList.size) { val targetColumn = targetColumnList.get(index) - val relColumnOriginSet = metadataQuery.getColumnOrigins(optRelNode, index) - import scala.collection.JavaConversions._ - if (relColumnOriginSet.nonEmpty) { for (rco: RelColumnOrigin <- relColumnOriginSet) { // table @@ -83,12 +87,15 @@ class LineageContext(tableEnv: TableEnvironmentImpl) { } } resultList - - } + /** + * Build the lineage relationships + * @param sql INSERT INTO + * 1. Obtain the RelNode + * 2. Build the lineage from the RelNode + */ def analyzeLineage(sql: String) = { - RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) val parsed = parseStatement(sql) val sinkTable = parsed._1 @@ -96,6 +103,10 @@ class LineageContext(tableEnv: TableEnvironmentImpl) { buildFiledLineageResult(sinkTable, oriRelNode) } + /** + * Build the relational expression (RelNode) tree from the SqlNode and metadata + * @param singleSql INSERT INTO + */ private def parseStatement(singleSql: String): Tuple2[String, RelNode] = { val operation = parseValidateConvert(singleSql) operation match { @@ -106,10 +117,12 @@ class LineageContext(tableEnv: TableEnvironmentImpl) { case _ => throw new TableException("Only insert is supported now.") } - - } + /** + * Get the Operation type corresponding to the SQL statement + * @param singleSql INSERT INTO + */ private def parseValidateConvert(singleSql: String) = { RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql) diff --git a/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala b/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala index 2a69b98c..285c41cd 100644 ---
a/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala +++ b/fire-engines/fire-flink/src/main/scala-flink-1.16/com/zto/fire/flink/lineage/LineageContext.scala @@ -35,6 +35,9 @@ import scala.language.postfixOps class LineageContext(tableEnv: TableEnvironmentImpl) { + /** + * SQL field validation + */ private def validateSchema(sinkTable: String, relNode: RelNode, sinkFieldList: util.List[String]): Unit = { val queryFieldList = relNode.getRowType.getFieldNames if (queryFieldList.size() != sinkFieldList.size()) { @@ -47,6 +50,11 @@ class LineageContext(tableEnv: TableEnvironmentImpl) { } } + /** + * Build the lineage relationships + * @param sinkTable the sink table name + * @param optRelNode the optimized RelNode of the query + */ def buildFiledLineageResult(sinkTable: String, optRelNode: RelNode): ListBuffer[LineageResult] = { val targetColumnList = tableEnv.from(sinkTable) .getResolvedSchema @@ -82,10 +90,14 @@ class LineageContext(tableEnv: TableEnvironmentImpl) { } } resultList - - } + /** + * Build the lineage relationships + * @param sql INSERT INTO + * 1. Obtain the RelNode + * 2. Build the lineage from the RelNode + */ def analyzeLineage(sql: String) = { RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) @@ -95,6 +107,10 @@ class LineageContext(tableEnv: TableEnvironmentImpl) { buildFiledLineageResult(sinkTable, oriRelNode) } + /** + * Build the relational expression (RelNode) tree from the SqlNode and metadata + * @param singleSql INSERT INTO + */ private def parseStatement(singleSql: String): Tuple2[String, RelNode] = { val operation = parseValidateConvert(singleSql) operation match { @@ -105,10 +121,12 @@ class LineageContext(tableEnv: TableEnvironmentImpl) { case _ => throw new TableException("Only insert is supported now.") } - - } + /** + * Get the Operation type corresponding to the SQL statement + * @param singleSql INSERT INTO + */ private def parseValidateConvert(singleSql: String) = { RelMetadataQueryBase.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(FlinkDefaultRelMetadataProvider.INSTANCE)) val operations: util.List[Operation] = tableEnv.getParser.parse(singleSql) diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelColumnOrigin.java b/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelColumnOrigin.java index 358299d5..c0a19a23 100644 --- a/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelColumnOrigin.java +++ b/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelColumnOrigin.java @@ -19,6 +19,7 @@ package org.apache.calcite.rel.metadata; +import lombok.Getter; import org.apache.calcite.plan.RelOptTable; /** @@ -36,6 +37,11 @@ import org.apache.calcite.plan.RelOptTable; public class RelColumnOrigin { // ~ Instance fields -------------------------------------------------------- + /** + * -- GETTER -- + * Returns table of origin. + */ + @Getter private final RelOptTable originTable; private final int iOriginColumn; @@ -71,13 +77,7 @@ public class RelColumnOrigin { } // ~ Methods ---------------------------------------------------------------- - - /** - * Returns table of origin.
- */ - public RelOptTable getOriginTable() { - return originTable; - } + // TODO: ------------ start: custom modifications --------------- // /** * Returns the 0-based index of column in origin table; whether this ordinal @@ -105,6 +105,7 @@ public class RelColumnOrigin { return transform; } + // TODO: ------------ end: custom modifications --------------- // @Override public boolean equals(Object obj) { if (!(obj instanceof RelColumnOrigin)) { diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java b/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java index f3f0f57d..9bab7661 100644 --- a/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java +++ b/fire-enhance/apache-flink/src/main/java-flink-1.12/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java @@ -22,50 +22,19 @@ package org.apache.calcite.rel.metadata; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.SingleRel; -import org.apache.calcite.rel.core.Aggregate; -import org.apache.calcite.rel.core.AggregateCall; -import org.apache.calcite.rel.core.Calc; -import org.apache.calcite.rel.core.Correlate; -import org.apache.calcite.rel.core.Exchange; -import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.Join; -import org.apache.calcite.rel.core.Match; -import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.SetOp; -import org.apache.calcite.rel.core.Snapshot; -import org.apache.calcite.rel.core.Sort; -import org.apache.calcite.rel.core.TableFunctionScan; -import org.apache.calcite.rel.core.TableModify; -import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.core.*; import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexFieldAccess; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexLocalRef; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexPatternFieldRef; -import org.apache.calcite.rex.RexShuttle; -import org.apache.calcite.rex.RexVisitor; -import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.rex.*; import org.apache.calcite.util.BuiltInMethod; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - /** * Modified based on calcite's source code org.apache.calcite.rel.metadata.RelMdColumnOrigins * @@ -104,7 +73,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Aggregate rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { if (iOutputColumn < rel.getGroupCount()) { // get actual index of Group columns.
return mq.getColumnOrigins(rel.getInput(), rel.getGroupSet().asList().get(iOutputColumn)); @@ -126,7 +95,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Join rel, RelMetadataQuery mq, - int iOutputColumn) { + int iOutputColumn) { int nLeftColumns = rel.getLeft().getRowType().getFieldList().size(); Set<RelColumnOrigin> set; boolean derived = false; @@ -148,6 +117,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(SetOp rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { final Set<RelColumnOrigin> set = new LinkedHashSet<>(); for (RelNode input : rel.getInputs()) { Set<RelColumnOrigin> inputSet = mq.getColumnOrigins(input, iOutputColumn); @@ -212,15 +182,16 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Snapshot rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } + /** * Support field lineage for watermark */ public Set<RelColumnOrigin> getColumnOrigins(SingleRel rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } @@ -228,7 +199,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Project rel, - final RelMetadataQuery mq, int iOutputColumn) { + final RelMetadataQuery mq, int iOutputColumn) { final RelNode input = rel.getInput(); RexNode rexNode = rel.getProjects().get(iOutputColumn); @@ -298,7 +269,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Calc rel, - final RelMetadataQuery mq, int iOutputColumn) { + final RelMetadataQuery mq, int iOutputColumn) { final RelNode input = rel.getInput(); final RexShuttle rexShuttle = new RexShuttle() { @@ -322,28 +293,29 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Filter rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } public Set<RelColumnOrigin> getColumnOrigins(Sort rel, RelMetadataQuery mq, - int iOutputColumn) { + int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } public Set<RelColumnOrigin> getColumnOrigins(TableModify rel, RelMetadataQuery mq, - int iOutputColumn) { + int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } public Set<RelColumnOrigin> getColumnOrigins(Exchange rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } public Set<RelColumnOrigin> getColumnOrigins(TableFunctionScan rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { final Set<RelColumnOrigin> set = new LinkedHashSet<>(); Set<RelColumnMapping> mappings = rel.getColumnMappings(); if (mappings == null) { @@ -379,7 +351,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(RelNode rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { // NOTE jvs 28-Mar-2006: We may get this wrong for a physical table // expression which supports projections.
In that case, // it's up to the plugin writer to override with the @@ -532,7 +504,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> private Set<RelColumnOrigin> getMultipleColumns(RexNode rexNode, RelNode input, - final RelMetadataQuery mq) { + final RelMetadataQuery mq) { final Set<RelColumnOrigin> set = new LinkedHashSet<>(); final RexVisitor<Void> visitor = new RexVisitorImpl<Void>(true) { diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelColumnOrigin.java b/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelColumnOrigin.java index 358299d5..f54eaf64 100644 --- a/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelColumnOrigin.java +++ b/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelColumnOrigin.java @@ -69,9 +69,8 @@ public class RelColumnOrigin { this.isDerived = isDerived; this.transform = transform; } - + // TODO: ------------ start: custom modifications --------------- // // ~ Methods ---------------------------------------------------------------- - /** * Returns table of origin. */ @@ -89,6 +88,8 @@ public class RelColumnOrigin { return iOriginColumn; } + // TODO: ------------ end: custom modifications --------------- // + /** * Consider the query select a+b as c, d as e from t. The * output column c has two origins (a and b), both of them derived. The diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java b/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java index 3b50226b..02452e58 100644 --- a/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java +++ b/fire-enhance/apache-flink/src/main/java-flink-1.13/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java @@ -73,7 +73,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Aggregate rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { if (iOutputColumn < rel.getGroupCount()) { // get actual index of Group columns.
return mq.getColumnOrigins(rel.getInput(), rel.getGroupSet().asList().get(iOutputColumn)); @@ -95,7 +95,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Join rel, RelMetadataQuery mq, - int iOutputColumn) { + int iOutputColumn) { int nLeftColumns = rel.getLeft().getRowType().getFieldList().size(); Set<RelColumnOrigin> set; boolean derived = false; @@ -117,6 +117,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(SetOp rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { final Set<RelColumnOrigin> set = new LinkedHashSet<>(); for (RelNode input : rel.getInputs()) { Set<RelColumnOrigin> inputSet = mq.getColumnOrigins(input, iOutputColumn); @@ -181,7 +182,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Snapshot rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } @@ -189,7 +190,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(SingleRel rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } @@ -197,7 +198,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Project rel, - final RelMetadataQuery mq, int iOutputColumn) { + final RelMetadataQuery mq, int iOutputColumn) { final RelNode input = rel.getInput(); RexNode rexNode = rel.getProjects().get(iOutputColumn); @@ -267,7 +268,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Calc rel, - final RelMetadataQuery mq, int iOutputColumn) { + final RelMetadataQuery mq, int iOutputColumn) { final RelNode input = rel.getInput(); final RexShuttle rexShuttle = new RexShuttle() { @@ -291,28 +292,29 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(Filter rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } public Set<RelColumnOrigin> getColumnOrigins(Sort rel, RelMetadataQuery mq, - int iOutputColumn) { + int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } public Set<RelColumnOrigin> getColumnOrigins(TableModify rel, RelMetadataQuery mq, - int iOutputColumn) { + int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } public Set<RelColumnOrigin> getColumnOrigins(Exchange rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } public Set<RelColumnOrigin> getColumnOrigins(TableFunctionScan rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { final Set<RelColumnOrigin> set = new LinkedHashSet<>(); Set<RelColumnMapping> mappings = rel.getColumnMappings(); if (mappings == null) { @@ -348,7 +350,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> public Set<RelColumnOrigin> getColumnOrigins(RelNode rel, - RelMetadataQuery mq, int iOutputColumn) { + RelMetadataQuery mq, int iOutputColumn) { // NOTE jvs 28-Mar-2006: We may get this wrong for a physical table // expression which supports projections.
In that case, // it's up to the plugin writer to override with the @@ -501,7 +503,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> private Set<RelColumnOrigin> getMultipleColumns(RexNode rexNode, RelNode input, - final RelMetadataQuery mq) { + final RelMetadataQuery mq) { final Set<RelColumnOrigin> set = new LinkedHashSet<>(); final RexVisitor<Void> visitor = new RexVisitorImpl<Void>(true) { diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelColumnOrigin.java b/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelColumnOrigin.java index 131d7894..29f8e0b2 100644 --- a/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelColumnOrigin.java +++ b/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelColumnOrigin.java @@ -68,7 +68,7 @@ public class RelColumnOrigin { this.isDerived = isDerived; this.transform = transform; } - + // TODO: ------------ start: custom modifications --------------- // // ~ Methods ---------------------------------------------------------------- /** @@ -87,6 +87,7 @@ public class RelColumnOrigin { public int getOriginColumnOrdinal() { return iOriginColumn; } + // TODO: ------------ end: custom modifications --------------- // /** * Consider the query select a+b as c, d as e from t. The diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java b/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java index 3fbffe06..e0576893 100644 --- a/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java +++ b/fire-enhance/apache-flink/src/main/java-flink-1.14/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java @@ -114,7 +114,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> Set<RelColumnOrigin> set = getMultipleColumns(rexNode, input, mq); return createDerivedColumnOrigins(set); } - + // TODO: ------------ end: custom modifications --------------- // public Set<RelColumnOrigin> getColumnOrigins(Filter rel, RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); } diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelColumnOrigin.java b/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelColumnOrigin.java index 358299d5..74ca219c 100644 --- a/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelColumnOrigin.java +++ b/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelColumnOrigin.java @@ -69,7 +69,7 @@ public class RelColumnOrigin { this.isDerived = isDerived; this.transform = transform; } - + // TODO: ------------ start: custom modifications --------------- // // ~ Methods ---------------------------------------------------------------- /** @@ -89,6 +89,7 @@ public class RelColumnOrigin { return iOriginColumn; } + // TODO: ------------ end: custom modifications --------------- // /** * Consider the query select a+b as c, d as e from t. The * output column c has two origins (a and b), both of them derived.
The diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java b/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java index f3f0f57d..766e80b6 100644 --- a/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java +++ b/fire-enhance/apache-flink/src/main/java-flink-1.15/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java @@ -148,7 +148,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> Set<RelColumnOrigin> set = getMultipleColumns(rexNode, input, mq); return createDerivedColumnOrigins(set); } - + // TODO: ------------ end: custom modifications --------------- // public Set<RelColumnOrigin> getColumnOrigins(Filter rel, RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelColumnOrigin.java b/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelColumnOrigin.java index 358299d5..74ca219c 100644 --- a/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelColumnOrigin.java +++ b/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelColumnOrigin.java @@ -69,7 +69,7 @@ public class RelColumnOrigin { this.isDerived = isDerived; this.transform = transform; } - + // TODO: ------------ start: custom modifications --------------- // // ~ Methods ---------------------------------------------------------------- /** @@ -89,6 +89,7 @@ public class RelColumnOrigin { return iOriginColumn; } + // TODO: ------------ end: custom modifications --------------- // /** * Consider the query select a+b as c, d as e from t. The * output column c has two origins (a and b), both of them derived. The diff --git a/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java b/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java index f3f0f57d..1dbabd8c 100644 --- a/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java +++ b/fire-enhance/apache-flink/src/main/java-flink-1.16/org/apache/calcite/rel/metadata/RelMdColumnOrigins.java @@ -149,6 +149,7 @@ public class RelMdColumnOrigins implements MetadataHandler<BuiltInMetadata.ColumnOrigin> Set<RelColumnOrigin> set = getMultipleColumns(rexNode, input, mq); return createDerivedColumnOrigins(set); } - + // TODO: ------------ start: custom modifications --------------- // public Set<RelColumnOrigin> getColumnOrigins(Filter rel, RelMetadataQuery mq, int iOutputColumn) { return mq.getColumnOrigins(rel.getInput(), iOutputColumn); -- Gitee From 751f384b85dcdd0e24a4c240ea7d775c22a157b7 Mon Sep 17 00:00:00 2001 From: wsczm <646659000@qq.com> Date: Tue, 19 Dec 2023 10:43:31 +0800 Subject: =?UTF-8?q?flink=20sql=E5=AD=97=E6=AE=B5=E8=A1=80?= =?UTF-8?q?=E7=BC=98=E6=A0=BC=E5=BC=8F=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fire-core/src/main/resources/fire.properties | 175 ++++++++++--------- 1 file changed, 91 insertions(+), 84 deletions(-) diff --git a/fire-core/src/main/resources/fire.properties b/fire-core/src/main/resources/fire.properties index 9b675e4e..0b15c8cd 100644 --- a/fire-core/src/main/resources/fire.properties +++ b/fire-core/src/main/resources/fire.properties @@ -14,176 +14,183 @@ # See the License for the specific language governing permissions and # limitations under the License.
# + # ----------------------------------------------- < fire \u914D\u7F6E > ------------------------------------------------ # # \u5F53\u524Dfire\u6846\u67B6\u7684\u7248\u672C\u53F7 -fire.version=${project.version} +fire.version = ${project.version} # fire\u5185\u7F6E\u7EBF\u7A0B\u6C60\u5927\u5C0F -fire.thread.pool.size=3 +fire.thread.pool.size = 3 # fire\u5185\u7F6E\u5B9A\u65F6\u4EFB\u52A1\u7EBF\u7A0B\u6C60\u5927\u5C0F -fire.thread.pool.schedule.size=3 +fire.thread.pool.schedule.size = 3 # \u662F\u5426\u542F\u7528restful\u670D\u52A1 -fire.rest.enable=true +fire.rest.enable = true # \u7528\u4E8E\u8BBE\u7F6E\u662F\u5426\u505A\u63A5\u53E3\u6821\u9A8C -fire.rest.filter.enable=true +fire.rest.filter.enable = true # \u662F\u5426\u6253\u5370\u914D\u7F6E\u4FE1\u606F -fire.conf.show.enable=false +fire.conf.show.enable = false # fire.conf.deploy.engine=className\uFF0C\u5728\u4E0D\u540C\u5F15\u64CE\u5B9E\u73B0\u6A21\u5757\u4E2D\uFF0C\u6307\u5B9A\u5177\u4F53\u53EF\u83B7\u53D6\u914D\u7F6E\u4FE1\u606F\u7684EngineConf\u5B50\u7C7B\u5B9E\u73B0\uFF0C\u7528\u4E8E\u540C\u6B65\u914D\u7F6E\u5230\u5404container\u8282\u70B9 # \u662F\u5426\u6253\u5370restful\u5730\u5740 -fire.rest.url.show.enable=false +fire.rest.url.show.enable = false # \u662F\u5426\u542F\u7528hostname\u4F5C\u4E3Arest\u670D\u52A1\u7684\u8BBF\u95EE\u5730\u5740 -fire.rest.url.hostname=false +fire.rest.url.hostname = false # \u662F\u5426\u5173\u95EDfire\u5185\u7F6E\u7684\u6240\u6709\u7D2F\u52A0\u5668 -fire.acc.enable=true +fire.acc.enable = true # \u65E5\u5FD7\u7D2F\u52A0\u5668\u5F00\u5173 -fire.acc.log.enable=true +fire.acc.log.enable = true # \u591A\u503C\u7D2F\u52A0\u5668\u5F00\u5173 -fire.acc.multi.counter.enable=true +fire.acc.multi.counter.enable = true # \u591A\u65F6\u95F4\u7EF4\u5EA6\u7D2F\u52A0\u5668\u5F00\u5173 -fire.acc.multi.timer.enable=true +fire.acc.multi.timer.enable = true # fire\u6846\u67B6\u57CB\u70B9\u65E5\u5FD7\u5F00\u5173\uFF0C\u5173\u95ED\u4EE5\u540E\u5C06\u4E0D\u518D\u6253\u5370\u57CB\u70B9\u65E5\u5FD7 -fire.log.enable=true +fire.log.enable = true # \u7528\u4E8E\u9650\u5B9Afire\u6846\u67B6\u4E2Dsql\u65E5\u5FD7\u7684\u5B57\u7B26\u4E32\u957F\u5EA6 -fire.log.sql.length=100 +fire.log.sql.length = 100 # \u662F\u5426\u542F\u7528\u4E3Aconnector\u6CE8\u518Cshutdown hook\uFF0C\u5F53jvm\u9000\u51FA\u524Dclose -fire.connector.shutdown_hook.enable=false +fire.connector.shutdown_hook.enable = false # fire\u6846\u67B6\u9488\u5BF9jdbc\u64CD\u4F5C\u540E\u6570\u636E\u96C6\u7684\u7F13\u5B58\u7B56\u7565 -fire.jdbc.storage.level=memory_and_disk_ser +fire.jdbc.storage.level = memory_and_disk_ser # \u901A\u8FC7JdbcConnector\u67E5\u8BE2\u540E\u5C06\u6570\u636E\u96C6\u653E\u5230\u591A\u5C11\u4E2A\u5206\u533A\u4E2D\uFF0C\u9700\u6839\u636E\u5B9E\u9645\u7684\u7ED3\u679C\u96C6\u505A\u914D\u7F6E -fire.jdbc.query.partitions=10 +fire.jdbc.query.partitions = 10 # \u662F\u5426\u542F\u7528\u5B9A\u65F6\u8C03\u5EA6 -fire.task.schedule.enable=true +fire.task.schedule.enable = true # \u662F\u5426\u542F\u7528\u52A8\u6001\u914D\u7F6E -fire.dynamic.conf.enable=true +fire.dynamic.conf.enable = true # fire\u6846\u67B6rest\u63A5\u53E3\u670D\u52A1\u6700\u5927\u7EBF\u7A0B\u6570 -fire.restful.max.thread=8 +fire.restful.max.thread = 8 # quartz\u6700\u5927\u7EBF\u7A0B\u6C60\u5927\u5C0F -fire.quartz.max.thread=5 +fire.quartz.max.thread = 5 # fire\u6536\u96C6\u65E5\u5FD7\u4FDD\u7559\u7684\u6700\u5C11\u8BB0\u5F55\u6570 -fire.acc.log.min.size=500 +fire.acc.log.min.size = 500 # fire\u6536\u96C6\u65E5\u5FD7\u4FDD\u7559\u7684\u6700\u591A\u8BB0\u5F55\u6570 
-fire.acc.log.max.size=1000 +fire.acc.log.max.size = 1000 # timer\u7D2F\u52A0\u5668\u4FDD\u7559\u6700\u5927\u7684\u8BB0\u5F55\u6570 -fire.acc.timer.max.size=1000 +fire.acc.timer.max.size = 1000 # timer\u7D2F\u52A0\u5668\u6E05\u7406\u51E0\u5C0F\u65F6\u4E4B\u524D\u7684\u8BB0\u5F55 -fire.acc.timer.max.hour=12 +fire.acc.timer.max.hour = 12 # env\u7D2F\u52A0\u5668\u5F00\u5173 -fire.acc.env.enable=true +fire.acc.env.enable = true # env\u7D2F\u52A0\u5668\u4FDD\u7559\u6700\u591A\u7684\u8BB0\u5F55\u6570 -fire.acc.env.max.size=500 +fire.acc.env.max.size = 500 # env\u7D2F\u52A0\u5668\u4FDD\u7559\u6700\u5C11\u7684\u8BB0\u5F55\u6570 -fire.acc.env.min.size=100 +fire.acc.env.min.size = 100 # \u5B9A\u65F6\u8C03\u5EA6\u4EFB\u52A1\u9ED1\u540D\u5355\uFF0C\u914D\u7F6E\u7684value\u4E3A\u5B9A\u65F6\u4EFB\u52A1\u65B9\u6CD5\u540D\uFF0C\u591A\u4E2A\u4EE5\u9017\u53F7\u5206\u9694 -fire.scheduler.blacklist= +fire.scheduler.blacklist = # \u914D\u7F6E\u6253\u5370\u9ED1\u540D\u5355\uFF0C\u5305\u542B\u8BE5\u914D\u7F6E\u5C06\u4E0D\u88AB\u6253\u5370 -fire.conf.print.blacklist=.map.,pass,secret,zrc,connection,hdfs.ha,print.blacklist,yarn,namenode,metastore,address,redaction +fire.conf.print.blacklist = .map.,pass,secret,zrc,connection,hdfs.ha,print.blacklist,yarn,namenode,metastore,address,redaction # fire\u6846\u67B6restful\u7AEF\u53E3\u51B2\u7A81\u91CD\u8BD5\u6B21\u6570 -fire.restful.port.retry_num=3 +fire.restful.port.retry_num = 3 # fire\u6846\u67B6restful\u7AEF\u53E3\u51B2\u7A81\u91CD\u8BD5\u65F6\u95F4\uFF08ms\uFF09 -fire.restful.port.retry_duration=1000 +fire.restful.port.retry_duration = 1000 # \u65E5\u5FD7\u7684\u7EA7\u522B\uFF0C\u7EDF\u4E00\u524D\u7F00\u4E3A\uFF1Afire.log.level.conf. -fire.log.level.conf.org.apache.spark=INFO -fire.log.level.conf.org.spark_project=INFO -fire.log.level.conf.org.apache.kafka=WARN -fire.log.level.conf.org.apache.zookeeper=WARN -fire.log.level.conf.com.zto.fire=INFO -fire.log.level.conf.org.eclipse.jetty.server=ERROR +fire.log.level.conf.org.apache.spark = INFO +fire.log.level.conf.org.spark_project = INFO +fire.log.level.conf.org.apache.kafka = WARN +fire.log.level.conf.org.apache.zookeeper = WARN +fire.log.level.conf.com.zto.fire = INFO +fire.log.level.conf.org.eclipse.jetty.server = ERROR # \u662F\u5426\u5C06\u914D\u7F6E\u540C\u6B65\u5230executor\u3001taskmanager\u7AEF -fire.deploy_conf.enable=true +fire.deploy_conf.enable = true # \u6BCF\u4E2Ajvm\u5B9E\u4F8B\u5185\u90E8queue\u7528\u4E8E\u5B58\u653E\u5F02\u5E38\u5BF9\u8C61\u6570\u6700\u5927\u5927\u5C0F\uFF0C\u907F\u514D\u961F\u5217\u8FC7\u5927\u9020\u6210\u5185\u5B58\u6EA2\u51FA -fire.exception_bus.size=1000 +fire.exception_bus.size = 1000 # \u662F\u5426\u5F00\u542F\u5B9E\u65F6\u8840\u7F18\u91C7\u96C6 -fire.lineage.enable=true +fire.lineage.enable = true # \u662F\u5426\u5F00\u542F\u8840\u7F18\u7684debug\u6A21\u5F0F\u534F\u52A9\u6392\u67E5\u95EE\u9898 -fire.lineage.debug.enable=false +fire.lineage.debug.enable = false # \u662F\u5426\u91C7\u96C6\u539F\u59CB\u7684SQL\u8BED\u53E5 -fire.lineage.collect_sql.enable=true +fire.lineage.collect_sql.enable = true # \u662F\u5426\u5F00\u542F\u5C06\u8840\u7F18\u4FE1\u606F\u53D1\u9001\u5230\u6D88\u606F\u961F\u5217 -# fire.lineage.send.mq.enable = true -# fire.lineage.send.mq.url = node01:9092 -# fire.lineage.send.mq.topic = fire_lineage +fire.lineage.send.mq.enable = true +fire.lineage.send.mq.url = 10.50.0.35:9092 +fire.lineage.send.mq.topic = fire_lineage # \u8840\u7F18\u89E3\u6790\u5F02\u6B65\u7EBF\u7A0B\u6267\u884C\u7684\u6B21\u6570 -fire.lineage.run.count=360 +fire.lineage.run.count = 360 # 
\u8840\u7F18\u89E3\u6790\u5B58\u653E\u7684\u961F\u5217\u6570\u6700\u5927\u503C -fire.lineage.max.size=500 +fire.lineage.max.size = 500 # \u5B9A\u65F6\u89E3\u6790\u57CB\u70B9SQL\u7684\u521D\u59CB\u5EF6\u8FDF\uFF08s\uFF09 -fire.lineage.run.initialDelay=60 +fire.lineage.run.initialDelay = 60 # \u5B9A\u65F6\u89E3\u6790\u57CB\u70B9SQL\u7684\u6267\u884C\u9891\u7387\uFF08s\uFF09 -fire.lineage.run.period=120 +fire.lineage.run.period = 120 # \u7528\u4E8Ejdbc url\u7684\u8BC6\u522B\uFF0C\u5F53\u65E0\u6CD5\u901A\u8FC7driver class\u8BC6\u522B\u6570\u636E\u6E90\u65F6\uFF0C\u5C06\u4ECEurl\u4E2D\u7684\u7AEF\u53E3\u53F7\u8FDB\u884C\u533A\u5206\uFF0C\u4E0D\u540C\u6570\u636E\u914D\u7F6E\u4F7F\u7528\u7EDF\u4E00\u7684\u524D\u7F00\uFF1Afire.lineage.datasource.map. -fire.lineage.datasource.map.tidb=4000 -fire.lineage.datasource.map.doris=9030 -fire.lineage.datasource.map.presto=7890 +fire.lineage.datasource.map.tidb = 4000 +fire.lineage.datasource.map.doris = 9030 +fire.lineage.datasource.map.presto = 7890 # \u662F\u5426\u5F00\u542F\u914D\u7F6E\u81EA\u9002\u5E94\u524D\u7F00\uFF0C\u81EA\u52A8\u4E3A\u914D\u7F6E\u52A0\u4E0A\u5F15\u64CE\u524D\u7F00\uFF08spark.|flink.\uFF09 -fire.conf.adaptive.prefix=true +fire.conf.adaptive.prefix = true # \u7528\u6237\u7EDF\u4E00\u914D\u7F6E\u6587\u4EF6\uFF0C\u5141\u8BB8\u7528\u6237\u5728\u8BE5\u914D\u7F6E\u6587\u4EF6\u4E2D\u5B58\u653E\u516C\u5171\u7684\u914D\u7F6E\u4FE1\u606F\uFF0C\u4F18\u5148\u7EA7\u4F4E\u4E8E\u4EFB\u52A1\u914D\u7F6E\u6587\u4EF6\uFF08\u591A\u4E2A\u4EE5\u9017\u53F7\u5206\u9694\uFF09 -fire.user.common.conf=common.properties,conf-test.properties,conf-dev.properties,conf-prod.properties +fire.user.common.conf = common.properties,conf-test.properties,conf-dev.properties,conf-prod.properties # fire\u63A5\u53E3\u8BA4\u8BC1\u79D8\u94A5 -fire.rest.server.secret=fire +fire.rest.server.secret = fire # \u662F\u5426\u5728\u8C03\u7528shutdown\u65B9\u6CD5\u65F6\u4E3B\u52A8\u9000\u51FAjvm\u8FDB\u7A0B -fire.shutdown.auto.exit=true +fire.shutdown.auto.exit = true # \u8C03\u7528print\u6253\u5370\u65E5\u5FD7\u81F3\u591A\u591A\u5C11\u6761\uFF0C\u7528\u4E8E\u9650\u5236\u6BCF\u4E2Acontainer\u6253\u5370\u7684\u6570\u636E\u91CF\uFF0C\u907F\u514D\u5927\u65E5\u5FD7\u6587\u4EF6\u5360\u6EE1\u78C1\u76D8\uFF0C\u5F53\u503C\u5C0F\u4E8E\u7B49\u4E8E\u96F6\u65F6\u8868\u793A\u4E0D\u9650\u5236 -fire.print.limit=1000000 +fire.print.limit = 1000000 # \u7528\u4E8E\u6307\u5B9A\u5F53\u524D\u8FD0\u884C\u73AF\u5883\u662F\u5426\u4E3Alocal\u6A21\u5F0F\uFF08\u4E3B\u8981\u7528\u4E8Eflink-shell\u7684\u672C\u5730\u914D\u7F6E\u6587\u4EF6\u52A0\u8F7D\uFF09 # fire.env.local = false # \u662F\u5426\u542F\u7528\u57FA\u4E8E\u6CE8\u89E3\u8FDB\u884C\u914D\u7F6E\uFF08@Kafka\u3001@RocketMQ\u3001@Hive\u3001@HBase\u7B49\uFF09 -fire.conf.annotation.enable=true +fire.conf.annotation.enable = true # \u662F\u5426\u542F\u7528\u5F02\u5E38\u5806\u6808\u65E5\u5FD7\u7684\u91C7\u96C6\uFF0C\u4E00\u65E6\u5F00\u542F\uFF0C\u5C06\u81EA\u52A8\u91C7\u96C6sql+api\u7B49\u5F02\u5E38\u5806\u6808\uFF0C\u5E76\u53D1\u9001\u5230\u6307\u5B9A\u7684kafka topic\u4E2D -fire.analysis.log.exception.stack.enable=false +fire.analysis.log.exception.stack.enable = false # \u5F02\u5E38\u4FE1\u606F\u53D1\u9001MQ\u5931\u8D25\u65F6\u6700\u5927\u91CD\u8BD5\u6B21\u6570 -fire.analysis.log.exception.send.maxRetires=10 +fire.analysis.log.exception.send.maxRetires = 10 # \u5F02\u5E38\u6D88\u606F\u53D1\u9001MQ\u8D85\u65F6\u65F6\u95F4 -fire.analysis.log.exception.send.timeout=3000 +fire.analysis.log.exception.send.timeout = 3000 # 
\u662F\u5426\u81EA\u52A8\u63D0\u4EA4\u4EFB\u52A1 -fire.job.autoStart=true +fire.job.autoStart = true # fire\u6846\u67B6\u540C\u6B65\u7D2F\u52A0\u5668\u5728\u6BCF\u4E2Acontainer\u7AEF\u5B58\u653E\u5B57\u7B26\u4E32\u7684\u6700\u5927\u8BB0\u5F55\u6570 -fire.acc.sync.max.size=100 +fire.acc.sync.max.size = 100 + # ----------------------------------------------- < kafka \u914D\u7F6E > ----------------------------------------------- # # kafka\u96C6\u7FA4\u540D\u79F0\u4E0E\u96C6\u7FA4\u5730\u5740\u6620\u5C04\uFF0C\u4EFB\u52A1\u4E2D\u901A\u8FC7kafka.brokers.name=local\u5373\u53EF\u8FDE\u5230\u4EE5\u4E0B\u914D\u7F6E\u7684broker\u5730\u5740 # fire.kafka.cluster.map.local = localhost:9092,localhost02:9092 + # ----------------------------------------------- < hive \u914D\u7F6E > ------------------------------------------------ # # \u9ED8\u8BA4\u7684hive\u6570\u636E\u5E93 -fire.hive.default.database.name=tmp +fire.hive.default.database.name = tmp # \u9ED8\u8BA4\u7684hive\u5206\u533A\u5B57\u6BB5\u540D\u79F0 -fire.hive.table.default.partition.name=ds +fire.hive.table.default.partition.name = ds # \u79BB\u7EBF\u96C6\u7FA4hive metastore\u5730\u5740\uFF08\u522B\u540D\uFF1Alocal\uFF09\uFF0C\u4EFB\u52A1\u4E2D\u901A\u8FC7fire.hive.cluster=local\u5373\u53EF\u8FDE\u5230\u4E00\u4E0B\u914D\u7F6E\u7684thrift\u5730\u5740 # fire.hive.cluster.map.local = thrift://localhost:9083,thrift://localhost02:9083 # \u662F\u5426\u542F\u7528hive metastore url\u7684\u968F\u673A\u9009\u62E9 -fire.hive.metastore.url.random.enable=true +fire.hive.metastore.url.random.enable = true + # ----------------------------------------------- < HBase \u914D\u7F6E > ----------------------------------------------- # # \u4E00\u6B21\u8BFB\u5199HBase\u7684\u6570\u636E\u91CF -fire.hbase.batch.size=10000 +fire.hbase.batch.size = 10000 # fire\u6846\u67B6\u9488\u5BF9hbase\u64CD\u4F5C\u540E\u6570\u636E\u96C6\u7684\u7F13\u5B58\u7B56\u7565 -fire.hbase.storage.level=memory_and_disk_ser +fire.hbase.storage.level = memory_and_disk_ser # \u901A\u8FC7HBase scan\u540Erepartition\u7684\u5206\u533A\u6570\uFF0C\u9700\u6839\u636Escan\u540E\u7684\u6570\u636E\u91CF\u505A\u914D\u7F6E -fire.hbase.scan.partitions=-1 +fire.hbase.scan.partitions = -1 # \u540E\u7EED\u7248\u672C\u4F1A\u5E9F\u5F03\uFF0C\u5E9F\u5F03\u540Efire.hbase.scan.partitions\u9ED8\u8BA4\u503C\u6539\u4E3A1200 -fire.hbase.scan.repartitions=1200 +fire.hbase.scan.repartitions = 1200 # \u662F\u5426\u5F00\u542FHBase\u8868\u5B58\u5728\u5224\u65AD\u7684\u7F13\u5B58\uFF0C\u5F00\u542F\u540E\u8868\u5B58\u5728\u5224\u65AD\u5C06\u907F\u514D\u5927\u91CF\u7684connection\u6D88\u8017 -fire.hbase.table.exists.cache.enable=true +fire.hbase.table.exists.cache.enable = true # \u662F\u5426\u5F00\u542FHBase\u8868\u5B58\u5728\u5217\u8868\u7F13\u5B58\u7684\u5B9A\u65F6\u66F4\u65B0\u4EFB\u52A1 -fire.hbase.table.exists.cache.reload.enable=true +fire.hbase.table.exists.cache.reload.enable = true # \u5B9A\u65F6\u5237\u65B0\u7F13\u5B58HBase\u8868\u4EFB\u52A1\u7684\u521D\u59CB\u5EF6\u8FDF\uFF08s\uFF09 -fire.hbase.table.exists.cache.initialDelay=60 +fire.hbase.table.exists.cache.initialDelay = 60 # \u5B9A\u65F6\u5237\u65B0\u7F13\u5B58HBase\u8868\u4EFB\u52A1\u7684\u6267\u884C\u9891\u7387\uFF08s\uFF09 -fire.hbase.table.exists.cache.period=600 +fire.hbase.table.exists.cache.period = 600 # hbase\u96C6\u7FA4\u7684zk\u5730\u5740\uFF08\u522B\u540D\uFF1Alocal\uFF09\uFF0C\u4EFB\u52A1\u4E2D\u901A\u8FC7hbase.cluster=local\u5373\u53EF\u8FDE\u5230\u5BF9\u5E94\u7684hbase\u96C6\u7FA4 # fire.hbase.cluster.map.local = localhost:2181,localhost02:2181 + # 
hbase connection \u914D\u7F6E\uFF0C\u7EA6\u5B9A\u4EE5\uFF1Afire.hbase.conf.\u5F00\u5934\uFF0C\u6BD4\u5982\uFF1Afire.hbase.conf.hbase.rpc.timeout\u5BF9\u5E94hbase\u4E2D\u7684\u914D\u7F6E\u4E3Ahbase.rpc.timeout -fire.hbase.conf.hbase.zookeeper.property.clientPort=2181 -fire.hbase.conf.zookeeper.znode.parent=/hbase -fire.hbase.conf.hbase.rpc.timeout=600000 -fire.hbase.conf.hbase.snapshot.master.timeoutMillis=600000 -fire.hbase.conf.hbase.snapshot.region.timeout=600000 +fire.hbase.conf.hbase.zookeeper.property.clientPort = 2181 +fire.hbase.conf.zookeeper.znode.parent = /hbase +fire.hbase.conf.hbase.rpc.timeout = 600000 +fire.hbase.conf.hbase.snapshot.master.timeoutMillis = 600000 +fire.hbase.conf.hbase.snapshot.region.timeout = 600000 + # ---------------------------------------------- < Arthas \u914D\u7F6E > ----------------------------------------------- # # \u662F\u5426\u542F\u7528arthas\u4EE5\u4FBF\u4E8E\u5206\u6790\u4EFB\u52A1\u7684\u6027\u80FD -fire.analysis.arthas.enable=false +fire.analysis.arthas.enable = false # \u662F\u5426\u5728container\u7AEF\u542F\u52A8arthas -fire.analysis.arthas.container.enable=false +fire.analysis.arthas.container.enable = false # \u4EE5fire.analysis.arthas.conf.\u4E3A\u524D\u7F00\u7684\u914D\u7F6E\u652F\u6301arthas\u5168\u90E8\u7684\u53C2\u6570 + # --------------------------------------------- < \u914D\u7F6E\u4E2D\u5FC3\u914D\u7F6E > --------------------------------------------- # # \u6CE8\uFF1A\u914D\u7F6E\u4E2D\u5FC3\u7CFB\u7EDF\u5F02\u5E38\u65F6\u53EF\u8BBE\u7F6E\u4E3Afalse\uFF0C\u4E0D\u53D7\u914D\u7F6E\u4E2D\u5FC3\u5F71\u54CD\uFF0C\u53EF\u6B63\u5E38\u53D1\u5E03\u548C\u8FD0\u884C\uFF0C\u4EFB\u52A1\u7EA7\u522B\u914D\u7F6E\u4E0D\u751F\u6548 -fire.config_center.enable=false +fire.config_center.enable = false # \u672C\u5730\u8FD0\u884C\u73AF\u5883\u4E0B\uFF08Windows\u3001Mac\uFF09\u662F\u5426\u8C03\u7528\u914D\u7F6E\u4E2D\u5FC3\u63A5\u53E3\u83B7\u53D6\u914D\u7F6E\u4FE1\u606F\uFF0C\u4EFB\u52A1\u7EA7\u522B\u914D\u7F6E\u4E0D\u751F\u6548 -fire.config_center.local.enable=false +fire.config_center.local.enable = false # \u5E73\u53F0\u5C42\u4E3A\u6BCF\u4E2A\u4EFB\u52A1\u5206\u914D\u7684\u552F\u4E00id\u6807\u8BC6\uFF0C\u7528\u4E8E\u533A\u5206\u4E0D\u540C\u4EFB\u52A1\u7684\u914D\u7F6E\u6587\u4EF6\u83B7\u53D6 # fire.config_center.app.id \ No newline at end of file -- Gitee From 4cac4e30c5e93699e9c70794a08647e80bee84fe Mon Sep 17 00:00:00 2001 From: wsczm <646659000@qq.com> Date: Tue, 19 Dec 2023 15:34:52 +0800 Subject: =?UTF-8?q?flink=20sql=E5=AD=97=E6=AE=B5=E8=A1=80?= =?UTF-8?q?=E7=BC=98=E6=A0=BC=E5=BC=8F=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fire-core/src/main/resources/fire.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fire-core/src/main/resources/fire.properties b/fire-core/src/main/resources/fire.properties index 0b15c8cd..67216644 100644 --- a/fire-core/src/main/resources/fire.properties +++ b/fire-core/src/main/resources/fire.properties @@ -100,7 +100,7 @@ fire.lineage.debug.enable = false fire.lineage.collect_sql.enable = true # \u662F\u5426\u5F00\u542F\u5C06\u8840\u7F18\u4FE1\u606F\u53D1\u9001\u5230\u6D88\u606F\u961F\u5217 fire.lineage.send.mq.enable = true -fire.lineage.send.mq.url = 10.50.0.35:9092 +fire.lineage.send.mq.url = node01:9092 fire.lineage.send.mq.topic = fire_lineage # \u8840\u7F18\u89E3\u6790\u5F02\u6B65\u7EBF\u7A0B\u6267\u884C\u7684\u6B21\u6570 fire.lineage.run.count = 360 -- Gitee
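For reference, the LineageContext introduced by these patches is driven end to end roughly as follows. This is a minimal sketch assuming a Flink 1.15-style Table API: the table names, the datagen/print connectors, and the surrounding object are illustrative assumptions, not part of the patch series itself.

    import org.apache.flink.table.api.EnvironmentSettings
    import org.apache.flink.table.api.internal.TableEnvironmentImpl
    import com.zto.fire.flink.lineage.LineageContext

    object LineageUsageSketch {
      def main(args: Array[String]): Unit = {
        // LineageContext expects the concrete TableEnvironmentImpl type
        val settings = EnvironmentSettings.newInstance().inStreamingMode().build()
        val tableEnv = TableEnvironmentImpl.create(settings)

        // Illustrative source and sink tables
        tableEnv.executeSql("CREATE TABLE src (id BIGINT, name STRING) WITH ('connector' = 'datagen')")
        tableEnv.executeSql("CREATE TABLE dst (id BIGINT, name STRING) WITH ('connector' = 'print')")

        // analyzeLineage takes a single INSERT statement: it is parsed and
        // validated into a RelNode tree, and the patched RelMdColumnOrigins
        // handlers are then queried for the origin of each sink column.
        val results = new LineageContext(tableEnv)
          .analyzeLineage("INSERT INTO dst SELECT id, UPPER(name) FROM src")

        // Each LineageResult links one sink column to its source table and
        // column; derived columns such as UPPER(name) also carry the transform.
        results.foreach(println)
      }
    }

Note that only INSERT statements are accepted (parseStatement throws a TableException for anything else), so DDL must be executed separately before lineage analysis, as above.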