From 2431f72e5c87e8613619114ec0b35fbba2b877a4 Mon Sep 17 00:00:00 2001 From: Denys Kuzmenko Date: Thu, 17 Oct 2024 16:47:29 +0200 Subject: [PATCH] refactor --- .../mr/hive/HiveIcebergStorageHandler.java | 10 ++++++++++ .../apache/hadoop/hive/ql/ddl/DDLUtils.java | 5 ++--- .../table/info/desc/DescTableOperation.java | 12 ++++++----- .../misc/truncate/TruncateTableAnalyzer.java | 2 +- .../apache/hadoop/hive/ql/metadata/Hive.java | 4 ++-- .../hive/ql/metadata/HiveStorageHandler.java | 4 ++++ .../hive/ql/optimizer/StatsOptimizer.java | 2 +- .../hive/ql/parse/BaseSemanticAnalyzer.java | 12 +++++------ .../hadoop/hive/ql/parse/ParseUtils.java | 4 ++-- .../ql/parse/RewriteSemanticAnalyzer.java | 2 +- .../hadoop/hive/ql/stats/StatsUtils.java | 20 +++++++++---------- 11 files changed, 45 insertions(+), 32 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index bad8609a2f73..86a1a32c5ddf 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -426,6 +426,16 @@ public boolean canProvideBasicStatistics() { return true; } + @Override + public boolean canProvidePartitionStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable()); + Map summary = table.currentSnapshot().summary(); + if (summary != null) { + return Boolean.parseBoolean(summary.get(SnapshotSummary.PARTITION_SUMMARY_PROP)); + } + return false; + } + @Override public StorageFormatDescriptor getStorageFormatDescriptor(org.apache.hadoop.hive.metastore.api.Table table) throws SemanticException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLUtils.java index f1511a79894a..4e10507869f1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLUtils.java @@ -204,9 +204,8 @@ public static void setColumnsAndStorePartitionTransformSpecOfTable( HiveConf conf, Table tbl) { Optional> cols = Optional.ofNullable(columns); Optional> partCols = Optional.ofNullable(partitionColumns); - HiveStorageHandler storageHandler = tbl.getStorageHandler(); - - if (storageHandler != null && storageHandler.alwaysUnpartitioned()) { + + if (tbl.alwaysUnpartitioned()) { tbl.getSd().setCols(new ArrayList<>()); cols.ifPresent(c -> tbl.getSd().getCols().addAll(c)); if (partCols.isPresent() && !partCols.get().isEmpty()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java index 57caab14e826..921eef40e0bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java @@ -150,11 +150,13 @@ private void getColumnsNoColumnPath(Table table, Partition partition, List tblProps = table.getParameters() == null ? - new HashMap() : table.getParameters(); + new HashMap<>() : table.getParameters(); Map valueMap = new HashMap<>(); Map stateMap = new HashMap<>(); @@ -164,7 +166,7 @@ private void getColumnsNoColumnPath(Table table, Partition partition, List partitionProps = p.getParameters(); @@ -190,7 +192,7 @@ private void getColumnsNoColumnPath(Table table, Partition partition, List cols, List colStats, Deserializer deserializer) - throws SemanticException, HiveException, MetaException { + throws HiveException, MetaException { // when column name is specified in describe table DDL, colPath will be db_name.table_name.column_name String colName = desc.getColumnPath().split("\\.")[2]; List colNames = Lists.newArrayList(colName.toLowerCase()); @@ -199,7 +201,7 @@ private void getColumnDataColPathSpecified(Table table, Partition part, List tableProps = table.getParameters() == null ? - new HashMap() : table.getParameters(); + new HashMap<>() : table.getParameters(); if (table.isPartitionKey(colNames.get(0))) { getColumnDataForPartitionKeyColumn(table, cols, colStats, colNames, tableProps); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/truncate/TruncateTableAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/truncate/TruncateTableAnalyzer.java index 89468dd6bfd8..62a4ed6dd47d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/truncate/TruncateTableAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/truncate/TruncateTableAnalyzer.java @@ -166,7 +166,7 @@ private void addTruncateTableOutputs(ASTNode root, Table table, Map getPartitionsWithAuth(Table tbl, Map par */ public List getPartitions(Table tbl, Map partialPartSpec) throws HiveException { - if (tbl.getStorageHandler() != null && tbl.getStorageHandler().alwaysUnpartitioned()) { + if (tbl.alwaysUnpartitioned()) { return tbl.getStorageHandler().getPartitions(tbl, partialPartSpec, false); } else { return getPartitions(tbl, partialPartSpec, (short)-1); @@ -4554,7 +4554,7 @@ public boolean getPartitionsByExpr(Table tbl, ExprNodeDesc expr, HiveConf conf, try { Preconditions.checkNotNull(partitions); String defaultPartitionName = HiveConf.getVar(conf, ConfVars.DEFAULT_PARTITION_NAME); - if (tbl.getStorageHandler() != null && tbl.getStorageHandler().alwaysUnpartitioned()) { + if (tbl.alwaysUnpartitioned()) { partitions.addAll(tbl.getStorageHandler().getPartitionsByExpr(tbl, expr)); return false; } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java index f79be16590ed..f78d0eea416a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java @@ -260,6 +260,10 @@ default boolean canProvideBasicStatistics() { return false; } + default boolean canProvidePartitionStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + return false; + } + /** * Return some col statistics (Lower bounds, Upper bounds, Null value counts, NaN, total counts) calculated by * the underlying storage handler implementation. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index 3287aca25cb4..f4567c505371 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -932,7 +932,7 @@ private Collection> verifyAndGetPartColumnStats( private Long getRowCnt( ParseContext pCtx, TableScanOperator tsOp, Table tbl) throws HiveException { Long rowCnt = 0L; - if (tbl.isPartitioned() && !tbl.alwaysUnpartitioned()) { + if (tbl.isPartitioned() && StatsUtils.checkCanProvidePartitionStats(tbl)) { for (Partition part : pctx.getPrunedPartitions( tsOp.getConf().getAlias(), tsOp).getPartitions()) { if (!StatsUtils.areBasicStatsUptoDateForQueryAnswering(part.getTable(), part.getParameters())) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index f528d67716a5..9d1e95c37662 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -1256,7 +1256,7 @@ public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartit validatePartSpec(tableHandle, tmpPartSpec, ast, conf, false); List parts = tableHandle.getPartitionKeys(); - if (tableHandle.getStorageHandler() != null && tableHandle.getStorageHandler().alwaysUnpartitioned()) { + if (tableHandle.alwaysUnpartitioned()) { partSpec = tmpPartSpec; } else { partSpec = new LinkedHashMap<>(partspec.getChildCount()); @@ -1269,7 +1269,7 @@ public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartit // check if the partition spec is valid if (numDynParts > 0) { int numStaPart; - if (tableHandle.getStorageHandler() != null && tableHandle.getStorageHandler().alwaysUnpartitioned()) { + if (tableHandle.alwaysUnpartitioned()) { numStaPart = partSpec.size() - numDynParts; } else { numStaPart = parts.size() - numDynParts; @@ -1281,7 +1281,7 @@ public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartit // Partitions in partSpec is already checked via storage handler. // Hence no need to check for cases which are always unpartitioned. - if (tableHandle.getStorageHandler() == null || !tableHandle.getStorageHandler().alwaysUnpartitioned()) { + if (!tableHandle.alwaysUnpartitioned()) { // check the partitions in partSpec be the same as defined in table schema if (partSpec.keySet().size() != parts.size()) { ErrorPartSpec(partSpec, parts); @@ -1314,7 +1314,7 @@ public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartit partitions = db.getPartitions(tableHandle, partSpec); } else { // this doesn't create partition. - if (tableHandle.getStorageHandler() == null || !tableHandle.getStorageHandler().alwaysUnpartitioned()) { + if (!tableHandle.alwaysUnpartitioned()) { partHandle = db.getPartition(tableHandle, partSpec, false); } if (partHandle == null) { @@ -1706,7 +1706,7 @@ public static Map getPartSpec(ASTNode node) { public static void validatePartSpec(Table tbl, Map partSpec, ASTNode astNode, HiveConf conf, boolean shouldBeFull) throws SemanticException { - if (tbl.getStorageHandler() != null && tbl.getStorageHandler().alwaysUnpartitioned()) { + if (tbl.alwaysUnpartitioned()) { tbl.getStorageHandler().validatePartSpec(tbl, partSpec, Context.RewritePolicy.get(conf)); } else { tbl.validatePartColumnNames(partSpec, shouldBeFull); @@ -1725,7 +1725,7 @@ public static void validatePartSpec(Table tbl, Map partSpec, * @param partitionClausePresent Whether a partition clause is present in the query (e.g. PARTITION(last_name='Don')) */ protected static void validateUnsupportedPartitionClause(Table tbl, boolean partitionClausePresent) { - if (partitionClausePresent && tbl.getStorageHandler() != null && tbl.getStorageHandler().alwaysUnpartitioned()) { + if (partitionClausePresent && tbl.alwaysUnpartitioned()) { throw new UnsupportedOperationException("Using partition spec in query is unsupported for non-native table" + " backed by: " + tbl.getStorageHandler().toString()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 9cbd47123a0f..ba2c9cfbf135 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -579,8 +579,8 @@ public static Map> getFullPartitionSpecs( CommonTree ast, Table table, Configuration conf, boolean canGroupExprs) throws SemanticException { String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULT_PARTITION_NAME); Map colTypes = new HashMap<>(); - List partitionKeys = table.getStorageHandler() != null && table.getStorageHandler().alwaysUnpartitioned() ? - table.getStorageHandler().getPartitionKeys(table) : table.getPartitionKeys(); + List partitionKeys = table.alwaysUnpartitioned() ? + table.getStorageHandler().getPartitionKeys(table) : table.getPartitionKeys(); for (FieldSchema fs : partitionKeys) { colTypes.put(fs.getName().toLowerCase(), fs.getType()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java index f968c65443c5..bfd4b02295c7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java @@ -319,7 +319,7 @@ private static String normalizeColName(String colName) { protected void updateOutputs(Table targetTable) { markReadEntityForUpdate(); - if (targetTable.isPartitioned()) { + if (targetTable.isPartitioned() && !targetTable.alwaysUnpartitioned()) { List partitionsRead = getRestrictedPartitionSet(targetTable); if (!partitionsRead.isEmpty()) { // if there is WriteEntity with WriteType=UPDATE/DELETE for target table, replace it with diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 239f57b69b3e..e977822959e0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -276,8 +276,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p boolean estimateStats = HiveConf.getBoolVar(conf, ConfVars.HIVE_STATS_ESTIMATE_STATS); boolean metaTable = table.getMetaTable() != null; - if (!table.isPartitioned()) { - + if (!table.isPartitioned() || !checkCanProvidePartitionStats(table)) { Factory basicStatsFactory = new BasicStats.Factory(); if (estimateStats) { @@ -2035,13 +2034,12 @@ public static Range combineRange(Range range1, Range range2) { } public static boolean checkCanProvideStats(Table table) { - if (MetaStoreUtils.isExternalTable(table.getTTable())) { - if (MetaStoreUtils.isNonNativeTable(table.getTTable()) && table.getStorageHandler().canProvideBasicStatistics()) { - return true; - } - return false; - } - return true; + return !MetaStoreUtils.isExternalTable(table.getTTable()) || table.isNonNative() + && table.getStorageHandler().canProvideBasicStatistics(); + } + + public static boolean checkCanProvidePartitionStats(Table table) { + return !table.isNonNative() || table.getStorageHandler().canProvidePartitionStatistics(table); } /** @@ -2049,7 +2047,7 @@ public static boolean checkCanProvideStats(Table table) { * Can run additional checks compared to the version in StatsSetupConst. */ public static boolean areBasicStatsUptoDateForQueryAnswering(Table table, Map params) { - return checkCanProvideStats(table) == true ? StatsSetupConst.areBasicStatsUptoDate(params) : false; + return checkCanProvideStats(table) && StatsSetupConst.areBasicStatsUptoDate(params); } /** @@ -2057,7 +2055,7 @@ public static boolean areBasicStatsUptoDateForQueryAnswering(Table table, Map params, String colName) { - return checkCanProvideStats(table) == true ? StatsSetupConst.areColumnStatsUptoDate(params, colName) : false; + return checkCanProvideStats(table) && StatsSetupConst.areColumnStatsUptoDate(params, colName); } /**