Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelInput;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelWriter;
import org.apache.calcite.rel.core.TableFunctionScan;
import org.apache.calcite.rel.metadata.RelColumnMapping;
import org.apache.calcite.rel.type.RelDataType;
Expand All @@ -34,6 +35,9 @@

public class HiveTableFunctionScan extends TableFunctionScan implements HiveRelNode {

// Whether this is a LATERAL VIEW OUTER
private final boolean outer;

/**
* @param cluster
* cluster - Cluster that this relational expression belongs to
Expand All @@ -49,28 +53,54 @@ public class HiveTableFunctionScan extends TableFunctionScan implements HiveRelN
* rowType - Row type produced by function
* @param columnMappings
* columnMappings - Column mappings associated with this function
* @param outer
* outer - true if this is a LATERAL VIEW OUTER
*/
protected HiveTableFunctionScan(RelOptCluster cluster, RelTraitSet traitSet, List<RelNode> inputs,
RexNode rexCall, Type elementType, RelDataType rowType, Set<RelColumnMapping> columnMappings) {
RexNode rexCall, Type elementType, RelDataType rowType, Set<RelColumnMapping> columnMappings,
boolean outer) {
super(cluster, traitSet, inputs, rexCall, elementType, rowType, columnMappings);
this.outer = outer;
}

// Deserialization constructor: rebuilds this node from a serialized plan.
// Plans serialized before the "outer" attribute was introduced carry no
// such entry, so getBoolean defaults it to false, preserving the original
// (non-OUTER) LATERAL VIEW semantics for old plans.
public HiveTableFunctionScan(RelInput input) {
super(input);
this.outer = input.getBoolean("outer", false);
}

public static HiveTableFunctionScan create(RelOptCluster cluster, RelTraitSet traitSet,
List<RelNode> inputs, RexNode rexCall, Type elementType, RelDataType rowType,
Set<RelColumnMapping> columnMappings) throws CalciteSemanticException {
return new HiveTableFunctionScan(cluster, traitSet, inputs, rexCall, elementType, rowType,
columnMappings);
columnMappings, false);
}

/**
 * Static factory for a {@link HiveTableFunctionScan} that carries the
 * LATERAL VIEW OUTER flag through to the node.
 *
 * @param cluster        cluster that this relational expression belongs to
 * @param traitSet       trait set of the new node
 * @param inputs         input relational expressions
 * @param rexCall        function invocation expression
 * @param elementType    element type of the collection that will implement
 *                       this table, or null
 * @param rowType        row type produced by the function
 * @param columnMappings column mappings associated with this function
 * @param outer          true if this represents a LATERAL VIEW OUTER
 * @throws CalciteSemanticException declared for parity with the other
 *         {@code create} overload; construction itself does not throw it
 */
public static HiveTableFunctionScan create(RelOptCluster cluster, RelTraitSet traitSet,
    List<RelNode> inputs, RexNode rexCall, Type elementType, RelDataType rowType,
    Set<RelColumnMapping> columnMappings, boolean outer) throws CalciteSemanticException {
  return new HiveTableFunctionScan(
      cluster, traitSet, inputs, rexCall, elementType, rowType, columnMappings, outer);
}

/**
 * Indicates whether this scan originated from a LATERAL VIEW OUTER clause
 * (an input row whose UDTF produces no output is still emitted, padded
 * with NULLs).
 *
 * @return true if this represents a LATERAL VIEW OUTER
 */
public boolean isOuter() {
  return this.outer;
}

@Override
public RelWriter explainTerms(RelWriter pw) {
  // Render the standard TableFunctionScan terms first, then append the
  // "outer" marker only when set, so existing (non-OUTER) plan dumps keep
  // their current shape. RelWriter.item returns the writer itself, so the
  // ternary yields the same writer instance on both branches.
  super.explainTerms(pw);
  return outer ? pw.item("outer", true) : pw;
}

@Override
public TableFunctionScan copy(RelTraitSet traitSet, List<RelNode> inputs, RexNode rexCall,
Type elementType, RelDataType rowType, Set<RelColumnMapping> columnMappings) {
return new HiveTableFunctionScan(getCluster(), traitSet, inputs, rexCall,
elementType, rowType, columnMappings);
elementType, rowType, columnMappings, outer);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -703,14 +703,21 @@ private static QueryBlockInfo createASTLateralView(TableFunctionScan tfs, QueryB
sel.add(selexpr.node());

// place the SELECT clause under the LATERAL VIEW clause
ASTBuilder lateralview = ASTBuilder.construct(HiveParser.TOK_LATERAL_VIEW, "TOK_LATERAL_VIEW");
lateralview.add(sel.node());
final boolean isOuterLateralView = tfs instanceof HiveTableFunctionScan htfs && htfs.isOuter();
final int lateralViewToken = isOuterLateralView
? HiveParser.TOK_LATERAL_VIEW_OUTER
: HiveParser.TOK_LATERAL_VIEW;
final String lateralViewText = isOuterLateralView
? "TOK_LATERAL_VIEW_OUTER"
: "TOK_LATERAL_VIEW";
ASTBuilder lateralView = ASTBuilder.construct(lateralViewToken, lateralViewText);
lateralView.add(sel.node());

// finally, add the LATERAL VIEW clause under the left side source which is the base table.
lateralview.add(tableFunctionSource.ast);
lateralView.add(tableFunctionSource.ast);

Schema outputSchema = new Schema(tableFunctionSource.schema, new Schema(alias, lvFields));
return new QueryBlockInfo(outputSchema, lateralview.node());
return new QueryBlockInfo(outputSchema, lateralView.node());
}

private boolean isLateralView(RelNode relNode) {
Expand Down
13 changes: 8 additions & 5 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -1042,8 +1042,8 @@ boolean isCBOExecuted() {

@Override
boolean isCBOSupportedLateralView(ASTNode lateralView) {
// LATERAL VIEW OUTER not supported in CBO
return lateralView.getToken().getType() != HiveParser.TOK_LATERAL_VIEW_OUTER;
// Both LATERAL VIEW and LATERAL VIEW OUTER are supported in CBO.
return true;
}

@Override
Expand Down Expand Up @@ -2980,7 +2980,8 @@ private RelNode genJoinLogicalPlan(QB qb, ASTNode joinParseTree, Map<String, Rel
leftRel = aliasToRel.get(leftTableAlias);
} else if (SemanticAnalyzer.isJoinToken(left)) {
leftRel = genJoinLogicalPlan(qb, left, aliasToRel, outerNameToPosMap, outerRR);
} else if (left.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
} else if (left.getToken().getType() == HiveParser.TOK_LATERAL_VIEW
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: perhaps we could refactor all these type == HiveParser.TOK_LATERAL_VIEW || type == HiveParser.TOK_LATERAL_VIEW_OUTER into a single helper method?

|| left.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
leftRel = genLateralViewPlans(qb, left, aliasToRel);
} else {
assert (false);
Expand All @@ -2994,7 +2995,8 @@ private RelNode genJoinLogicalPlan(QB qb, ASTNode joinParseTree, Map<String, Rel
|| (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
rightTableAlias = getTableAlias(right);
rightRel = aliasToRel.get(rightTableAlias);
} else if (right.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
} else if (right.getToken().getType() == HiveParser.TOK_LATERAL_VIEW
|| right.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
rightRel = genLateralViewPlans(qb, right, aliasToRel);
} else {
assert (false);
Expand Down Expand Up @@ -3374,7 +3376,8 @@ private RelNode genLateralViewPlans(QB qb, ASTNode lateralView, Map<String, RelN

// next token is either the table alias name or another lateral view (which we will call
// recursively)
RelNode inputRel = next.getToken().getType() == HiveParser.TOK_LATERAL_VIEW
int nextType = next.getToken().getType();
RelNode inputRel = (nextType == HiveParser.TOK_LATERAL_VIEW || nextType == HiveParser.TOK_LATERAL_VIEW_OUTER)
? genLateralViewPlans(qb, next, aliasToRel)
: aliasToRel.get(getTableAlias(next));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
Expand Down Expand Up @@ -91,14 +90,15 @@ public class LateralViewPlan {

public LateralViewPlan(ASTNode lateralView, RelOptCluster cluster, RelNode inputRel,
RowResolver inputRR, UnparseTranslator unparseTranslator,
HiveConf conf, FunctionHelper functionHelper
) throws SemanticException {
HiveConf conf, FunctionHelper functionHelper) throws SemanticException {
// initialize global variables containing helper information
this.cluster = cluster;
this.unparseTranslator = unparseTranslator;
this.conf = conf;
this.functionHelper = functionHelper;

boolean isOuter = lateralView.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER;

// AST should have form of LATERAL_VIEW -> SELECT -> SELEXPR -> FUNCTION -> function info tree
ASTNode selExprAST = (ASTNode) lateralView.getChild(0).getChild(0);
ASTNode functionAST = (ASTNode) selExprAST.getChild(0);
Expand All @@ -118,7 +118,7 @@ public LateralViewPlan(ASTNode lateralView, RelOptCluster cluster, RelNode input

this.lateralViewRel = HiveTableFunctionScan.create(cluster,
TraitsUtil.getDefaultTraitSet(cluster), ImmutableList.of(inputRel), udtfCall,
null, retType, createColumnMappings(inputRel));
null, retType, createColumnMappings(inputRel), isOuter);
}

public static void validateLateralView(ASTNode lateralView) throws SemanticException {
Expand All @@ -128,8 +128,9 @@ public static void validateLateralView(ASTNode lateralView) throws SemanticExcep
}
ASTNode next = (ASTNode) lateralView.getChild(1);
if (!TABLE_ALIAS_TOKEN_TYPES.contains(next.getToken().getType()) &&
HiveParser.TOK_LATERAL_VIEW != next.getToken().getType()) {
throw new SemanticException(ASTErrorUtils.getMsg(
HiveParser.TOK_LATERAL_VIEW != next.getToken().getType() &&
HiveParser.TOK_LATERAL_VIEW_OUTER != next.getToken().getType()) {
throw new SemanticException(ASTErrorUtils.getMsg(
ErrorMsg.LATERAL_VIEW_INVALID_CHILD.getMsg(), lateralView));
}
}
Expand Down
16 changes: 16 additions & 0 deletions ql/src/test/queries/clientpositive/lateral_view_outer_cbo.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
CREATE TABLE test (id string, items array<string>);
INSERT INTO test VALUES ('A', array('a', 'b')), ('B', array('c')), ('D', array());

CREATE VIEW v AS
SELECT test.id AS id, item
FROM test
LATERAL VIEW OUTER explode(test.items) lv AS item;

-- CBO plan should contain `outer=[true]` in HiveTableFunctionScan node.
EXPLAIN CBO
SELECT id, item FROM v ORDER BY id, item;
-- Explain plan should contain `outer lateral view: true` in the UDTF Operator
EXPLAIN
SELECT id, item FROM v ORDER BY id, item;
-- One of the output rows should be ('D', NULL) since it's an outer lateral view.
SELECT id, item FROM v ORDER BY id, item;
160 changes: 160 additions & 0 deletions ql/src/test/results/clientpositive/llap/lateral_view_outer_cbo.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
PREHOOK: query: CREATE TABLE test (id string, items array<string>)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@test
POSTHOOK: query: CREATE TABLE test (id string, items array<string>)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test
PREHOOK: query: INSERT INTO test VALUES ('A', array('a', 'b')), ('B', array('c')), ('D', array())
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@test
POSTHOOK: query: INSERT INTO test VALUES ('A', array('a', 'b')), ('B', array('c')), ('D', array())
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test
POSTHOOK: Lineage: test.id SCRIPT []
POSTHOOK: Lineage: test.items SCRIPT []
PREHOOK: query: CREATE VIEW v AS
SELECT test.id AS id, item
FROM test
LATERAL VIEW OUTER explode(test.items) lv AS item
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@test
PREHOOK: Output: database:default
PREHOOK: Output: default@v
POSTHOOK: query: CREATE VIEW v AS
SELECT test.id AS id, item
FROM test
LATERAL VIEW OUTER explode(test.items) lv AS item
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@test
POSTHOOK: Output: database:default
POSTHOOK: Output: default@v
POSTHOOK: Lineage: v.id SIMPLE [(test)test.FieldSchema(name:id, type:string, comment:null), ]
POSTHOOK: Lineage: v.item SCRIPT [(test)test.FieldSchema(name:items, type:array<string>, comment:null), ]
PREHOOK: query: EXPLAIN CBO
SELECT id, item FROM v ORDER BY id, item
PREHOOK: type: QUERY
PREHOOK: Input: default@test
PREHOOK: Input: default@v
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN CBO
SELECT id, item FROM v ORDER BY id, item
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test
POSTHOOK: Input: default@v
#### A masked pattern was here ####
CBO PLAN:
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
HiveProject(id=[$0], item=[$6])
HiveTableFunctionScan(invocation=[LATERAL(explode($1), $0, $1, $2, $3, $4, $5)], rowType=[RecordType(VARCHAR(2147483647) id, VARCHAR(2147483647) ARRAY items, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, VARCHAR(2147483647) lv.item)], outer=[true])
HiveTableScan(table=[[default, test]], table:alias=[test])

PREHOOK: query: EXPLAIN
SELECT id, item FROM v ORDER BY id, item
PREHOOK: type: QUERY
PREHOOK: Input: default@test
PREHOOK: Input: default@v
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN
SELECT id, item FROM v ORDER BY id, item
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test
POSTHOOK: Input: default@v
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: test
properties:
insideView TRUE
Statistics: Num rows: 3 Data size: 6015 Basic stats: COMPLETE Column stats: PARTIAL
Lateral View Forward
Statistics: Num rows: 3 Data size: 6015 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: id (type: string)
outputColumnNames: id
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: PARTIAL
Lateral View Join Operator
outputColumnNames: _col0, _col6
Statistics: Num rows: 3 Data size: 6015 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: _col0 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: items (type: array<string>)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 5760 Basic stats: COMPLETE Column stats: PARTIAL
UDTF Operator
Statistics: Num rows: 3 Data size: 5760 Basic stats: COMPLETE Column stats: PARTIAL
function name: explode
outer lateral view: true
Lateral View Join Operator
outputColumnNames: _col0, _col6
Statistics: Num rows: 3 Data size: 6015 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: _col0 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: PARTIAL
Execution mode: llap
LLAP IO: all inputs
Reducer 2
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink

PREHOOK: query: SELECT id, item FROM v ORDER BY id, item
PREHOOK: type: QUERY
PREHOOK: Input: default@test
PREHOOK: Input: default@v
#### A masked pattern was here ####
POSTHOOK: query: SELECT id, item FROM v ORDER BY id, item
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test
POSTHOOK: Input: default@v
#### A masked pattern was here ####
A a
A b
B c
D NULL
Loading