Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(schema-compiler): Improve model transpiling performance #9178

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const NATIVE_IS_SUPPORTED = isNativeSupported();

const moduleFileCache = {};

const JINJA_SYNTAX = /{%|%}|{{|}}/ig;
const JINJA_SYNTAX = /{%|%}|{{|}}/;

export class DataSchemaCompiler {
constructor(repository, options = {}) {
Expand Down Expand Up @@ -114,10 +114,10 @@ export class DataSchemaCompiler {
}

transpileFile(file, errorsReport) {
if (R.endsWith('.jinja', file.fileName) ||
(R.endsWith('.yml', file.fileName) || R.endsWith('.yaml', file.fileName))
if (file.fileName.endsWith('.jinja') ||
(file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml'))
// TODO do Jinja syntax check with jinja compiler
&& file.content.match(JINJA_SYNTAX)
&& JINJA_SYNTAX.test(file.content)
) {
if (NATIVE_IS_SUPPORTED !== true) {
throw new Error(
Expand All @@ -129,9 +129,9 @@ export class DataSchemaCompiler {
this.yamlCompiler.getJinjaEngine().loadTemplate(file.fileName, file.content);

return file;
} else if (R.endsWith('.yml', file.fileName) || R.endsWith('.yaml', file.fileName)) {
} else if (file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml')) {
return file;
} else if (R.endsWith('.js', file.fileName)) {
} else if (file.fileName.endsWith('.js')) {
return this.transpileJsFile(file, errorsReport);
} else {
return file;
Expand Down Expand Up @@ -221,13 +221,12 @@ export class DataSchemaCompiler {

compiledFiles[file.fileName] = true;

if (R.endsWith('.js', file.fileName)) {
if (file.fileName.endsWith('.js')) {
this.compileJsFile(file, errorsReport, cubes, contexts, exports, asyncModules, toCompile, compiledFiles);
} else if (R.endsWith('.yml.jinja', file.fileName) || R.endsWith('.yaml.jinja', file.fileName) ||
(
R.endsWith('.yml', file.fileName) || R.endsWith('.yaml', file.fileName)
// TODO do Jinja syntax check with jinja compiler
) && file.content.match(JINJA_SYNTAX)
} else if (file.fileName.endsWith('.yml.jinja') || file.fileName.endsWith('.yaml.jinja') ||
(file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml'))
// TODO do Jinja syntax check with jinja compiler
&& JINJA_SYNTAX.test(file.content)
) {
asyncModules.push(() => this.yamlCompiler.compileYamlWithJinjaFile(
file,
Expand All @@ -241,7 +240,7 @@ export class DataSchemaCompiler {
this.standalone ? {} : this.cloneCompileContextWithGetterAlias(this.compileContext),
this.pythonContext
));
} else if (R.endsWith('.yml', file.fileName) || R.endsWith('.yaml', file.fileName)) {
} else if (file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml')) {
this.yamlCompiler.compileYamlFile(file, errorsReport, cubes, contexts, exports, asyncModules, toCompile, compiledFiles);
}
}
Expand Down
57 changes: 28 additions & 29 deletions packages/cubejs-schema-compiler/src/compiler/YamlCompiler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ import { JinjaEngine, NativeInstance, PythonCtx } from '@cubejs-backend/native';
import type { FileContent } from '@cubejs-backend/shared';

import { getEnv } from '@cubejs-backend/shared';
import { CubePropContextTranspiler, transpiledFields, transpiledFieldsPatterns } from './transpilers';
import {
CubePropContextTranspiler,
transpiledFieldMatch,
transpiledFields,
} from './transpilers';
import { PythonParser } from '../parser/PythonParser';
import { CubeSymbols } from './CubeSymbols';
import { DataSchemaCompiler } from './DataSchemaCompiler';
Expand Down Expand Up @@ -137,35 +141,30 @@ export class YamlCompiler {
}

private transpileYaml(obj, propertyPath, cubeName, errorsReport: ErrorReporter) {
if (transpiledFields.has(propertyPath[propertyPath.length - 1])) {
for (const p of transpiledFieldsPatterns) {
const fullPath = propertyPath.join('.');
if (fullPath.match(p)) {
if (typeof obj === 'string' && ['sql', 'sqlTable'].includes(propertyPath[propertyPath.length - 1])) {
return this.parsePythonIntoArrowFunction(`f"${this.escapeDoubleQuotes(obj)}"`, cubeName, obj, errorsReport);
} else if (typeof obj === 'string') {
return this.parsePythonIntoArrowFunction(obj, cubeName, obj, errorsReport);
} else if (Array.isArray(obj)) {
const resultAst = t.program([t.expressionStatement(t.arrayExpression(obj.map(code => {
let ast: t.Program | t.NullLiteral | t.BooleanLiteral | t.NumericLiteral | null = null;
// Special case for accessPolicy.rowLevel.filter.values and other values-like fields
if (propertyPath[propertyPath.length - 1] === 'values') {
if (typeof code === 'string') {
ast = this.parsePythonAndTranspileToJs(`f"${this.escapeDoubleQuotes(code)}"`, errorsReport);
} else if (typeof code === 'boolean') {
ast = t.booleanLiteral(code);
} else if (typeof code === 'number') {
ast = t.numericLiteral(code);
}
}
if (ast === null) {
ast = this.parsePythonAndTranspileToJs(code, errorsReport);
}
return this.extractProgramBodyIfNeeded(ast);
}).filter(ast => !!ast)))]);
return this.astIntoArrowFunction(resultAst, '', cubeName);
if (transpiledFields.has(propertyPath[propertyPath.length - 1]) && transpiledFieldMatch(propertyPath)) {
if (typeof obj === 'string' && ['sql', 'sqlTable'].includes(propertyPath[propertyPath.length - 1])) {
return this.parsePythonIntoArrowFunction(`f"${this.escapeDoubleQuotes(obj)}"`, cubeName, obj, errorsReport);
} else if (typeof obj === 'string') {
return this.parsePythonIntoArrowFunction(obj, cubeName, obj, errorsReport);
} else if (Array.isArray(obj)) {
const resultAst = t.program([t.expressionStatement(t.arrayExpression(obj.map(code => {
let ast: t.Program | t.NullLiteral | t.BooleanLiteral | t.NumericLiteral | null = null;
// Special case for accessPolicy.rowLevel.filter.values and other values-like fields
if (propertyPath[propertyPath.length - 1] === 'values') {
if (typeof code === 'string') {
ast = this.parsePythonAndTranspileToJs(`f"${this.escapeDoubleQuotes(code)}"`, errorsReport);
} else if (typeof code === 'boolean') {
ast = t.booleanLiteral(code);
} else if (typeof code === 'number') {
ast = t.numericLiteral(code);
}
}
}
if (ast === null) {
ast = this.parsePythonAndTranspileToJs(code, errorsReport);
}
return this.extractProgramBodyIfNeeded(ast);
}).filter(ast => !!ast)))]);
return this.astIntoArrowFunction(resultAst, '', cubeName);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,7 @@ import type { NodePath } from '@babel/traverse';
import type { TranspilerInterface, TraverseObject } from './transpiler.interface';
import type { CubeSymbols } from '../CubeSymbols';
import type { CubeDictionary } from '../CubeDictionary';

/* this list was generated by getTransformPatterns() with additional variants for snake_case */
export const transpiledFieldsPatterns: Array<RegExp> = [
/\.sql$/,
/sql$/,
/(sqlTable|sql_table)$/,
/^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.(drillMemberReferences|drillMembers|drill_members)$/,
/^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.(orderBy|order_by)\.[0-9]+\.sql$/,
/^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.(timeShift|time_shift)\.[0-9]+\.(timeDimension|time_dimension)$/,
/^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.(reduceBy|reduce_by|groupBy|group_by|addGroupBy|add_group_by)$/,
/^dimensions\.[_a-zA-Z][_a-zA-Z0-9]*\.(reduceBy|reduce_by|groupBy|group_by|addGroupBy|add_group_by)$/,
/^(preAggregations|pre_aggregations)\.[_a-zA-Z][_a-zA-Z0-9]*\.indexes\.[_a-zA-Z][_a-zA-Z0-9]*\.columns$/,
/^(preAggregations|pre_aggregations)\.[_a-zA-Z][_a-zA-Z0-9]*\.(timeDimensionReference|timeDimension|time_dimension|segments|dimensions|measures|rollups|segmentReferences|dimensionReferences|measureReferences|rollupReferences)$/,
/^(preAggregations|pre_aggregations)\.[_a-zA-Z][_a-zA-Z0-9]*\.(timeDimensions|time_dimensions)\.\d+\.dimension$/,
/^(preAggregations|pre_aggregations)\.[_a-zA-Z][_a-zA-Z0-9]*\.(outputColumnTypes|output_column_types)\.\d+\.member$/,
/^contextMembers$/,
/^includes$/,
/^excludes$/,
/^hierarchies\.[_a-zA-Z][_a-zA-Z0-9]*\.levels$/,
/^cubes\.[0-9]+\.(joinPath|join_path)$/,
/^(accessPolicy|access_policy)\.[0-9]+\.(rowLevel|row_level)\.filters\.[0-9]+.*\.member$/,
/^(accessPolicy|access_policy)\.[0-9]+\.(rowLevel|row_level)\.filters\.[0-9]+.*\.values$/,
/^(accessPolicy|access_policy)\.[0-9]+\.conditions.[0-9]+\.if$/,
];

export const transpiledFields: Set<String> = new Set<String>();

transpiledFieldsPatterns?.forEach((r) => {
const fields = r.toString().replace(/.*?([_a-zA-Z|][_a-zA-Z0-9|]*)([^_a-zA-Z0-9|]*)$/, '$1').split('|');
fields.forEach((f) => transpiledFields.add(f));
});
import { transpiledFieldMatch, transpiledFields } from './patternMatcher';

export class CubePropContextTranspiler implements TranspilerInterface {
public constructor(
Expand Down Expand Up @@ -94,35 +64,31 @@ export class CubePropContextTranspiler implements TranspilerInterface {
ObjectProperty: (path) => {
if (path.node.key.type === 'Identifier' && transpiledFields.has(path.node.key.name)) {
const fullPath = this.fullPath(path);
// eslint-disable-next-line no-restricted-syntax
for (const p of transpiledFieldsPatterns) {
if (fullPath.match(p)) {
this.transformObjectProperty(path, resolveSymbol);
return;
}
if (transpiledFieldMatch(fullPath)) {
this.transformObjectProperty(path, resolveSymbol);
}
}
}
};
}

protected fullPath(path: NodePath<t.ObjectProperty>): string {
protected fullPath(path: NodePath<t.ObjectProperty>): string[] {
// @ts-ignore
let fp = path?.node?.key?.name || '';
const fp = [path?.node?.key?.name || ''];
let pp: NodePath<t.Node> | null | undefined = path?.parentPath;
while (pp) {
if (pp?.parentPath?.node?.type === 'ArrayExpression') {
fp = `0.${fp}`;
fp.push('0');
pp = pp?.parentPath;
// @ts-ignore
} else if (pp?.parentPath?.node?.key?.type === 'Identifier') {
// @ts-ignore
fp = `${pp?.parentPath?.node?.key?.name || '0'}.${fp}`;
fp.push(`${pp?.parentPath?.node?.key?.name || '0'}`);
pp = pp?.parentPath?.parentPath;
} else break;
}

return fp;
return fp.reverse();
}

protected knownIdentifiersInjectVisitor(field: RegExp | string, resolveSymbol: (name: string) => void): TraverseObject {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ export * from './CubePropContextTranspiler';
export * from './CubeCheckDuplicatePropTranspiler';
export * from './ValidationTranspiler';
export * from './transpiler.interface';
export * from './patternMatcher';
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import { performance } from 'perf_hooks';

/**
 * Property paths whose values have to be transpiled.
 *
 * This list was generated by getTransformPatterns() with additional variants for snake_case.
 *
 * Pattern grammar, as consumed by the trie builder and matcher in this file:
 *  - segments are separated by `.`;
 *  - `(a|b|c)` lists alternative names for one segment;
 *  - `__id__` stands for any single segment;
 *  - `__idx__` stands for a single segment that coerces to a number.
 *
 * NOTE(review): every pattern is an exact segment sequence, so
 * `...filters.__idx__.member` / `...filters.__idx__.values` only cover entries
 * located directly in the `filters` list. The regular expressions this list
 * replaced tolerated extra segments between the filter index and
 * `member`/`values` — confirm whether deeper (nested) filter paths must be
 * listed here as well.
 */
export const transpiledFieldsPatterns: string[] = [
  'sql',
  '(sqlTable|sql_table)',
  'measures.__id__.(drillMemberReferences|drillMembers|drill_members)',
  // 'measures.__id__.(orderBy|order_by).__idx__.sql', // Matched by simple `sql`
  'measures.__id__.(timeShift|time_shift).__idx__.(timeDimension|time_dimension)',
  'measures.__id__.(reduceBy|reduce_by|groupBy|group_by|addGroupBy|add_group_by)',
  'dimensions.__id__.(reduceBy|reduce_by|groupBy|group_by|addGroupBy|add_group_by)',
  '(preAggregations|pre_aggregations).__id__.indexes.__id__.columns',
  '(preAggregations|pre_aggregations).__id__.(timeDimensionReference|timeDimension|time_dimension|segments|dimensions|measures|rollups|segmentReferences|dimensionReferences|measureReferences|rollupReferences)',
  '(preAggregations|pre_aggregations).__id__.(timeDimensions|time_dimensions).__idx__.dimension',
  '(preAggregations|pre_aggregations).__id__.(outputColumnTypes|output_column_types).__idx__.member',
  'contextMembers',
  'includes',
  'excludes',
  'hierarchies.__id__.levels',
  'cubes.__idx__.(joinPath|join_path)',
  '(accessPolicy|access_policy).__idx__.(rowLevel|row_level).filters.__idx__.member',
  '(accessPolicy|access_policy).__idx__.(rowLevel|row_level).filters.__idx__.values',
  '(accessPolicy|access_policy).__idx__.conditions.__idx__.if',
];

/**
 * Every name that can appear as the LAST segment of a pattern above.
 * Intended as a cheap pre-filter: a property whose name is not in this set
 * cannot match any pattern, so the full path does not need to be built.
 */
export const transpiledFields: Set<string> = new Set<string>();

for (const pattern of transpiledFieldsPatterns) {
  // Only the final segment matters here; it is either a bare name or `(a|b|c)`.
  const lastSegment = pattern.slice(pattern.lastIndexOf('.') + 1);
  const isGroup = lastSegment.startsWith('(') && lastSegment.endsWith(')');
  const names = isGroup ? lastSegment.slice(1, -1) : lastSegment;

  for (const name of names.split('|')) {
    transpiledFields.add(name);
  }
}

/**
 * One node of the prefix tree built from the transpiled-field patterns.
 * Each edge is labelled with a single path segment.
 */
interface Trie {
  /** Child nodes keyed by the path segment that leads to them. */
  children: Record<string, Trie>;
  /** True when at least one pattern terminates exactly at this node. */
  isEnd: boolean;
}

/**
 * Adds one pattern, already split into segments, to the trie rooted at `node`.
 *
 * A segment written as `(a|b|c)` fans out into one child per alternative; the
 * remaining segments are inserted below every alternative. The node(s) reached
 * after the last segment are flagged with `isEnd`. An empty `segments` array
 * flags `node` itself.
 *
 * @param node     Root of the (sub)trie to extend; mutated in place.
 * @param segments Pattern segments in order from the root.
 */
function insertIntoTrie(node: Trie, segments: string[]) {
  const hasOwn = Object.prototype.hasOwnProperty;

  // Nodes reached so far; alternation makes this more than one node.
  let frontier: Trie[] = [node];

  for (const segment of segments) {
    // Strip the parentheses only when the segment really is a `(...)` group.
    const isGroup = segment.startsWith('(') && segment.endsWith(')');
    const alternatives = (isGroup ? segment.slice(1, -1) : segment).split('|');

    const next: Trie[] = [];
    for (const parent of frontier) {
      for (const key of alternatives) {
        // Own-property check: a plain truthiness test would mistake inherited
        // members such as `toString` or `constructor` for existing nodes.
        if (!hasOwn.call(parent.children, key)) {
          // Null-prototype map so later lookups cannot see inherited members.
          parent.children[key] = { children: Object.create(null), isEnd: false };
        }
        next.push(parent.children[key]);
      }
    }
    frontier = next;
  }

  for (const terminal of frontier) {
    terminal.isEnd = true;
  }
}

/**
 * Compiles a list of dot-separated patterns into a single trie.
 *
 * @param paths Patterns such as `measures.__id__.(groupBy|group_by)`.
 * @returns The root node; its `isEnd` stays false unless a pattern inserts zero segments.
 */
function buildTrie(paths: string[]): Trie {
  const trie: Trie = { children: {}, isEnd: false };

  paths.forEach((pattern) => {
    insertIntoTrie(trie, pattern.split('.'));
  });

  return trie;
}

// Trie built once at module load from transpiledFieldsPatterns; transpiledFieldMatch() passes it to matchTree().
const transpiledFieldsPatternsTree: Trie = buildTrie(transpiledFieldsPatterns);

/**
 * Checks whether a property path matches a pattern stored in the trie.
 *
 * At every level the candidates are tried in this order:
 *  1. a child whose key equals the segment literally,
 *  2. the `__idx__` child, when the segment coerces to a number,
 *  3. the `__id__` child, which accepts any segment.
 * If a candidate leads to a dead end the next one is tried, so a literal
 * sibling never hides a wildcard pattern.
 *
 * Children are looked up as OWN properties only. Path segments come from
 * user-defined names, and names such as `toString` or `constructor` would
 * otherwise resolve to members inherited from Object.prototype.
 *
 * @param node     Root of the trie to match against.
 * @param segments Property path, outermost segment first.
 * @returns true when the whole path ends on a node flagged `isEnd`.
 */
function matchTree(node: Trie, segments: string[]): boolean {
  const hasOwn = Object.prototype.hasOwnProperty;

  // Index-based walk: avoids copying the remaining segments at every level.
  const walk = (current: Trie, depth: number): boolean => {
    if (depth === segments.length) {
      return current.isEnd === true;
    }

    const segment = segments[depth];
    const { children } = current;

    if (hasOwn.call(children, segment) && walk(children[segment], depth + 1)) {
      return true;
    }

    if (
      hasOwn.call(children, '__idx__') &&
      !Number.isNaN(+segment) &&
      walk(children.__idx__, depth + 1)
    ) {
      return true;
    }

    return hasOwn.call(children, '__id__') && walk(children.__id__, depth + 1);
  };

  return walk(node, 0);
}

/**
 * Tells whether the value found at `fullPath` has to be transpiled.
 *
 * Paths ending in `sql`, `sqlTable` or `sql_table` are accepted regardless of
 * what precedes them; every other path has to match the pattern trie.
 *
 * @param fullPath Property path, outermost segment first.
 */
export const transpiledFieldMatch = (fullPath: string[]): boolean => {
  const lastSegment = fullPath[fullPath.length - 1];

  switch (lastSegment) {
    case 'sql':
    case 'sqlTable':
    case 'sql_table':
      return true;
    default:
      return matchTree(transpiledFieldsPatternsTree, fullPath);
  }
};
Loading