Lots of changes to the AST Builder.
All checks were successful
continuous-integration/drone/push Build is passing

Adding IDENTIFIER Token type with optional differentiation between functions, constants and variables for syntax highlighting.
Adding substitute for many AST elements.
Properly starting builder: parsing identifiers, functions, variables, array values, properties, numbers, strings and subexpressions.
This commit is contained in:
Adsooi 2022-10-23 15:43:47 +02:00
parent 666d611e95
commit d7704110dd
Signed by: Ad5001
GPG key ID: 7251B1AF90B960F9
5 changed files with 297 additions and 47 deletions

View file

@ -504,7 +504,7 @@ Item {
Generates a list of tokens from the given. Generates a list of tokens from the given.
*/ */
function tokens(text) { function tokens(text) {
let tokenizer = new Parsing.Tokenizer(new Parsing.Input(text), true, false) let tokenizer = new Parsing.Tokenizer(new Parsing.Input(text), true, true, false)
let tokenList = [] let tokenList = []
let token let token
while((token = tokenizer.next()) != null) while((token = tokenizer.next()) != null)

View file

@ -24,10 +24,7 @@
const DERIVATION_PRECISION = 0.01 const DERIVATION_PRECISION = 0.01
const ZERO_EPISLON = 5e-11 // Number under which a variable is considered 0 when dealing with floating point rounding errors. const ZERO_EPISLON = 5e-11 // Number under which a variable is considered 0 when dealing with floating point rounding errors.
const BINARY_OPERATION_PRIORITY = { const BINARY_OPERATION_PRIORITY = Reference.BINARY_OPERATION_PRIORITY
"+": 10, "-": 10,
"*": 20, "/": 20
}
enum ASEType { enum ASEType {
UNKNOWN, UNKNOWN,
@ -417,16 +414,17 @@ class FunctionElement extends AbstractSyntaxElement {
} }
substitute(variable, substitution) { substitute(variable, substitution) {
return new FunctionElement(this.functionName, this.args.map(arg => arg.substitute(variable, substitution)))
} }
derivation(variable) { derivation(variable) {
//TODO: Use DERIVATIVES elements in reference. //TODO: Use DERIVATIVES elements in reference.
return new DerivationElement([this, variable]) return new DerivationElement([this, new Variable(variable)])
} }
integral(variable) { integral(variable) {
//TODO: Use INTEGRALS elements in reference. //TODO: Use INTEGRALS elements in reference.
return new IntegralElement([this, variable]) return new IntegralElement([this, new Variable(variable)])
} }
toEditableString() { toEditableString() {
@ -487,12 +485,20 @@ class DerivationElement extends FunctionElement {
} }
simplify() { simplify() {
return new DerivationElement([this.args[0].simplify(variables), this.args[1]]) return this.args[0].simplify().derivation(this.args[1].variableName).simplify()
}
substitute(variable, substitution) {
if(variable == this.args[1].variableName) {
// Simplify first, then substitute in the simplified result.
return this.simplify().substitute(variable, substitution)
} else
return new DerivationElement([this.args[0].substitute(variable, substitution), this.args[1]])
} }
integral(variable) { integral(variable) {
// Check if we're integrating and derivating by the same variable // Check if we're integrating and derivating by the same variable
return variable.variableName == this.args[1].variableName ? this.args[1] : super(variable) return variable == this.args[1].variableName ? this.args[1] : super(variable)
} }
toLatex() { toLatex() {
@ -557,26 +563,44 @@ class IntegralElement extends FunctionElement {
simplify() { simplify() {
// TODO: When full derivation and integrals are implemented, use dedicated functions for simplification. // TODO: When full derivation and integrals are implemented, use dedicated functions for simplification.
let func = this.args[this.args.length-2].simplify(variables) let func = this.f.simplify(variables)
let newElem let newElem
if(func.isConstant() && this.args.length == 4) if(func.isConstant() && this.args.length == 4)
// Simplify integral. // Simplify integral.
newElem = new BinaryOperation( newElem = new BinaryOperation(
new BinaryOperation(this.args[1], '-', this.args[0]).simplify(), new BinaryOperation(this.b, '-', this.a).simplify(),
'*', '*',
func func
) ).simplify()
else else {
newElem = new IntegralElement(this.args.length == 4 ? let integrated = this.func.integral(this.d.variableName)
[this.a.simplify(), this.b.simplify(), func, this.d] : newElem = new BinaryOperation(
[func, this.d] integrated.substitute(this.d.variableName, this.b),
) '-',
integrated.substitute(this.d.variableName, this.a)
).simplify()
//newElem = new IntegralElement(this.args.length == 4 ?
// [this.a.simplify(), this.b.simplify(), func, this.d] :
// [func, this.d]
//)
}
return newElem return newElem
} }
substitute(variable, substitution) {
if(variable == this.args[1].variableName) {
// Simplify
return this.simplify().substitute(variable, substitution)
} else
return new IntegralElement(this.args.length == 4 ?
[this.a.substitute(variable, substitution), this.b.simplify(variable, substitution),
this.f.substitute(variable, substitution), this.d] :
[this.f.substitute(variable, substitution), this.d])
}
derivation(variable) { derivation(variable) {
// Check if we're integrating and derivating by the same variable // Check if we're integrating and derivating by the same variable
return variable.variableName == this.args[1].variableName ? this.args[1] : super(variable) return variable == this.args[1].variableName ? this.args[1] : super(variable)
} }
toLatex() { toLatex() {
@ -689,6 +713,7 @@ class BinaryOperation extends AbstractSyntaxElement {
throw new EvalError("Unknown operator " + ope + ".") throw new EvalError("Unknown operator " + ope + ".")
} }
} }
// TODO: Check for all nearby operations simplifications
return result return result
} }

View file

@ -23,26 +23,217 @@
class ExpressionBuilder { class ExpressionBuilder {
constructor(tokenizer) { constructor(tokenizer, rememberTokens = false) {
this.tokenizer = tokenizer; this.tokenizer = tokenizer;
if(tokenizer.tokenizeWhitespaces) {
console.warn('WARNING: The ExpressionTokenizer for ExpressionBuilder was initialized with whitespace support. Disabled.')
tokenizer.tokenizeWhitespaces = false
}
if(tokenizer.differentiateIdentifiers) {
console.warn('WARNING: The ExpressionTokenizer for ExpressionBuilder was initialized with identifier differentiation support. Disabled.')
tokenizer.differentiateIdentifiers = false
}
this.tokens = []
this.rememberTokens = rememberTokens
this.stack = [] this.stack = []
} }
parseExpression(delimitors = '') { /**
// Parse a sequence of operations, and orders them based on OPERATION_PRIORITY. * Parses an expression until the end is reached.
let elements = [] *
let operators = [] * @throws {Error} When an unexpected token is encountered.
let firstToken = this.tokenizer.peek(); * @returns {AbstractSyntaxElement}
if(firstToken.type == TK.TokenType.OPERATOR) // First operations. */
if(firstToken.value == "-") { parseFullExpression() {
// TODO: Set initial argument. this.parseExpression([])
this.tokenizer.skip(TK.TokenType.OPERATOR) return this.stack.pop()
}
/**
* Parses an expression until the end is reached.
*
* @param {string} punctuationDelimitators - List of deliminators that ends the expression
* @throws {Error} When an unexpected token is encountered.
*/
parseExpression(punctuationDelimitators = []) {
let token
while((token = this.tokenizer.peek()) != null) {
if(token.type == TK.TokenType.PUNCT && token.value != '(') // Still allow expression creation.
if(punctuationDelimitators.includes(token.value))
break
else if(punctuationDelimitators.length == 1)
this.tokenizer.raise(`Unexpected ${token.value}. Expected '${punctuationDelimitators[0]}'.`)
else
this.tokenizer.raise(`Unexpected ${token.value}. Expected one of: '${punctuationDelimitators[0].join("', '")}'.`)
else
this.handleSingle()
}
if(token == null && punctuationDelimitators.length > 0)
if(punctuationDelimitators.length == 1)
this.tokenizer.raise(`Unexpected end of expression. Expected '${punctuationDelimitators[0]}'.`)
else
this.tokenizer.raise(`Unexpected end of expression. Expected one of: '${punctuationDelimitators[0].join("', '")}'.`)
if(this.stack.length == 0)
if(token == null)
this.tokenizer.raise(`Unexpected end of expression. Expected at least one element.`)
else
this.tokenizer.raise(`Unexpected ${token.value}. Expected at least one element.`)
if(this.stack.length > 1)
this.tokenizer.raise('Invalid expression.')
}
/**
* Handles a single (assumed non-null) token based on its type.
*
* @param {Token} token - The peeked token to handle; its type and value select the parsing routine.
* @throws {Error} When an unexpected token is encountered.
*/
handleSingle(token) {
switch(token.type) {
case TK.TokenType.IDENTIFIER:
this.parseIdentifier()
break
case TK.TokenType.OPERATOR:
if(this.stack.length == 0 && Reference.UNARY_OPERATORS.includes(token.value))
this.parseSingleOperation()
else if(this.stack.length > 0 && Reference.BINARY_OPERATORS.includes(token.value))
this.parseBinaryOperations()
else if(this.stack.length > 0 && Reference.TERTIARY_OPERATORS.includes(token.value))
this.parseTertiaryOperation()
break
case TK.TokenType.NUMBER:
this.stack.push(new AST.NumberElement(this.tokenizer.next().value))
break
case TK.TokenType.STRING:
this.stack.push(new AST.StringElement(this.tokenizer.next().value))
break
case TK.TokenType.PUNCT:
if(token.value == '(') {
this.tokenizer.skip(TK.TokenType.PUNCT, '(') // Skip the opening parentheses.
this.parseExpression([')'])
} else } else
tokenizer.input.raise(`Invalid operator ${firstToken.value} at begining of statement.`) this.tokenizer.raise(`Unexpected ${token.value}. Expected a value.`)
else { break
default:
this.tokenizer.raise(`Unknown token provided: ${token.value}.`)
break
}
if(this.rememberTokens)
this.tokens.push(token)
}
/**
* Parses a single token element.
*
* @throws {Error} When an unexpected token is encountered.
*/
parseSingle() {
let token = this.tokenizer.peek()
if(token != null)
this.handleSingle(token)
}
parseIdentifier() {
// Assuming the right type.
let token = this.tokenizer.read(TK.TokenType.IDENTIFIER)
if(Reference.CONSTANTS_LIST.includes(token.value))
this.stack.push(new AST.Constant(token.value))
else
this.stack.push(new AST.Variable(token.value))
this.checkIdentifierFollowupTokens()
}
/**
* Parses a function based on a previously called identifier.
* NOTE: Expects to have at least one stack element for function name.
*/
parseFunction() {
// TODO: Implement dynamic functions values instead of being based on names.
let functionValue = this.stack.pop()
if(!(functionValue instanceof AST.Variable))
this.tokenizer.raise("Executing functions from dynamic variables is not implemented".)
let functionName = functionValue.variableName
let args = []
let token
while((token = this.tokenizer.peek()) != null && token.value != ')') {
this.tokenizer.skip(TK.TokenType.PUNCT) // Skip the opening parenthesis and the commas.
parseExpression([',',')'])
args.push(this.stack.pop())
}
if(token == null)
this.tokenizer.raise(`Unexpected end of expression. Expected ')'.`)
if(this.functionName == 'derivation')
this.stack.push(new AST.DerivationElement(args))
else if(this.functionName == 'integral')
this.stack.push(new AST.IntegralElement(args))
else
this.stack.push(new AST.FunctionElement(functionName, args))
}
/**
* Parses an object property based on a previously called identifier.
* NOTE: Expects to have at least one stack element for property object name.
*/
parseProperty() {
this.tokenizer.skip(TK.TokenType.PUNCT, '.') // Skipping the dot.
let token = this.tokenizer.read(TK.TokenType.IDENTIFIER)
this.stack.push(new AST.PropertyElement(this.stack.pop(), token.value))
this.checkIdentifierFollowupTokens()
}
/**
* Parses the value of the element of an array at a given index based on a previously called identifier.
* NOTE: Expects to have at least one stack element for property object name.
*/
parseArrayValue() {
this.tokenizer.skip(TK.TokenType.PUNCT, '[') // Skipping the array opener.
let obj = this.stack.pop()
parseExpression([']'])
this.stack.push(new AST.ArrayElement(obj, this.stack.pop()))
this.checkIdentifierFollowupTokens()
}
/**
* Checks for followup tokens following a value getting.
* E.g: getting the property of an object, an array member, or calling a function.
* NOTE: Expects to have at least one stack element for previous calling object.
*/
checkIdentifierFollowupTokens() {
let peeked = this.tokenizer.peek()
if(peeked != null && peeked.type == TK.TokenType.PUNCT)
switch(peeked.value) {
case '(':
// Function call
this.parseFunction()
break
case '.':
// Member property
this.parseProperty()
break
case '[':
// Array value
this.parseArrayValue()
break
} }
} }
parseOperation()` parseBinaryOperations() {
if((this.tokenizer.peek().value in AST.BINARY_OPERATION_PRIORITY))
throw new Error("Current token is not a binary operator.")
if(this.stack.length == 0)
throw new Error(`The operator ${this.tokenizer.peek().value} can only be used after a value.`)
// Parse a sequence of operations, and orders them based on OPERATION_PRIORITY.
let elements = [this.stack.pop()]
let operators = [this.tokenizer.next()]
let token
while((token = this.tokenizer.peek()) != null) {
}
}
} }

View file

@ -28,6 +28,21 @@ const CONSTANTS = {
}; };
const CONSTANTS_LIST = Object.keys(CONSTANTS); const CONSTANTS_LIST = Object.keys(CONSTANTS);
// Operators usable in front of a value. None are declared yet.
const UNARY_OPERATORS = []

// Priority of every binary operator, keyed by its textual form.
// The key order matters: BINARY_OPERATORS below is derived from it.
const BINARY_OPERATION_PRIORITY = {
    // Priority 0
    'and': 0,
    'or': 0,
    'in': 0,
    // Priority 10
    '==': 10,
    '!=': 10,
    '>': 10,
    '<': 10,
    '>=': 10,
    '<=': 10,
    // Priority 20
    '%': 20,
    '||': 20,
    // Priority 30
    '+': 30,
    '-': 30,
    // Priority 40
    '*': 40,
    '/': 40,
    // Priority 50
    '^': 50
}

// Textual forms of all known binary operators.
const BINARY_OPERATORS = Object.keys(BINARY_OPERATION_PRIORITY)

// Operators introducing a three-operand operation.
const TERTIARY_OPERATORS = ['?']
const FUNCTIONS = { const FUNCTIONS = {
"abs": Math.abs, "abs": Math.abs,
"acos": Math.acos, "acos": Math.acos,

View file

@ -23,13 +23,14 @@
const WHITESPACES = " \t\n\r" const WHITESPACES = " \t\n\r"
const STRING_LIMITORS = '"\'`'; const STRING_LIMITORS = '"\'`';
const OPERATORS = "+-*/^%?:=!><"; const OPERATORS = "+-*/^%?:=!><";
const PUNCTUTATION = "()[]{},."; const PUNCTUTATION = "()[],.";
const NUMBER_CHARS = "0123456789" const NUMBER_CHARS = "0123456789"
const IDENTIFIER_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789_₀₁₂₃₄₅₆₇₈₉αβγδεζηθκλμξρςστφχψωₐₑₒₓₔₕₖₗₘₙₚₛₜ" const IDENTIFIER_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789_₀₁₂₃₄₅₆₇₈₉αβγδεζηθκλμξρςστφχψωₐₑₒₓₔₕₖₗₘₙₚₛₜ"
var TokenType = { var TokenType = {
// Expression type // Expression type
"WHITESPACE": "WHITESPACE", "WHITESPACE": "WHITESPACE",
"IDENTIFIER": "IDENTIFIER",
"VARIABLE": "VARIABLE", "VARIABLE": "VARIABLE",
"CONSTANT": "CONSTANT", "CONSTANT": "CONSTANT",
"FUNCTION": "FUNCTION", "FUNCTION": "FUNCTION",
@ -49,10 +50,11 @@ class Token {
} }
class ExpressionTokenizer { class ExpressionTokenizer {
constructor(input, tokenizeWhitespaces = false, errorOnUnknown = true) { constructor(input, tokenizeWhitespaces = false, differentiateIdentifiers = false, errorOnUnknown = true) {
this.input = input; this.input = input
this.currentToken = null; this.currentToken = null
this.tokenizeWhitespaces = tokenizeWhitespaces this.tokenizeWhitespaces = tokenizeWhitespaces
this.differentiateIdentifiers = differentiateIdentifiers
this.errorOnUnknown = errorOnUnknown this.errorOnUnknown = errorOnUnknown
} }
@ -115,10 +117,16 @@ class ExpressionTokenizer {
while(!this.input.atEnd() && IDENTIFIER_CHARS.includes(this.input.peek().toLowerCase())) { while(!this.input.atEnd() && IDENTIFIER_CHARS.includes(this.input.peek().toLowerCase())) {
identifier += this.input.next(); identifier += this.input.next();
} }
if(Reference.CONSTANTS_LIST.includes(identifier.toLowerCase())) { let identifierLC = identifier.toLowerCase()
return new Token(TokenType.CONSTANT, identifier.toLowerCase(), this.input.position-identifier.length) if(Reference.CONSTANTS_LIST.includes(identifierLC)) {
} else if(Reference.FUNCTIONS_LIST.includes(identifier.toLowerCase())) { return new Token(TokenType.CONSTANT, identifierLC, this.input.position-identifier.length)
return new Token(TokenType.FUNCTION, identifier.toLowerCase(), this.input.position-identifier.length) } else if(Reference.FUNCTIONS_LIST.includes(identifierLC)) {
return new Token(TokenType.FUNCTION, identifierLC, this.input.position-identifier.length)
} else if(Reference.UNARY_OPERATORS.includes(identifierLC) ||
Reference.BINARY_OPERATORS.includes(identifierLC) ||
Reference.TERTIARY_OPERATORS.includes(identifierLC)
) {
return new Token(TokenType.OPERATOR, identifierLC, this.input.position-identifier.length)
} else { } else {
return new Token(TokenType.VARIABLE, identifier, this.input.position-identifier.length) return new Token(TokenType.VARIABLE, identifier, this.input.position-identifier.length)
} }
@ -137,7 +145,7 @@ class ExpressionTokenizer {
if(Reference.CONSTANTS_LIST.includes(c)) return new Token(TokenType.CONSTANT, this.input.next(), this.input.position-1); if(Reference.CONSTANTS_LIST.includes(c)) return new Token(TokenType.CONSTANT, this.input.next(), this.input.position-1);
if(PUNCTUTATION.includes(c)) return new Token(TokenType.PUNCT, this.input.next(), this.input.position-1); if(PUNCTUTATION.includes(c)) return new Token(TokenType.PUNCT, this.input.next(), this.input.position-1);
if(this.errorOnUnknown) if(this.errorOnUnknown)
this.input.throw("Unknown token character " + c) this.raise("Unknown token character " + c)
else else
return new Token(TokenType.UNKNOWN, this.input.next(), this.input.position-1); return new Token(TokenType.UNKNOWN, this.input.next(), this.input.position-1);
} }
@ -157,13 +165,24 @@ class ExpressionTokenizer {
return tmp; return tmp;
} }
read(type, value) {
let next = this.next()
if(type != null && next.type != type)
this.raise(`Unexpected ${next.type.toLowerCase()} ${next.value}. Expected type was ${type.toLowerCase()}.`);
if(value != null && next.value == value)
this.raise(`Unexpected ${next.type.toLowerCase()} ${next.value}. Expected value was ${value}.`);
return next
}
atEnd() { atEnd() {
return this.peek() == null; return this.peek() == null;
} }
skip(type) { skip(type, value) {
let next = this.next(); this.read(type, value)
if(next.type != type) }
input.raise("Unexpected token " + next.type.toLowerCase() + ' "' + next.value + '". Expected ' + type.toLowerCase());
raise(message) {
this.input.raise(message)
} }
} }