Compare commits

...
Sign in to create a new pull request.

4 commits

Author SHA1 Message Date
64d5f11ff9
FlatPlusOperation for future simplifications! 2022-10-25 16:26:08 +02:00
80d0dad63a
Implementing parseBinaryOperator 2022-10-23 18:09:20 +02:00
d7704110dd
Lots of changes to the AST Builder.
Adding IDENTIFIER Token type with optional differentiation between functions, constants and variables for syntax highlighting.
Adding substitute for many AST elements.
Properly starting builder: parsing identifiers, functions, variables, array values, properties, numbers, strings and subexpressions.
2022-10-23 15:43:47 +02:00
666d611e95
Fixing up old stuff, cleaning up and standardizing AST. 2022-10-22 14:06:03 +02:00
6 changed files with 1139 additions and 465 deletions
LogarithmPlotter/qml/eu/ad5001/LogarithmPlotter

View file

@ -504,7 +504,7 @@ Item {
Generates a list of tokens from the given text.
*/
function tokens(text) {
let tokenizer = new Parsing.Tokenizer(new Parsing.Input(text), true, false)
let tokenizer = new Parsing.Tokenizer(new Parsing.Input(text), true, true, false)
let tokenList = []
let token
while((token = tokenizer.next()) != null)

View file

@ -103,27 +103,28 @@ function functionToLatex(f, args) {
* @param {string} vari - variable to convert
* @returns {string}
*/
function variable(vari) {
let unicodechars = ["α","β","γ","δ","ε","ζ","η",
"π","θ","κ","λ","μ","ξ","ρ",
"ς","σ","τ","φ","χ","ψ","ω",
"Γ","Δ","Θ","Λ","Ξ","Π","Σ",
"Φ","Ψ","Ω","ₐ","ₑ","ₒ","ₓ",
"ₕ","ₖ","ₗ","ₘ","ₙ","ₚ","ₛ",
"ₜ","¹","²","³","⁴","⁵","⁶",
"⁷","⁸","⁹","⁰","₁","₂","₃",
"₄","₅","₆","₇","₈","₉","₀",
"pi"]
let equivalchars = ["\\alpha","\\beta","\\gamma","\\delta","\\epsilon","\\zeta","\\eta",
"\\pi","\\theta","\\kappa","\\lambda","\\mu","\\xi","\\rho",
"\\sigma","\\sigma","\\tau","\\phi","\\chi","\\psi","\\omega",
"\\Gamma","\\Delta","\\Theta","\\Lambda","\\Xi","\\Pi","\\Sigma",
"\\Phy","\\Psi","\\Omega","{}_{a}","{}_{e}","{}_{o}","{}_{x}",
"{}_{h}","{}_{k}","{}_{l}","{}_{m}","{}_{n}","{}_{p}","{}_{s}",
"{}_{t}","{}^{1}","{}^{2}","{}^{3}","{}^{4}","{}^{5}","{}^{6}",
"{}^{7}","{}^{8}","{}^{9}","{}^{0}","{}_{1}","{}_{2}","{}_{3}",
"{}_{4}","{}_{5}","{}_{6}","{}_{7}","{}_{8}","{}_{9}","{}_{0}",
// Unicode characters (Greek letters, subscripts, superscripts) and the plain
// name "pi" that may appear in a variable name.
// unicodechars[i] is rendered in LaTeX as equivalchars[i]; both tables must
// therefore keep the same length and the same ordering.
let unicodechars = ["α","β","γ","δ","ε","ζ","η",
                    "π","θ","κ","λ","μ","ξ","ρ",
                    "ς","σ","τ","φ","χ","ψ","ω",
                    "Γ","Δ","Θ","Λ","Ξ","Π","Σ",
                    "Φ","Ψ","Ω","ₐ","ₑ","ₒ","ₓ",
                    "ₕ","ₖ","ₗ","ₘ","ₙ","ₚ","ₛ",
                    "ₜ","¹","²","³","⁴","⁵","⁶",
                    "⁷","⁸","⁹","⁰","₁","₂","₃",
                    "₄","₅","₆","₇","₈","₉","₀",
                    "pi"]
// LaTeX equivalents, index-aligned with unicodechars.
// "Φ" maps to "\\Phi": the previous "\\Phy" is not a LaTeX command.
let equivalchars = ["\\alpha","\\beta","\\gamma","\\delta","\\epsilon","\\zeta","\\eta",
                    "\\pi","\\theta","\\kappa","\\lambda","\\mu","\\xi","\\rho",
                    "\\sigma","\\sigma","\\tau","\\phi","\\chi","\\psi","\\omega",
                    "\\Gamma","\\Delta","\\Theta","\\Lambda","\\Xi","\\Pi","\\Sigma",
                    "\\Phi","\\Psi","\\Omega","{}_{a}","{}_{e}","{}_{o}","{}_{x}",
                    "{}_{h}","{}_{k}","{}_{l}","{}_{m}","{}_{n}","{}_{p}","{}_{s}",
                    "{}_{t}","{}^{1}","{}^{2}","{}^{3}","{}^{4}","{}^{5}","{}^{6}",
                    "{}^{7}","{}^{8}","{}^{9}","{}^{0}","{}_{1}","{}_{2}","{}_{3}",
                    "{}_{4}","{}_{5}","{}_{6}","{}_{7}","{}_{8}","{}_{9}","{}_{0}",
                    "\\pi"]
function variable(vari) {
for(let i = 0; i < unicodechars.length; i++) {
//console.log(vari, unicodechars[i], equivalchars[i]);
if(vari.includes(unicodechars[i]))

View file

@ -23,25 +23,258 @@
class ExpressionBuilder {
constructor(tokenizer) {
constructor(tokenizer, rememberTokens = false) {
this.tokenizer = tokenizer;
}
parseExpression(delimitors = '') {
// Parse a sequence of operations, and orders them based on OPERATION_PRIORITY.
let elements = []
let operators = []
let firstToken = this.tokenizer.peek();
if(firstToken.type == TK.TokenType.OPERATOR) // First operations.
if(firstToken.value == "-") {
// TODO: Set initial argument.
this.tokenizer.skip(TK.TokenType.OPERATOR)
} else
tokenizer.input.raise(`Invalid operator ${firstToken.value} at begining of statement.`)
else {
if(tokenizer.tokenizeWhitespaces) {
console.warn('WARNING: The ExpressionTokenizer for ExpressionBuilder was initialized with whitespace support. Disabled.')
tokenizer.tokenizeWhitespaces = false
}
if(tokenizer.differentiateIdentifiers) {
console.warn('WARNING: The ExpressionTokenizer for ExpressionBuilder was initialized with identifier differentiation support. Disabled.')
tokenizer.differentiateIdentifiers = false
}
this.tokens = []
this.rememberTokens = rememberTokens
this.stack = []
}
parseOperation()`
/**
* Parses an expression until the end is reached.
*
* @throws {Error} When an unexpected token is encountered.
* @returns {AbstractSyntaxElement}
*/
parseFullExpression() {
this.parseExpression([])
return this.stack.pop()
}
/**
* Parses an expression until the end is reached.
*
* @param {string} punctuationDelimitators - List of deliminators that ends the expression
* @throws {Error} When an unexpected token is encountered.
*/
parseExpression(punctuationDelimitators = []) {
let token
while((token = this.tokenizer.peek()) != null) {
if(token.type == TK.TokenType.PUNCT && token.value != '(') // Still allow expression creation.
if(punctuationDelimitators.includes(token.value))
break
else if(punctuationDelimitators.length == 1)
this.tokenizer.raise(`Unexpected ${token.value}. Expected '${punctuationDelimitators[0]}'.`)
else
this.tokenizer.raise(`Unexpected ${token.value}. Expected one of: '${punctuationDelimitators[0].join("', '")}'.`)
else
this.handleSingle()
}
if(token == null && punctuationDelimitators.length > 0)
if(punctuationDelimitators.length == 1)
this.tokenizer.raise(`Unexpected end of expression. Expected '${punctuationDelimitators[0]}'.`)
else
this.tokenizer.raise(`Unexpected end of expression. Expected one of: '${punctuationDelimitators[0].join("', '")}'.`)
if(this.stack.length == 0)
if(token == null)
this.tokenizer.raise(`Unexpected end of expression. Expected at least one element.`)
else
this.tokenizer.raise(`Unexpected ${token.value}. Expected at least one element.`)
if(this.stack.length > 1)
this.tokenizer.raise('Invalid expression.')
}
/**
* Handles a single (assumed non-null) token based on its type.
*
* @param {AbstractSyntaxElement} token
* @throws {Error} When an unexpected token is encountered.
*/
handleSingle(token) {
switch(token.type) {
case TK.TokenType.NUMBER:
this.stack.push(new AST.NumberElement(this.tokenizer.next().value))
break
case TK.TokenType.STRING:
this.stack.push(new AST.StringElement(this.tokenizer.next().value))
break
case TK.TokenType.IDENTIFIER:
case TK.TokenType.OPERATOR:
if(this.stack.length == 0 && Reference.UNARY_OPERATORS.includes(token.value))
this.parseSingleOperation()
else if(this.stack.length > 0 && Reference.BINARY_OPERATORS.includes(token.value))
this.parseBinaryOperations()
else if(this.stack.length > 0 && Reference.TERTIARY_OPERATORS.includes(token.value))
this.parseTertiaryOperation()
else if(token.type == TK.TokenType.IDENTIFIER)
// If it isn't a reserved keyword for operators (e.g and, or...), then it *is* and identifier.
this.parseIdentifier()
else
this.tokenizer.raise(`Unknown operator: ${token.value}.`)
break
case TK.TokenType.PUNCT:
if(token.value == '(') {
this.tokenizer.skip(TK.TokenType.PUNCT, '(') // Skip the opening parentheses.
this.parseExpression([')'])
} else
this.tokenizer.raise(`Unexpected ${token.value}. Expected a value.`)
break
default:
this.tokenizer.raise(`Unknown token provided: ${token.value}.`)
break
}
if(this.rememberTokens)
this.tokens.push(token)
}
/**
* Parses a single token element.
*
* @throws {Error} When an unexpected token is encountered.
*/
parseSingle() {
let token = this.tokenizer.peek()
if(token != null)
this.handleSingle(token)
}
parseIdentifier() {
// Assuming the right type.
let token = this.tokenizer.read(TK.TokenType.IDENTIFIER)
if(Reference.CONSTANTS_LIST.includes(token.value))
this.stack.push(new AST.Constant(token.value))
else
this.stack.push(new AST.Variable(token.value))
this.checkIdentifierFollowupTokens()
}
/**
* Parses a function based on a previously called identifier.
* NOTE: Expects to have at least one stack element for function name.
*/
parseFunction() {
// TODO: Implement dynamic functions values instead of being based on names.
let functionValue = this.stack.pop()
if(!(functionValue instanceof AST.Variable))
this.tokenizer.raise("Executing functions from dynamic variables is not implemented".)
let functionName = functionValue.variableName
let args = []
let token
while((token = this.tokenizer.peek()) != null && token.value != ')') {
this.tokenizer.skip(TK.TokenType.PUNCT) // Skip the opening parenthesis and the commas.
parseExpression([',',')'])
args.push(this.stack.pop())
}
if(token == null)
this.tokenizer.raise(`Unexpected end of expression. Expected ')'.`)
if(this.functionName == 'derivation')
this.stack.push(new AST.DerivationElement(args))
else if(this.functionName == 'integral')
this.stack.push(new AST.IntegralElement(args))
else
this.stack.push(new AST.FunctionElement(functionName, args))
}
/**
* Parses an object property based on a previously called identifier.
* NOTE: Expects to have at least one stack element for property object name.
*/
parseProperty() {
this.tokenizer.skip(TK.TokenType.PUNCT, '.') // Skipping the dot.
let token = this.tokenizer.read(TK.TokenType.IDENTIFIER)
this.stack.push(new AST.PropertyElement(this.stack.pop(), token.value))
this.checkIdentifierFollowupTokens()
}
/**
* Parses the value of the element of an array at a given index based on a previously called identifier.
* NOTE: Expects to have at least one stack element for property object name.
*/
parseArrayValue() {
this.tokenizer.skip(TK.TokenType.PUNCT, '[') // Skipping the array opener.
let obj = this.stack.pop()
parseExpression([']'])
this.stack.push(new AST.ArrayElement(obj, this.stack.pop()))
this.checkIdentifierFollowupTokens()
}
/**
* Checks for followup tokens following a value getting.
* E.g: getting the property of an object, an array member, or calling a function.^
* NOTE: Expects to have at least one stack element for previous calling object.
*/
checkIdentifierFollowupTokens() {
let peeked = this.tokenizer.peek()
if(peeked != null && peeked.type == TK.TokenType.PUNCT)
switch(peeked.value) {
case '(':
// Function call
this.parseFunction()
break
case '.':
// Member property
this.parseProperty()
break
case '[':
// Array value
this.parseArrayValue()
break
}
}
parseBinaryOperations() {
if((this.tokenizer.peek().value in AST.BINARY_OPERATION_PRIORITY))
throw new Error("Current token is not a binary operator.")
if(this.stack.length == 0)
throw new Error(`The operator ${this.tokenizer.peek().value} can only be used after a value.`)
// Parse a sequence of operations, and orders them based on OPERATION_PRIORITY.
let elements = [this.stack.pop()]
let operators = [this.tokenizer.next().value]
let nextIsOperator = false
let token
while((token = this.tokenizer.peek()) != null) {
if(nextIsOperator)
if(token.type == TK.TokenType.PUNCT)
if(token.value == ')')
// Don't skip that token, but stop the parsing,
// as it may be an unopened expression.
break
else
this.tokenizer.raise(`Unexpected ${token.value}. Expected an operator, or ')'.`)
else if(token.type == TK.TokenType.IDENTIFIER)
if(Reference.BINARY_OPERATORS.includes(token.value))
this.operartors.push(this.tokenizer.next().value)
else if(Reference.TERTIARY_OPERATORS.includes(token.value))
// Break to let the hand back to the parser.
break
else if(Reference.UNARY_OPERATORS.includes(token.value))
this.tokenizer.raise(`Invalid use of operator ${token.value} after ${elements.pop().value}.`)
else
this.tokenizer.raise(`Unknown operator: ${token.value}.`)
else {
handleSingle(token)
let value = this.stack.pop()
if(token.value != '(' && (value instanceof AST.BinaryOperation || value instanceof AST.TertiaryOperation))
// In case you chain something like 'or' and '*'
// Unary operations are exempted from this as they are used for a single value.
this.tokenizer.raise(`Cannot chain operations ${operators.pop().value} and ${value.ope}.`)
elements.push(value)
}
}
// Now we have our full chain, we need to match by operation priority
// TODO: Implement FlatBinaryOperations for better simplification and smarter trees.
for(let ope of AST.BINARY_OPERATORS)
while(operators.includes(ope)) { // Skip if not in priority.
let index = operators.indexOf(ope)
operators.splice(index, 1) // Remove operator from array.
elements.splice(index, 2, new BinaryOperation(elements[index], ope, elements[index+1]))
}
// At the end, there should be no more operators and only one element.
this.stack.push(elements.pop())
}
}

View file

@ -28,6 +28,23 @@ const CONSTANTS = {
};
const CONSTANTS_LIST = Object.keys(CONSTANTS);
// Operators applied to a single operand (none declared so far).
const UNARY_OPERATORS = []

// Priority of every binary operator: the larger the number, the higher the priority.
const BINARY_OPERATION_PRIORITY = {
    'and': 0,
    'or': 0,
    'in': 0,
    '==': 10,
    '!=' : 10,
    '>': 10,
    '<': 10,
    '>=': 10,
    '<=': 10,
    '%': 20,
    '||': 20,
    '+': 30,
    '-': 30,
    '*': 40,
    '/': 40,
    '^': 50
}

// Names of the binary operators, sorted by priority (most to least).
const BINARY_OPERATORS = Object.keys(BINARY_OPERATION_PRIORITY).sort(
    function byDescendingPriority(left, right) {
        return BINARY_OPERATION_PRIORITY[right] - BINARY_OPERATION_PRIORITY[left]
    }
)

const TERTIARY_OPERATORS = ['?']
const FUNCTIONS = {
"abs": Math.abs,
"acos": Math.acos,
@ -71,15 +88,15 @@ const FUNCTIONS_LIST = Object.keys(FUNCTIONS);
// TODO: Complete
const DERIVATIVES = {
"abs": "abs(<1>)/<1>",
"acos": "-derivate(<1>)/sqrt(1-(<1>)^2)",
"acosh": "derivate(<1>)/sqrt((<1>)^2-1)",
"asin": "derivate(<1>)/sqrt(1-(<1>)^2)",
"asinh": "derivate(<1>)/sqrt((<1>)^2+1)",
"atan": "derivate(<1>)/(1+(<1>)^2)",
"acos": "-derivative(<1>)/sqrt(1-(<1>)^2)",
"acosh": "derivative(<1>)/sqrt((<1>)^2-1)",
"asin": "derivative(<1>)/sqrt(1-(<1>)^2)",
"asinh": "derivative(<1>)/sqrt((<1>)^2+1)",
"atan": "derivative(<1>)/(1+(<1>)^2)",
"atan2": "",
}
const INTEGRALS = {
"abs": "integrate(<1>)*sign(<1>)",
"abs": "integral(<1>)*sign(<1>)",
"acos": "",
"acosh": "",
"asin": "",

View file

@ -23,13 +23,14 @@
const WHITESPACES = " \t\n\r"
const STRING_LIMITORS = '"\'`';
const OPERATORS = "+-*/^%?:=!><";
const PUNCTUTATION = "()[]{},.";
const PUNCTUTATION = "()[],.";
const NUMBER_CHARS = "0123456789"
const IDENTIFIER_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789_₀₁₂₃₄₅₆₇₈₉αβγδεζηθκλμξρςστφχψωₐₑₒₓₔₕₖₗₘₙₚₛₜ"
var TokenType = {
// Expression type
"WHITESPACE": "WHITESPACE",
"IDENTIFIER": "IDENTIFIER",
"VARIABLE": "VARIABLE",
"CONSTANT": "CONSTANT",
"FUNCTION": "FUNCTION",
@ -49,10 +50,11 @@ class Token {
}
class ExpressionTokenizer {
constructor(input, tokenizeWhitespaces = false, errorOnUnknown = true) {
this.input = input;
this.currentToken = null;
constructor(input, tokenizeWhitespaces = false, differentiateIdentifiers = false, errorOnUnknown = true) {
this.input = input
this.currentToken = null
this.tokenizeWhitespaces = tokenizeWhitespaces
this.differentiateIdentifiers = differentiateIdentifiers
this.errorOnUnknown = errorOnUnknown
}
@ -115,10 +117,16 @@ class ExpressionTokenizer {
while(!this.input.atEnd() && IDENTIFIER_CHARS.includes(this.input.peek().toLowerCase())) {
identifier += this.input.next();
}
if(Reference.CONSTANTS_LIST.includes(identifier.toLowerCase())) {
return new Token(TokenType.CONSTANT, identifier.toLowerCase(), this.input.position-identifier.length)
} else if(Reference.FUNCTIONS_LIST.includes(identifier.toLowerCase())) {
return new Token(TokenType.FUNCTION, identifier.toLowerCase(), this.input.position-identifier.length)
let identifierLC = identifier.toLowerCase()
if(Reference.CONSTANTS_LIST.includes(identifierLC)) {
return new Token(TokenType.CONSTANT, identifierLC, this.input.position-identifier.length)
} else if(Reference.FUNCTIONS_LIST.includes(identifierLC)) {
return new Token(TokenType.FUNCTION, identifierLC, this.input.position-identifier.length)
} else if(Reference.UNARY_OPERATORS.includes(identifierLC) ||
Reference.BINARY_OPERATORS.includes(identifierLC) ||
Reference.TERTIARY_OPERATORS.includes(identifierLC)
) {
return new Token(TokenType.OPERATOR, identifierLC, this.input.position-identifier.length)
} else {
return new Token(TokenType.VARIABLE, identifier, this.input.position-identifier.length)
}
@ -137,7 +145,7 @@ class ExpressionTokenizer {
if(Reference.CONSTANTS_LIST.includes(c)) return new Token(TokenType.CONSTANT, this.input.next(), this.input.position-1);
if(PUNCTUTATION.includes(c)) return new Token(TokenType.PUNCT, this.input.next(), this.input.position-1);
if(this.errorOnUnknown)
this.input.throw("Unknown token character " + c)
this.raise("Unknown token character " + c)
else
return new Token(TokenType.UNKNOWN, this.input.next(), this.input.position-1);
}
@ -156,14 +164,25 @@ class ExpressionTokenizer {
this.currentToken = null;
return tmp;
}
read(type, value) {
let next = this.next()
if(type != null && next.type != type)
this.raise(`Unexpected ${next.type.toLowerCase()} ${next.value}. Expected type was ${type.toLowerCase()}.`);
if(value != null && next.value == value)
this.raise(`Unexpected ${next.type.toLowerCase()} ${next.value}. Expected value was ${value}.`);
return next
}
atEnd() {
return this.peek() == null;
}
skip(type) {
let next = this.next();
if(next.type != type)
input.raise("Unexpected token " + next.type.toLowerCase() + ' "' + next.value + '". Expected ' + type.toLowerCase());
skip(type, value) {
this.read(type, value)
}
raise(message) {
this.input.raise(message)
}
}