Compare commits

...

4 commits

Author SHA1 Message Date
64d5f11ff9
FlatPlusOperation for future simplifications!
All checks were successful
continuous-integration/drone/push Build is passing
2022-10-25 16:26:08 +02:00
80d0dad63a
Implementing parseBinaryOperator
All checks were successful
continuous-integration/drone/push Build is passing
2022-10-23 18:09:20 +02:00
d7704110dd
Lots of changes to the AST Builder.
All checks were successful
continuous-integration/drone/push Build is passing
Adding IDENTIFIER Token type with optional differentiation between functions, constants and variables for syntax highlighting.
Adding substitute for many AST elements.
Properly starting builder: parsing identifiers, functions, variables, array values, properties, numbers, strings and subexpressions.
2022-10-23 15:43:47 +02:00
666d611e95
Fixing up old stuff, cleaning up and standardizing AST. 2022-10-22 14:06:03 +02:00
6 changed files with 1139 additions and 465 deletions

View file

@ -504,7 +504,7 @@ Item {
Generates a list of tokens from the given. Generates a list of tokens from the given.
*/ */
function tokens(text) { function tokens(text) {
let tokenizer = new Parsing.Tokenizer(new Parsing.Input(text), true, false) let tokenizer = new Parsing.Tokenizer(new Parsing.Input(text), true, true, false)
let tokenList = [] let tokenList = []
let token let token
while((token = tokenizer.next()) != null) while((token = tokenizer.next()) != null)

View file

@ -103,7 +103,7 @@ function functionToLatex(f, args) {
* @param {string} vari - variable to convert * @param {string} vari - variable to convert
* @returns {string} * @returns {string}
*/ */
function variable(vari) {
let unicodechars = ["α","β","γ","δ","ε","ζ","η", let unicodechars = ["α","β","γ","δ","ε","ζ","η",
"π","θ","κ","λ","μ","ξ","ρ", "π","θ","κ","λ","μ","ξ","ρ",
"ς","σ","τ","φ","χ","ψ","ω", "ς","σ","τ","φ","χ","ψ","ω",
@ -124,6 +124,7 @@ function variable(vari) {
"{}^{7}","{}^{8}","{}^{9}","{}^{0}","{}_{1}","{}_{2}","{}_{3}", "{}^{7}","{}^{8}","{}^{9}","{}^{0}","{}_{1}","{}_{2}","{}_{3}",
"{}_{4}","{}_{5}","{}_{6}","{}_{7}","{}_{8}","{}_{9}","{}_{0}", "{}_{4}","{}_{5}","{}_{6}","{}_{7}","{}_{8}","{}_{9}","{}_{0}",
"\\pi"] "\\pi"]
function variable(vari) {
for(let i = 0; i < unicodechars.length; i++) { for(let i = 0; i < unicodechars.length; i++) {
//console.log(vari, unicodechars[i], equivalchars[i]); //console.log(vari, unicodechars[i], equivalchars[i]);
if(vari.includes(unicodechars[i])) if(vari.includes(unicodechars[i]))

View file

@ -23,25 +23,258 @@
/**
 * Builds an Abstract Syntax Tree from the tokens emitted by an ExpressionTokenizer.
 *
 * Parsed elements are accumulated on an internal stack; parseFullExpression()
 * is the public entry point and returns the root AbstractSyntaxElement.
 */
class ExpressionBuilder {
    /**
     * @param {ExpressionTokenizer} tokenizer - token source (whitespace and identifier differentiation are force-disabled)
     * @param {boolean} rememberTokens - when true, every handled token is kept in this.tokens
     */
    constructor(tokenizer, rememberTokens = false) {
        this.tokenizer = tokenizer;
        if(tokenizer.tokenizeWhitespaces) {
            console.warn('WARNING: The ExpressionTokenizer for ExpressionBuilder was initialized with whitespace support. Disabled.')
            tokenizer.tokenizeWhitespaces = false
        }
        if(tokenizer.differentiateIdentifiers) {
            console.warn('WARNING: The ExpressionTokenizer for ExpressionBuilder was initialized with identifier differentiation support. Disabled.')
            tokenizer.differentiateIdentifiers = false
        }
        this.tokens = []
        this.rememberTokens = rememberTokens
        this.stack = []
    }

    /**
     * Parses an expression until the end is reached.
     *
     * @throws {Error} When an unexpected token is encountered.
     * @returns {AbstractSyntaxElement}
     */
    parseFullExpression() {
        this.parseExpression([])
        return this.stack.pop()
    }

    /**
     * Parses an expression until one of the given delimitators (or the end of
     * the input) is reached. The delimitator itself is NOT consumed; the caller
     * is responsible for skipping it.
     *
     * @param {string[]} punctuationDelimitators - List of delimitators that ends the expression
     * @throws {Error} When an unexpected token is encountered.
     */
    parseExpression(punctuationDelimitators = []) {
        // BUGFIX: track the stack depth at entry so the sanity checks below work
        // when parseExpression is called recursively (nested parentheses,
        // function arguments, array indices) with values already on the stack.
        let initialDepth = this.stack.length
        let token
        while((token = this.tokenizer.peek()) != null) {
            if(token.type == TK.TokenType.PUNCT && token.value != '(') { // '(' still starts a subexpression.
                if(punctuationDelimitators.includes(token.value))
                    break
                else if(punctuationDelimitators.length == 1)
                    this.tokenizer.raise(`Unexpected ${token.value}. Expected '${punctuationDelimitators[0]}'.`)
                else
                    // BUGFIX: join the whole delimitator list, not its first element.
                    this.tokenizer.raise(`Unexpected ${token.value}. Expected one of: '${punctuationDelimitators.join("', '")}'.`)
            } else
                // BUGFIX: handleSingle expects the peeked token as argument.
                this.handleSingle(token)
        }
        if(token == null && punctuationDelimitators.length > 0)
            if(punctuationDelimitators.length == 1)
                this.tokenizer.raise(`Unexpected end of expression. Expected '${punctuationDelimitators[0]}'.`)
            else
                // BUGFIX: join the whole delimitator list, not its first element.
                this.tokenizer.raise(`Unexpected end of expression. Expected one of: '${punctuationDelimitators.join("', '")}'.`)
        if(this.stack.length - initialDepth == 0)
            if(token == null)
                this.tokenizer.raise(`Unexpected end of expression. Expected at least one element.`)
            else
                this.tokenizer.raise(`Unexpected ${token.value}. Expected at least one element.`)
        if(this.stack.length - initialDepth > 1)
            this.tokenizer.raise('Invalid expression.')
    }

    /**
     * Handles a single (assumed non-null) peeked token based on its type.
     *
     * @param {Token} token - token returned by this.tokenizer.peek()
     * @throws {Error} When an unexpected token is encountered.
     */
    handleSingle(token) {
        switch(token.type) {
            case TK.TokenType.NUMBER:
                this.stack.push(new AST.NumberElement(this.tokenizer.next().value))
                break
            case TK.TokenType.STRING:
                this.stack.push(new AST.StringElement(this.tokenizer.next().value))
                break
            case TK.TokenType.IDENTIFIER:
            case TK.TokenType.OPERATOR:
                if(this.stack.length == 0 && Reference.UNARY_OPERATORS.includes(token.value))
                    this.parseSingleOperation()
                else if(this.stack.length > 0 && Reference.BINARY_OPERATORS.includes(token.value))
                    this.parseBinaryOperations()
                else if(this.stack.length > 0 && Reference.TERTIARY_OPERATORS.includes(token.value))
                    this.parseTertiaryOperation()
                else if(token.type == TK.TokenType.IDENTIFIER)
                    // If it isn't a reserved keyword for operators (e.g. and, or...), then it *is* an identifier.
                    this.parseIdentifier()
                else
                    this.tokenizer.raise(`Unknown operator: ${token.value}.`)
                break
            case TK.TokenType.PUNCT:
                if(token.value == '(') {
                    this.tokenizer.skip(TK.TokenType.PUNCT, '(') // Skip the opening parenthesis.
                    this.parseExpression([')'])
                    // BUGFIX: consume the closing parenthesis that parseExpression
                    // deliberately leaves in the stream.
                    this.tokenizer.skip(TK.TokenType.PUNCT, ')')
                } else
                    this.tokenizer.raise(`Unexpected ${token.value}. Expected a value.`)
                break
            default:
                this.tokenizer.raise(`Unknown token provided: ${token.value}.`)
                break
        }
        if(this.rememberTokens)
            this.tokens.push(token)
    }

    /**
     * Parses a single token element, if any remains.
     *
     * @throws {Error} When an unexpected token is encountered.
     */
    parseSingle() {
        let token = this.tokenizer.peek()
        if(token != null)
            this.handleSingle(token)
    }

    /**
     * Parses an identifier token into a Constant or Variable element, then
     * checks for followups (function call, property access, array access).
     */
    parseIdentifier() {
        // Assuming the right type.
        let token = this.tokenizer.read(TK.TokenType.IDENTIFIER)
        if(Reference.CONSTANTS_LIST.includes(token.value))
            this.stack.push(new AST.Constant(token.value))
        else
            this.stack.push(new AST.Variable(token.value))
        this.checkIdentifierFollowupTokens()
    }

    /**
     * Parses a function call based on a previously parsed identifier.
     * NOTE: Expects to have at least one stack element for the function name.
     */
    parseFunction() {
        // TODO: Implement dynamic functions values instead of being based on names.
        let functionValue = this.stack.pop()
        if(!(functionValue instanceof AST.Variable))
            // BUGFIX: the closing period belongs inside the string literal (was a syntax error).
            this.tokenizer.raise("Executing functions from dynamic variables is not implemented.")
        let functionName = functionValue.variableName
        let args = []
        let token
        while((token = this.tokenizer.peek()) != null && token.value != ')') {
            this.tokenizer.skip(TK.TokenType.PUNCT) // Skip the opening parenthesis and the commas.
            // BUGFIX: method call needs the `this.` receiver.
            this.parseExpression([',', ')'])
            args.push(this.stack.pop())
        }
        if(token == null)
            this.tokenizer.raise(`Unexpected end of expression. Expected ')'.`)
        // BUGFIX: consume the closing parenthesis that ended the argument list.
        this.tokenizer.skip(TK.TokenType.PUNCT, ')')
        // BUGFIX: functionName is a local variable, not an instance property.
        if(functionName == 'derivation')
            this.stack.push(new AST.DerivationElement(args))
        else if(functionName == 'integral')
            this.stack.push(new AST.IntegralElement(args))
        else
            this.stack.push(new AST.FunctionElement(functionName, args))
    }

    /**
     * Parses an object property access based on a previously parsed identifier.
     * NOTE: Expects to have at least one stack element for the property object.
     */
    parseProperty() {
        this.tokenizer.skip(TK.TokenType.PUNCT, '.') // Skipping the dot.
        let token = this.tokenizer.read(TK.TokenType.IDENTIFIER)
        this.stack.push(new AST.PropertyElement(this.stack.pop(), token.value))
        this.checkIdentifierFollowupTokens()
    }

    /**
     * Parses the value of the element of an array at a given index based on a
     * previously parsed identifier.
     * NOTE: Expects to have at least one stack element for the array object.
     */
    parseArrayValue() {
        this.tokenizer.skip(TK.TokenType.PUNCT, '[') // Skipping the array opener.
        let obj = this.stack.pop()
        // BUGFIX: method call needs the `this.` receiver.
        this.parseExpression([']'])
        // BUGFIX: consume the closing bracket that parseExpression leaves in the stream.
        this.tokenizer.skip(TK.TokenType.PUNCT, ']')
        this.stack.push(new AST.ArrayElement(obj, this.stack.pop()))
        this.checkIdentifierFollowupTokens()
    }

    /**
     * Checks for followup tokens following a value getting.
     * E.g: getting the property of an object, an array member, or calling a function.
     * NOTE: Expects to have at least one stack element for previous calling object.
     */
    checkIdentifierFollowupTokens() {
        let peeked = this.tokenizer.peek()
        if(peeked != null && peeked.type == TK.TokenType.PUNCT)
            switch(peeked.value) {
                case '(':
                    // Function call
                    this.parseFunction()
                    break
                case '.':
                    // Member property
                    this.parseProperty()
                    break
                case '[':
                    // Array value
                    this.parseArrayValue()
                    break
            }
    }

    /**
     * Parses a chain of binary operations (the first operand is expected on the
     * stack) and folds it into nested BinaryOperation elements, most prioritary
     * operators first.
     *
     * @throws {Error} When an unexpected token is encountered.
     */
    parseBinaryOperations() {
        // BUGFIX: the guard must reject tokens that are NOT binary operators
        // (the condition was not negated), and the priority map lives in Reference.
        if(!(this.tokenizer.peek().value in Reference.BINARY_OPERATION_PRIORITY))
            throw new Error("Current token is not a binary operator.")
        if(this.stack.length == 0)
            throw new Error(`The operator ${this.tokenizer.peek().value} can only be used after a value.`)
        // Parse a sequence of operations, and order them based on BINARY_OPERATION_PRIORITY.
        let elements = [this.stack.pop()]
        let operators = [this.tokenizer.next().value]
        // BUGFIX: the flag was declared but never updated, so the value/operator
        // alternation never happened (and symbol operators looped forever).
        let nextIsOperator = false
        let token
        while((token = this.tokenizer.peek()) != null) {
            if(nextIsOperator) {
                if(token.type == TK.TokenType.PUNCT)
                    if(token.value == ')')
                        // Don't skip that token, but stop the parsing,
                        // as it may be an unopened expression.
                        break
                    else
                        this.tokenizer.raise(`Unexpected ${token.value}. Expected an operator, or ')'.`)
                // BUGFIX: accept both symbol (OPERATOR) and keyword (IDENTIFIER)
                // operators — both appear in Reference.BINARY_OPERATORS.
                else if(Reference.BINARY_OPERATORS.includes(token.value)) {
                    // BUGFIX: 'operators' is a local array (was misspelled 'this.operartors').
                    operators.push(this.tokenizer.next().value)
                    nextIsOperator = false
                } else if(Reference.TERTIARY_OPERATORS.includes(token.value))
                    // Break to let the hand back to the parser.
                    break
                else if(Reference.UNARY_OPERATORS.includes(token.value))
                    this.tokenizer.raise(`Invalid use of operator ${token.value} after a value.`)
                else
                    this.tokenizer.raise(`Unknown operator: ${token.value}.`)
            } else {
                // BUGFIX: method call needs the `this.` receiver.
                this.handleSingle(token)
                let value = this.stack.pop()
                if(token.value != '(' && (value instanceof AST.BinaryOperation || value instanceof AST.TertiaryOperation))
                    // In case you chain something like 'or' and '*'.
                    // Unary operations are exempted from this as they are used for a single value.
                    // BUGFIX: operators holds plain strings, not token objects.
                    this.tokenizer.raise(`Cannot chain operations ${operators[operators.length-1]} and ${value.ope}.`)
                elements.push(value)
                nextIsOperator = true
            }
        }
        // Now we have our full chain, we need to match by operation priority.
        // TODO: Implement FlatBinaryOperations for better simplification and smarter trees.
        // BUGFIX: BINARY_OPERATORS is declared in Reference (sorted most-to-least priority).
        for(let ope of Reference.BINARY_OPERATORS)
            while(operators.includes(ope)) { // Skip if not in priority.
                let index = operators.indexOf(ope)
                operators.splice(index, 1) // Remove operator from array.
                // BUGFIX: BinaryOperation lives in the AST module.
                elements.splice(index, 2, new AST.BinaryOperation(elements[index], ope, elements[index+1]))
            }
        // At the end, there should be no more operators and only one element.
        this.stack.push(elements.pop())
    }
}

View file

@ -28,6 +28,23 @@ const CONSTANTS = {
}; };
const CONSTANTS_LIST = Object.keys(CONSTANTS); const CONSTANTS_LIST = Object.keys(CONSTANTS);
// Operators taking a single operand. None are implemented yet.
const UNARY_OPERATORS = []
// Binding priority of each binary operator: a higher number binds tighter.
// NOTE(review): '%' ranks below '+'/'-' here — confirm this precedence is intended.
const BINARY_OPERATION_PRIORITY = {
'and': 0, 'or': 0, 'in': 0,
'==': 10, '!=' : 10,
'>': 10, '<': 10, '>=': 10, '<=': 10,
'%': 20, '||': 20,
'+': 30, '-': 30,
'*': 40, '/': 40,
'^': 50
}
// Sorted by priority (most to least)
const BINARY_OPERATORS = Object.keys(BINARY_OPERATION_PRIORITY).sort((ope1, ope2) => BINARY_OPERATION_PRIORITY[ope2]-BINARY_OPERATION_PRIORITY[ope1])
// Operators taking three operands — presumably the conditional '?' ':' pair; confirm against the builder.
const TERTIARY_OPERATORS = ['?']
const FUNCTIONS = { const FUNCTIONS = {
"abs": Math.abs, "abs": Math.abs,
"acos": Math.acos, "acos": Math.acos,
@ -71,15 +88,15 @@ const FUNCTIONS_LIST = Object.keys(FUNCTIONS);
// TODO: Complete // TODO: Complete
const DERIVATIVES = { const DERIVATIVES = {
"abs": "abs(<1>)/<1>", "abs": "abs(<1>)/<1>",
"acos": "-derivate(<1>)/sqrt(1-(<1>)^2)", "acos": "-derivative(<1>)/sqrt(1-(<1>)^2)",
"acosh": "derivate(<1>)/sqrt((<1>)^2-1)", "acosh": "derivative(<1>)/sqrt((<1>)^2-1)",
"asin": "derivate(<1>)/sqrt(1-(<1>)^2)", "asin": "derivative(<1>)/sqrt(1-(<1>)^2)",
"asinh": "derivate(<1>)/sqrt((<1>)^2+1)", "asinh": "derivative(<1>)/sqrt((<1>)^2+1)",
"atan": "derivate(<1>)/(1+(<1>)^2)", "atan": "derivative(<1>)/(1+(<1>)^2)",
"atan2": "", "atan2": "",
} }
const INTEGRALS = { const INTEGRALS = {
"abs": "integrate(<1>)*sign(<1>)", "abs": "integral(<1>)*sign(<1>)",
"acos": "", "acos": "",
"acosh": "", "acosh": "",
"asin": "", "asin": "",

View file

@ -23,13 +23,14 @@
const WHITESPACES = " \t\n\r" const WHITESPACES = " \t\n\r"
const STRING_LIMITORS = '"\'`'; const STRING_LIMITORS = '"\'`';
const OPERATORS = "+-*/^%?:=!><"; const OPERATORS = "+-*/^%?:=!><";
const PUNCTUTATION = "()[]{},."; const PUNCTUTATION = "()[],.";
const NUMBER_CHARS = "0123456789" const NUMBER_CHARS = "0123456789"
const IDENTIFIER_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789_₀₁₂₃₄₅₆₇₈₉αβγδεζηθκλμξρςστφχψωₐₑₒₓₔₕₖₗₘₙₚₛₜ" const IDENTIFIER_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789_₀₁₂₃₄₅₆₇₈₉αβγδεζηθκλμξρςστφχψωₐₑₒₓₔₕₖₗₘₙₚₛₜ"
var TokenType = { var TokenType = {
// Expression type // Expression type
"WHITESPACE": "WHITESPACE", "WHITESPACE": "WHITESPACE",
"IDENTIFIER": "IDENTIFIER",
"VARIABLE": "VARIABLE", "VARIABLE": "VARIABLE",
"CONSTANT": "CONSTANT", "CONSTANT": "CONSTANT",
"FUNCTION": "FUNCTION", "FUNCTION": "FUNCTION",
@ -49,10 +50,11 @@ class Token {
} }
class ExpressionTokenizer { class ExpressionTokenizer {
constructor(input, tokenizeWhitespaces = false, errorOnUnknown = true) { constructor(input, tokenizeWhitespaces = false, differentiateIdentifiers = false, errorOnUnknown = true) {
this.input = input; this.input = input
this.currentToken = null; this.currentToken = null
this.tokenizeWhitespaces = tokenizeWhitespaces this.tokenizeWhitespaces = tokenizeWhitespaces
this.differentiateIdentifiers = differentiateIdentifiers
this.errorOnUnknown = errorOnUnknown this.errorOnUnknown = errorOnUnknown
} }
@ -115,10 +117,16 @@ class ExpressionTokenizer {
while(!this.input.atEnd() && IDENTIFIER_CHARS.includes(this.input.peek().toLowerCase())) { while(!this.input.atEnd() && IDENTIFIER_CHARS.includes(this.input.peek().toLowerCase())) {
identifier += this.input.next(); identifier += this.input.next();
} }
if(Reference.CONSTANTS_LIST.includes(identifier.toLowerCase())) { let identifierLC = identifier.toLowerCase()
return new Token(TokenType.CONSTANT, identifier.toLowerCase(), this.input.position-identifier.length) if(Reference.CONSTANTS_LIST.includes(identifierLC)) {
} else if(Reference.FUNCTIONS_LIST.includes(identifier.toLowerCase())) { return new Token(TokenType.CONSTANT, identifierLC, this.input.position-identifier.length)
return new Token(TokenType.FUNCTION, identifier.toLowerCase(), this.input.position-identifier.length) } else if(Reference.FUNCTIONS_LIST.includes(identifierLC)) {
return new Token(TokenType.FUNCTION, identifierLC, this.input.position-identifier.length)
} else if(Reference.UNARY_OPERATORS.includes(identifierLC) ||
Reference.BINARY_OPERATORS.includes(identifierLC) ||
Reference.TERTIARY_OPERATORS.includes(identifierLC)
) {
return new Token(TokenType.OPERATOR, identifierLC, this.input.position-identifier.length)
} else { } else {
return new Token(TokenType.VARIABLE, identifier, this.input.position-identifier.length) return new Token(TokenType.VARIABLE, identifier, this.input.position-identifier.length)
} }
@ -137,7 +145,7 @@ class ExpressionTokenizer {
if(Reference.CONSTANTS_LIST.includes(c)) return new Token(TokenType.CONSTANT, this.input.next(), this.input.position-1); if(Reference.CONSTANTS_LIST.includes(c)) return new Token(TokenType.CONSTANT, this.input.next(), this.input.position-1);
if(PUNCTUTATION.includes(c)) return new Token(TokenType.PUNCT, this.input.next(), this.input.position-1); if(PUNCTUTATION.includes(c)) return new Token(TokenType.PUNCT, this.input.next(), this.input.position-1);
if(this.errorOnUnknown) if(this.errorOnUnknown)
this.input.throw("Unknown token character " + c) this.raise("Unknown token character " + c)
else else
return new Token(TokenType.UNKNOWN, this.input.next(), this.input.position-1); return new Token(TokenType.UNKNOWN, this.input.next(), this.input.position-1);
} }
@ -157,13 +165,24 @@ class ExpressionTokenizer {
return tmp; return tmp;
} }
read(type, value) {
let next = this.next()
if(type != null && next.type != type)
this.raise(`Unexpected ${next.type.toLowerCase()} ${next.value}. Expected type was ${type.toLowerCase()}.`);
if(value != null && next.value == value)
this.raise(`Unexpected ${next.type.toLowerCase()} ${next.value}. Expected value was ${value}.`);
return next
}
atEnd() { atEnd() {
return this.peek() == null; return this.peek() == null;
} }
skip(type) { skip(type, value) {
let next = this.next(); this.read(type, value)
if(next.type != type) }
input.raise("Unexpected token " + next.type.toLowerCase() + ' "' + next.value + '". Expected ' + type.toLowerCase());
raise(message) {
this.input.raise(message)
} }
} }