Compare commits

...

2 commits

Author SHA1 Message Date
d7704110dd
Lots of changes to the AST Builder.
All checks were successful
continuous-integration/drone/push Build is passing
Adding IDENTIFIER Token type with optional differentiation between functions, constants and variables for syntax highlighting.
Adding substitute for many AST elements.
Properly starting builder: parsing identifiers, functions, variables, array values, properties, numbers, strings and subexpressions.
2022-10-23 15:43:47 +02:00
666d611e95
Fixing up old stuff, cleaning up and standardizing AST. 2022-10-22 14:06:03 +02:00
6 changed files with 927 additions and 431 deletions

View file

@ -504,7 +504,7 @@ Item {
Generates a list of tokens from the given text.
*/
function tokens(text) {
let tokenizer = new Parsing.Tokenizer(new Parsing.Input(text), true, false)
let tokenizer = new Parsing.Tokenizer(new Parsing.Input(text), true, true, false)
let tokenList = []
let token
while((token = tokenizer.next()) != null)

View file

@ -103,8 +103,8 @@ function functionToLatex(f, args) {
* @param {string} vari - variable to convert
* @returns {string}
*/
function variable(vari) {
let unicodechars = ["α","β","γ","δ","ε","ζ","η",
let unicodechars = ["α","β","γ","δ","ε","ζ","η",
"π","θ","κ","λ","μ","ξ","ρ",
"ς","σ","τ","φ","χ","ψ","ω",
"Γ","Δ","Θ","Λ","Ξ","Π","Σ",
@ -114,7 +114,7 @@ function variable(vari) {
"⁷","⁸","⁹","⁰","₁","₂","₃",
"₄","₅","₆","₇","₈","₉","₀",
"pi"]
let equivalchars = ["\\alpha","\\beta","\\gamma","\\delta","\\epsilon","\\zeta","\\eta",
let equivalchars = ["\\alpha","\\beta","\\gamma","\\delta","\\epsilon","\\zeta","\\eta",
"\\pi","\\theta","\\kappa","\\lambda","\\mu","\\xi","\\rho",
"\\sigma","\\sigma","\\tau","\\phi","\\chi","\\psi","\\omega",
"\\Gamma","\\Delta","\\Theta","\\Lambda","\\Xi","\\Pi","\\Sigma",
@ -124,6 +124,7 @@ function variable(vari) {
"{}^{7}","{}^{8}","{}^{9}","{}^{0}","{}_{1}","{}_{2}","{}_{3}",
"{}_{4}","{}_{5}","{}_{6}","{}_{7}","{}_{8}","{}_{9}","{}_{0}",
"\\pi"]
function variable(vari) {
for(let i = 0; i < unicodechars.length; i++) {
//console.log(vari, unicodechars[i], equivalchars[i]);
if(vari.includes(unicodechars[i]))

View file

@ -23,25 +23,217 @@
class ExpressionBuilder {
constructor(tokenizer) {
constructor(tokenizer, rememberTokens = false) {
this.tokenizer = tokenizer;
if(tokenizer.tokenizeWhitespaces) {
console.warn('WARNING: The ExpressionTokenizer for ExpressionBuilder was initialized with whitespace support. Disabled.')
tokenizer.tokenizeWhitespaces = false
}
if(tokenizer.differentiateIdentifiers) {
console.warn('WARNING: The ExpressionTokenizer for ExpressionBuilder was initialized with identifier differentiation support. Disabled.')
tokenizer.differentiateIdentifiers = false
}
this.tokens = []
this.rememberTokens = rememberTokens
this.stack = []
}
parseExpression(delimitors = '') {
// Parse a sequence of operations, and orders them based on OPERATION_PRIORITY.
let elements = []
let operators = []
let firstToken = this.tokenizer.peek();
if(firstToken.type == TK.TokenType.OPERATOR) // First operations.
if(firstToken.value == "-") {
// TODO: Set initial argument.
this.tokenizer.skip(TK.TokenType.OPERATOR)
/**
* Parses an expression until the end is reached.
*
* @throws {Error} When an unexpected token is encountered.
* @returns {AbstractSyntaxElement}
*/
parseFullExpression() {
this.parseExpression([])
return this.stack.pop()
}
/**
* Parses an expression until the end is reached.
*
* @param {string} punctuationDelimitators - List of deliminators that ends the expression
* @throws {Error} When an unexpected token is encountered.
*/
parseExpression(punctuationDelimitators = []) {
let token
while((token = this.tokenizer.peek()) != null) {
if(token.type == TK.TokenType.PUNCT && token.value != '(') // Still allow expression creation.
if(punctuationDelimitators.includes(token.value))
break
else if(punctuationDelimitators.length == 1)
this.tokenizer.raise(`Unexpected ${token.value}. Expected '${punctuationDelimitators[0]}'.`)
else
this.tokenizer.raise(`Unexpected ${token.value}. Expected one of: '${punctuationDelimitators[0].join("', '")}'.`)
else
this.handleSingle()
}
if(token == null && punctuationDelimitators.length > 0)
if(punctuationDelimitators.length == 1)
this.tokenizer.raise(`Unexpected end of expression. Expected '${punctuationDelimitators[0]}'.`)
else
this.tokenizer.raise(`Unexpected end of expression. Expected one of: '${punctuationDelimitators[0].join("', '")}'.`)
if(this.stack.length == 0)
if(token == null)
this.tokenizer.raise(`Unexpected end of expression. Expected at least one element.`)
else
this.tokenizer.raise(`Unexpected ${token.value}. Expected at least one element.`)
if(this.stack.length > 1)
this.tokenizer.raise('Invalid expression.')
}
/**
* Handles a single (assumed non-null) token based on its type.
*
* @param {AbstractSyntaxElement} token
* @throws {Error} When an unexpected token is encountered.
*/
handleSingle(token) {
switch(token.type) {
case TK.TokenType.IDENTIFIER:
this.parseIdentifier()
break
case TK.TokenType.OPERATOR:
if(this.stack.length == 0 && Reference.UNARY_OPERATORS.includes(token.value))
this.parseSingleOperation()
else if(this.stack.length > 0 && Reference.BINARY_OPERATORS.includes(token.value))
this.parseBinaryOperations()
else if(this.stack.length > 0 && Reference.TERTIARY_OPERATORS.includes(token.value))
this.parseTertiaryOperation()
break
case TK.TokenType.NUMBER:
this.stack.push(new AST.NumberElement(this.tokenizer.next().value))
break
case TK.TokenType.STRING:
this.stack.push(new AST.StringElement(this.tokenizer.next().value))
break
case TK.TokenType.PUNCT:
if(token.value == '(') {
this.tokenizer.skip(TK.TokenType.PUNCT, '(') // Skip the opening parentheses.
this.parseExpression([')'])
} else
tokenizer.input.raise(`Invalid operator ${firstToken.value} at begining of statement.`)
else {
this.tokenizer.raise(`Unexpected ${token.value}. Expected a value.`)
break
default:
this.tokenizer.raise(`Unknown token provided: ${token.value}.`)
break
}
if(this.rememberTokens)
this.tokens.push(token)
}
/**
* Parses a single token element.
*
* @throws {Error} When an unexpected token is encountered.
*/
parseSingle() {
let token = this.tokenizer.peek()
if(token != null)
this.handleSingle(token)
}
parseIdentifier() {
// Assuming the right type.
let token = this.tokenizer.read(TK.TokenType.IDENTIFIER)
if(Reference.CONSTANTS_LIST.includes(token.value))
this.stack.push(new AST.Constant(token.value))
else
this.stack.push(new AST.Variable(token.value))
this.checkIdentifierFollowupTokens()
}
/**
* Parses a function based on a previously called identifier.
* NOTE: Expects to have at least one stack element for function name.
*/
parseFunction() {
// TODO: Implement dynamic functions values instead of being based on names.
let functionValue = this.stack.pop()
if(!(functionValue instanceof AST.Variable))
this.tokenizer.raise("Executing functions from dynamic variables is not implemented".)
let functionName = functionValue.variableName
let args = []
let token
while((token = this.tokenizer.peek()) != null && token.value != ')') {
this.tokenizer.skip(TK.TokenType.PUNCT) // Skip the opening parenthesis and the commas.
parseExpression([',',')'])
args.push(this.stack.pop())
}
if(token == null)
this.tokenizer.raise(`Unexpected end of expression. Expected ')'.`)
if(this.functionName == 'derivation')
this.stack.push(new AST.DerivationElement(args))
else if(this.functionName == 'integral')
this.stack.push(new AST.IntegralElement(args))
else
this.stack.push(new AST.FunctionElement(functionName, args))
}
/**
* Parses an object property based on a previously called identifier.
* NOTE: Expects to have at least one stack element for property object name.
*/
parseProperty() {
this.tokenizer.skip(TK.TokenType.PUNCT, '.') // Skipping the dot.
let token = this.tokenizer.read(TK.TokenType.IDENTIFIER)
this.stack.push(new AST.PropertyElement(this.stack.pop(), token.value))
this.checkIdentifierFollowupTokens()
}
/**
* Parses the value of the element of an array at a given index based on a previously called identifier.
* NOTE: Expects to have at least one stack element for property object name.
*/
parseArrayValue() {
this.tokenizer.skip(TK.TokenType.PUNCT, '[') // Skipping the array opener.
let obj = this.stack.pop()
parseExpression([']'])
this.stack.push(new AST.ArrayElement(obj, this.stack.pop()))
this.checkIdentifierFollowupTokens()
}
/**
* Checks for followup tokens following a value getting.
* E.g: getting the property of an object, an array member, or calling a function.
* NOTE: Expects to have at least one stack element for previous calling object.
*/
checkIdentifierFollowupTokens() {
let peeked = this.tokenizer.peek()
if(peeked != null && peeked.type == TK.TokenType.PUNCT)
switch(peeked.value) {
case '(':
// Function call
this.parseFunction()
break
case '.':
// Member property
this.parseProperty()
break
case '[':
// Array value
this.parseArrayValue()
break
}
}
parseOperation()`
parseBinaryOperations() {
if((this.tokenizer.peek().value in AST.BINARY_OPERATION_PRIORITY))
throw new Error("Current token is not a binary operator.")
if(this.stack.length == 0)
throw new Error(`The operator ${this.tokenizer.peek().value} can only be used after a value.`)
// Parse a sequence of operations, and orders them based on OPERATION_PRIORITY.
let elements = [this.stack.pop()]
let operators = [this.tokenizer.next()]
let token
while((token = this.tokenizer.peek()) != null) {
}
}
}

View file

@ -28,6 +28,21 @@ const CONSTANTS = {
};
const CONSTANTS_LIST = Object.keys(CONSTANTS);
// Operators taking a single operand. Currently empty.
const UNARY_OPERATORS = []
// Maps each binary operator to a numeric priority.
// NOTE(review): presumably a higher number binds tighter ('^' > '*' > '+') — confirm.
const BINARY_OPERATION_PRIORITY = {
'and': 0, 'or': 0, 'in': 0,
'==': 10, '!=' : 10,
'>': 10, '<': 10, '>=': 10, '<=': 10,
'%': 20, '||': 20,
'+': 30, '-': 30,
'*': 40, '/': 40,
'^': 50
}
// Names of all binary operators, in the declaration order of the table above.
const BINARY_OPERATORS = Object.keys(BINARY_OPERATION_PRIORITY)
// Operators introducing a three-operand operation.
const TERTIARY_OPERATORS = ['?']
const FUNCTIONS = {
"abs": Math.abs,
"acos": Math.acos,
@ -71,15 +86,15 @@ const FUNCTIONS_LIST = Object.keys(FUNCTIONS);
// TODO: Complete
const DERIVATIVES = {
"abs": "abs(<1>)/<1>",
"acos": "-derivate(<1>)/sqrt(1-(<1>)^2)",
"acosh": "derivate(<1>)/sqrt((<1>)^2-1)",
"asin": "derivate(<1>)/sqrt(1-(<1>)^2)",
"asinh": "derivate(<1>)/sqrt((<1>)^2+1)",
"atan": "derivate(<1>)/(1+(<1>)^2)",
"acos": "-derivative(<1>)/sqrt(1-(<1>)^2)",
"acosh": "derivative(<1>)/sqrt((<1>)^2-1)",
"asin": "derivative(<1>)/sqrt(1-(<1>)^2)",
"asinh": "derivative(<1>)/sqrt((<1>)^2+1)",
"atan": "derivative(<1>)/(1+(<1>)^2)",
"atan2": "",
}
const INTEGRALS = {
"abs": "integrate(<1>)*sign(<1>)",
"abs": "integral(<1>)*sign(<1>)",
"acos": "",
"acosh": "",
"asin": "",

View file

@ -23,13 +23,14 @@
const WHITESPACES = " \t\n\r"
const STRING_LIMITORS = '"\'`';
const OPERATORS = "+-*/^%?:=!><";
const PUNCTUTATION = "()[]{},.";
const PUNCTUTATION = "()[],.";
const NUMBER_CHARS = "0123456789"
const IDENTIFIER_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789_₀₁₂₃₄₅₆₇₈₉αβγδεζηθκλμξρςστφχψωₐₑₒₓₔₕₖₗₘₙₚₛₜ"
var TokenType = {
// Expression type
"WHITESPACE": "WHITESPACE",
"IDENTIFIER": "IDENTIFIER",
"VARIABLE": "VARIABLE",
"CONSTANT": "CONSTANT",
"FUNCTION": "FUNCTION",
@ -49,10 +50,11 @@ class Token {
}
class ExpressionTokenizer {
constructor(input, tokenizeWhitespaces = false, errorOnUnknown = true) {
this.input = input;
this.currentToken = null;
constructor(input, tokenizeWhitespaces = false, differentiateIdentifiers = false, errorOnUnknown = true) {
this.input = input
this.currentToken = null
this.tokenizeWhitespaces = tokenizeWhitespaces
this.differentiateIdentifiers = differentiateIdentifiers
this.errorOnUnknown = errorOnUnknown
}
@ -115,10 +117,16 @@ class ExpressionTokenizer {
while(!this.input.atEnd() && IDENTIFIER_CHARS.includes(this.input.peek().toLowerCase())) {
identifier += this.input.next();
}
if(Reference.CONSTANTS_LIST.includes(identifier.toLowerCase())) {
return new Token(TokenType.CONSTANT, identifier.toLowerCase(), this.input.position-identifier.length)
} else if(Reference.FUNCTIONS_LIST.includes(identifier.toLowerCase())) {
return new Token(TokenType.FUNCTION, identifier.toLowerCase(), this.input.position-identifier.length)
let identifierLC = identifier.toLowerCase()
if(Reference.CONSTANTS_LIST.includes(identifierLC)) {
return new Token(TokenType.CONSTANT, identifierLC, this.input.position-identifier.length)
} else if(Reference.FUNCTIONS_LIST.includes(identifierLC)) {
return new Token(TokenType.FUNCTION, identifierLC, this.input.position-identifier.length)
} else if(Reference.UNARY_OPERATORS.includes(identifierLC) ||
Reference.BINARY_OPERATORS.includes(identifierLC) ||
Reference.TERTIARY_OPERATORS.includes(identifierLC)
) {
return new Token(TokenType.OPERATOR, identifierLC, this.input.position-identifier.length)
} else {
return new Token(TokenType.VARIABLE, identifier, this.input.position-identifier.length)
}
@ -137,7 +145,7 @@ class ExpressionTokenizer {
if(Reference.CONSTANTS_LIST.includes(c)) return new Token(TokenType.CONSTANT, this.input.next(), this.input.position-1);
if(PUNCTUTATION.includes(c)) return new Token(TokenType.PUNCT, this.input.next(), this.input.position-1);
if(this.errorOnUnknown)
this.input.throw("Unknown token character " + c)
this.raise("Unknown token character " + c)
else
return new Token(TokenType.UNKNOWN, this.input.next(), this.input.position-1);
}
@ -157,13 +165,24 @@ class ExpressionTokenizer {
return tmp;
}
read(type, value) {
let next = this.next()
if(type != null && next.type != type)
this.raise(`Unexpected ${next.type.toLowerCase()} ${next.value}. Expected type was ${type.toLowerCase()}.`);
if(value != null && next.value == value)
this.raise(`Unexpected ${next.type.toLowerCase()} ${next.value}. Expected value was ${value}.`);
return next
}
atEnd() {
return this.peek() == null;
}
skip(type) {
let next = this.next();
if(next.type != type)
input.raise("Unexpected token " + next.type.toLowerCase() + ' "' + next.value + '". Expected ' + type.toLowerCase());
/**
 * Consumes the next token through read(), applying the same type and value
 * checks, and discards the result.
 *
 * @param {string} [type] - Expected token type, forwarded to read().
 * @param {string} [value] - Expected token value, forwarded to read().
 */
skip(type, value) {
this.read(type, value)
}
/**
 * Reports a tokenizing/parsing error by delegating to the input's raise().
 * NOTE(review): presumably this throws — confirm against the Input class.
 *
 * @param {string} message - Error message forwarded to the input.
 */
raise(message) {
this.input.raise(message)
}
}