1 /++ 2 Some misc stuff used by the compiler 3 +/ 4 module qscript.compiler.misc; 5 6 import utils.misc; 7 import utils.lists; 8 9 import std.range; 10 import std.conv : to; 11 12 /// An array containing all chars that an identifier can contain 13 package const char[] IDENT_CHARS = iota('a', 'z'+1).array~iota('A', 'Z'+1).array~iota('0', '9'+1).array~[cast(int)'_']; 14 /// An array containing all keywords 15 package const string[] KEYWORDS = ["function", "return", "if", "else", "while", "for", "do", "void", "int", "char", "double"]; 16 /// data types 17 package const string[] DATA_TYPES = ["void", "int", "double", "char"]; 18 /// An array containing double-operand operators 19 package const string[] OPERATORS = [".", "/", "*", "+", "-", "%", "~", "<", ">", ">=", "<=", "==", "=", "&&", "||"]; 20 /// single-operand operators 21 package const string[] SOPERATORS = ["!", "@"]; 22 /// An array containing all bool-operators (operators that return true/false) 23 package const string[] BOOL_OPERATORS = ["<", ">", ">=", "<=", "==", "&&", "||"]; 24 /// Inbuilt QScript functions (like `length(void[])`) 25 package Function[] INBUILT_FUNCTIONS = [ 26 /// length(@void[], int) 27 Function("length", DataType(DataType.Type.Void), [DataType(DataType.Type.Void, 1, true), DataType(DataType.Type.Integer)]), 28 /// length(void[]) 29 Function("length", DataType(DataType.Type.Integer), [DataType(DataType.Type.Void, 1, false)]), 30 /// length (string) 31 Function("length", DataType(DataType.Type.Integer), [DataType(DataType.Type.Char, 1, false)]), 32 33 /// toInt(string) 34 Function("toInt", DataType(DataType.Type.Integer), [DataType(DataType.Type.Char, 1)]), 35 /// toInt(double) 36 Function("toInt", DataType(DataType.Type.Integer), [DataType(DataType.Type.Double)]), 37 /// toDouble(string) 38 Function("toDouble", DataType(DataType.Type.Double), [DataType(DataType.Type.Char, 1)]), 39 /// toDouble(int) 40 Function("toDouble", DataType(DataType.Type.Double), [DataType(DataType.Type.Integer)]), 41 /// toString(int) 42 Function("toStr", DataType(DataType.Type.Char, 1), [DataType(DataType.Type.Integer)]), 43 /// toString(double) 44 Function("toStr", DataType(DataType.Type.Char, 1), [DataType(DataType.Type.Double)]), 45 46 /// copy(void[], @void[]) 47 Function("copy", DataType(DataType.Type.Void), [DataType(DataType.Type.Void, 1), DataType(DataType.Type.Void, 1, true)]), 48 ]; 49 50 /// Used by compiler's functions to return error 51 public struct CompileError{ 52 string msg; /// The error stored in a string 53 uinteger lineno; /// The line number on which the error is 54 this(uinteger lineNumber, string errorMessage){ 55 lineno = lineNumber; 56 msg = errorMessage; 57 } 58 } 59 60 /// To store information about a function 61 public struct Function{ 62 /// the name of the function 63 string name; 64 /// the data type of the value returned by this function 65 DataType returnType; 66 /// stores the data type of the arguments received by this function 67 private DataType[] _argTypes; 68 /// the data type of the arguments received by this function 69 /// 70 /// if an argType is defined as void, with array dimensions=0, it means accept any type. 71 /// if an argType is defined as void with array dimensions>0, it means array of any type of that dimensions 72 @property ref DataType[] argTypes(){ 73 return _argTypes; 74 } 75 /// the data type of the arguments received by this function 76 @property ref DataType[] argTypes(DataType[] newArray){ 77 return _argTypes = newArray.dup; 78 } 79 /// constructor 80 this (string functionName, DataType functionReturnType, DataType[] functionArgTypes){ 81 name = functionName; 82 returnType = functionReturnType; 83 _argTypes = functionArgTypes.dup; 84 } 85 } 86 87 /// used to store data types for data at compile time 88 public struct DataType{ 89 /// enum defining all data types 90 public enum Type{ 91 Void, 92 Char, 93 Integer, 94 Double 95 } 96 /// the actual data type 97 Type type = DataType.Type.Void; 98 /// stores if it's an array. If type is `int`, it will be 0, if `int[]` it will be 1, if `int[][]`, then 2 ... 99 uinteger arrayDimensionCount = 0; 100 /// stores if it's a reference to a type 101 bool isRef = false; 102 /// returns: true if it's an array. Strings are arrays too (char[]) 103 @property bool isArray(){ 104 if (arrayDimensionCount > 0){ 105 return true; 106 } 107 return false; 108 } 109 /// constructor 110 /// 111 /// dataType is the type to store 112 /// arrayDimension is the number of nested arrays 113 /// isRef is whether the type is a reference to the actual type 114 this (DataType.Type dataType, uinteger arrayDimension = 0, bool isReference = false){ 115 type = dataType; 116 arrayDimensionCount = arrayDimension; 117 isRef = isReference; 118 } 119 120 /// constructor 121 /// 122 /// `sType` is the type in string form 123 this (string sType){ 124 fromString(sType); 125 } 126 /// constructor 127 /// 128 /// `data` is the data to infer type from 129 this (Token[] data){ 130 fromData(data); 131 } 132 /// reads DataType from a string, in case of failure or bad format in string, throws Exception 133 void fromString(string s){ 134 isRef = false; 135 string sType = null; 136 uinteger indexCount = 0; 137 // check if it's a ref 138 if (s.length > 0 && s[0] == '@'){ 139 isRef = true; 140 s = s.dup; 141 s = s[1 .. s.length]; 142 } 143 // read the type 144 for (uinteger i = 0, lastInd = s.length-1; i < s.length; i ++){ 145 if (s[i] == '['){ 146 sType = s[0 .. i]; 147 break; 148 }else if (i == lastInd){ 149 sType = s; 150 break; 151 } 152 } 153 // now read the index 154 for (uinteger i = sType.length; i < s.length; i ++){ 155 if (s[i] == '['){ 156 // make sure next char is a ']' 157 i ++; 158 if (s[i] != ']'){ 159 throw new Exception("invalid data type format"); 160 } 161 indexCount ++; 162 }else{ 163 throw new Exception("invalid data type"); 164 } 165 } 166 // now check if the type was ok or not 167 if (sType == "void"){ 168 type = DataType.Type.Void; 169 }else if (sType == "char"){ 170 type = DataType.Type.Char; 171 }else if (sType == "int"){ 172 type = DataType.Type.Integer; 173 }else if (sType == "double"){ 174 type = DataType.Type.Double; 175 }else{ 176 throw new Exception("invalid data type"); 177 } 178 arrayDimensionCount = indexCount; 179 } 180 181 /// identifies the data type from the actual data 182 /// keep in mind, this won't be able to identify if the data type is a reference or not 183 /// 184 /// throws Exception on failure 185 void fromData(Token[] data){ 186 /// identifies type from data 187 static DataType.Type identifyType(Token data, ref uinteger arrayDimension){ 188 if (data.type == Token.Type.String){ 189 arrayDimension ++; 190 return DataType.Type.Char; 191 }else if (data.type == Token.Type.Char){ 192 return DataType.Type.Char; 193 }else if (data.type == Token.Type.Integer){ 194 return DataType.Type.Integer; 195 }else if (data.type == Token.Type.Double){ 196 return DataType.Type.Double; 197 }else{ 198 throw new Exception("failed to read data type"); 199 } 200 } 201 // keeps count of number of "instances" of this function currently being executed 202 static uinteger callCount = 0; 203 callCount ++; 204 205 if (callCount == 1){ 206 this.arrayDimensionCount = 0; 207 this.type = DataType.Type.Void; 208 } 209 // check if is an array 210 if (data.length > 1 && 211 data[0].type == Token.Type.IndexBracketOpen && data[data.length-1].type == Token.Type.IndexBracketClose){ 212 // is an array 213 this.arrayDimensionCount ++; 214 // if elements are arrays, do recursion, else, just identify types 215 Token[][] elements = splitArray(data); 216 if (elements.length == 0){ 217 this.type = DataType.Type.Void; 218 }else{ 219 // determine the type using recursion 220 // stores the arrayDimensionCount till here 221 uinteger thisNestCount = this.arrayDimensionCount; 222 // stores the nestCount for the preious element, -1 if no previous element 223 integer prevNestCount = -1; 224 // stores the data type of the last element, void if no last element 225 DataType.Type prevType = DataType.Type.Void; 226 // now do recursion, and make sure all types come out same 227 foreach (element; elements){ 228 fromData(element); 229 // now make sure the nestCount came out same 230 if (prevNestCount != -1){ 231 if (prevNestCount != this.arrayDimensionCount){ 232 throw new Exception("inconsistent data types in array elements"); 233 } 234 }else{ 235 // set new nestCount 236 prevNestCount = this.arrayDimensionCount; 237 } 238 // now to make sure type came out same 239 if (prevType != DataType.Type.Void){ 240 if (this.type != prevType){ 241 throw new Exception("inconsistent data types in array elements"); 242 } 243 }else{ 244 prevType = this.type; 245 } 246 // re-set the nestCount for the next element 247 this.arrayDimensionCount = thisNestCount; 248 } 249 // now set the nestCount 250 this.arrayDimensionCount = prevNestCount; 251 } 252 }else if (data.length == 0){ 253 this.type = DataType.Type.Void; 254 }else{ 255 // then it must be only one token, if is zero, then it's void 256 assert(data.length == 1, "non-array data must be only one token in length"); 257 // now check the type, and set it 258 this.type = identifyType(data[0], arrayDimensionCount); 259 } 260 callCount --; 261 } 262 263 /// converts this DataType to string 264 string getStr(){ 265 char[] r; 266 if (type == DataType.Type.Void){ 267 r = cast(char[]) "void"; 268 }else if (type == DataType.Type.Double){ 269 r = cast(char[]) "double"; 270 }else if (type == DataType.Type.Integer){ 271 r = cast(char[]) "int"; 272 }else if (type == DataType.Type.Char){ 273 r = cast(char[]) "char"; 274 }else{ 275 throw new Exception("invalid type stored: "~to!string(type)); 276 } 277 if (arrayDimensionCount > 0){ 278 uinteger i = r.length; 279 r.length += arrayDimensionCount * 2; 280 for (; i < r.length; i += 2){ 281 r[i .. i+2] = "[]"; 282 } 283 } 284 return cast(string) (isRef ? '@'~r : r); 285 } 286 } 287 /// 288 unittest{ 289 assert(DataType("int") == DataType(DataType.Type.Integer, 0)); 290 assert(DataType("char[][]") == DataType(DataType.Type.Char, 2)); 291 assert(DataType("double[][]") == DataType(DataType.Type.Double, 2)); 292 assert(DataType("void") == DataType(DataType.Type.Void, 0)); 293 // unittests for `fromData` 294 import qscript.compiler.tokengen : stringToTokens; 295 DataType dType; 296 dType.fromData(["\"bla bla\""].stringToTokens); 297 assert(dType == DataType("char[]")); 298 dType.fromData(["20"].stringToTokens); 299 assert(dType == DataType("int")); 300 dType.fromData(["2.5"].stringToTokens); 301 assert(dType == DataType("double")); 302 dType.fromData(["[", "\"bla\"", ",", "\"bla\"", "]"].stringToTokens); 303 assert(dType == DataType("char[][]")); 304 dType.fromData(["[", "[", "25.0", ",", "2.5", "]", ",", "[", "15.0", ",", "25.0", "]", "]"].stringToTokens); 305 assert(dType == DataType("double[][]")); 306 } 307 308 /// splits an array in tokens format to it's elements 309 /// 310 /// For example, splitArray("[a, b, c]") will return ["a", "b", "c"] 311 package Token[][] splitArray(Token[] array){ 312 assert(array[0].type == Token.Type.IndexBracketOpen && 313 array[array.length - 1].type == Token.Type.IndexBracketClose, "not a valid array"); 314 LinkedList!(Token[]) elements = new LinkedList!(Token[]); 315 for (uinteger i = 1, readFrom = i; i < array.length; i ++){ 316 // skip any other brackets 317 if (array[i].type == Token.Type.BlockStart || array[i].type == Token.Type.IndexBracketOpen || 318 array[i].type == Token.Type.ParanthesesOpen){ 319 i = tokenBracketPos!(true)(array, i); 320 continue; 321 } 322 // check if comma is here 323 if (array[i].type == Token.Type.Comma || array[i].type == Token.Type.IndexBracketClose){ 324 if ((readFrom > i || readFrom == i) && array[i].type == Token.Type.Comma){ 325 throw new Exception("syntax error"); 326 } 327 elements.append(array[readFrom .. i]); 328 readFrom = i + 1; 329 } 330 } 331 Token[][] r = elements.toArray; 332 .destroy(elements); 333 return r; 334 } 335 336 /// Returns the index of the quotation mark that ends a string 337 /// 338 /// Returns -1 if not found 339 package integer strEnd(string s, uinteger i){ 340 const char end = s[i] == '\'' ? '\'' : '\"'; 341 for (i++;i<s.length;i++){ 342 if (s[i]=='\\'){ 343 i++; 344 continue; 345 }else if (s[i]==end){ 346 break; 347 } 348 } 349 if (i==s.length){i=-1;} 350 return i; 351 } 352 353 /// decodes a string. i.e, converts \t to tab, \" to ", etc 354 /// The string must not be surrounded by quoation marks 355 /// 356 /// throws Exception on error 357 string decodeString(string s){ 358 string r; 359 for (uinteger i = 0; i < s.length; i ++){ 360 if (s[i] == '\\'){ 361 // read the next char 362 if (i == s.length-1){ 363 throw new Exception("unexpected end of string"); 364 } 365 char nextChar = s[i+1]; 366 if (nextChar == '"'){ 367 r ~= '"'; 368 }else if (nextChar == 'n'){ 369 r ~= '\n'; 370 }else if (nextChar == 't'){ 371 r ~= '\t'; 372 }else if (nextChar == '\\'){ 373 r ~= '\\'; 374 }else if (nextChar == '\''){ 375 r ~= '\''; 376 }else{ 377 throw new Exception("\\"~nextChar~" is not an available character"); 378 } 379 i ++; 380 continue; 381 } 382 r ~= s[i]; 383 } 384 return r; 385 } 386 387 /// Generates a string containing Function Name, along with it's argument types. 388 /// Makes it easier to differentiate b/w function overloads 389 /// 390 /// Arguments: 391 /// `name` is the function name 392 /// `argTypes` is the array of it's arguments' Data Types 393 /// 394 /// Returns: the byte code style function name 395 string encodeFunctionName (string name, DataType[] argTypes){ 396 string r = name ~ '/'; 397 foreach (argType; argTypes) 398 r = r ~ argType.getStr~ '/'; 399 return r; 400 } 401 /// 402 unittest{ 403 assert ("abcd".encodeFunctionName ([DataType(DataType.Type.Double,3),DataType(DataType.Type.Void)]) == 404 "abcd/double[][][]/void/" 405 ); 406 } 407 408 /// matches argument types with defined argument types. Used by ASTGen 409 /// 410 /// * `void` will match true against all types (arrays, and even references) 411 /// * `@void` will match true against only references of any type 412 /// * `@void[]` will match true against only references of any type of array 413 /// * `void[]` will match true against any type of array 414 /// 415 /// returns: true if match successful, else, false 416 bool matchArguments(DataType[] definedTypes, DataType[] argTypes){ 417 if (argTypes.length != definedTypes.length) 418 return false; 419 for (uinteger i = 0; i < argTypes.length; i ++){ 420 if (definedTypes[i].isRef != argTypes[i].isRef) 421 return false; 422 if (definedTypes[i].type == DataType.Type.Void || argTypes[i].type == DataType.Type.Void){ 423 if (definedTypes[i].arrayDimensionCount != argTypes[i].arrayDimensionCount) 424 return false; 425 continue; 426 } 427 // check the array dimension 428 if (definedTypes[i].arrayDimensionCount != argTypes[i].arrayDimensionCount) 429 return false; 430 if (argTypes[i].type != definedTypes[i].type) 431 return false; 432 } 433 return true; 434 } 435 436 /// Each token is stored as a `Token` with the type and the actual token 437 package struct Token{ 438 /// Specifies type of token 439 /// 440 /// used only in `compiler.tokengen` 441 enum Type{ 442 String,/// That the token is: `"SOME STRING"` 443 Char, /// That the token is: `'C'` # Some character 444 Integer,/// That the token an int 445 Double, /// That the token is a double (floating point) value 446 Identifier,/// That the token is an identifier. i.e token is a variable name or a function name. For a token to be marked as Identifier, it doesn't need to be defined in `new()` 447 DataType, /// the token is a data type 448 MemberSelector, /// a member selector operator 449 AssignmentOperator, /// and assignmentOperator 450 Operator,/// That the token is an operator, like `+`, `==` etc 451 Keyword,/// A `function` or `var` ... 452 Comma,/// That its a comma: `,` 453 StatementEnd,/// A semicolon 454 ParanthesesOpen,/// `(` 455 ParanthesesClose,/// `)` 456 IndexBracketOpen,/// `[` 457 IndexBracketClose,///`]` 458 BlockStart,///`{` 459 BlockEnd,///`}` 460 } 461 Type type;/// type of token 462 string token;/// token 463 this(Type tType, string tToken){ 464 type = tType; 465 token = tToken; 466 } 467 } 468 /// To store Tokens with Types where the line number of each token is required 469 package struct TokenList{ 470 Token[] tokens; /// Stores the tokens 471 uinteger[] tokenPerLine; /// Stores the number of tokens in each line 472 /// Returns the line number a token is in by usin the index of the token in `tokens` 473 /// 474 /// The returned value is the line-number, not index, it starts from 1, not zero 475 uinteger getTokenLine(uinteger tokenIndex){ 476 uinteger i = 0, chars = 0; 477 tokenIndex ++; 478 for (; chars < tokenIndex && i < tokenPerLine.length; i ++){ 479 chars += tokenPerLine[i]; 480 } 481 return i; 482 } 483 /// reads tokens into a string 484 static string toString(Token[] t){ 485 char[] r; 486 // set length 487 uinteger length = 0; 488 for (uinteger i = 0; i < t.length; i ++){ 489 length += t[i].token.length; 490 } 491 r.length = length; 492 // read em all into r; 493 for (uinteger i = 0, writeTo = 0; i < t.length; i ++){ 494 r[writeTo .. writeTo + t[i].token.length] = t[i].token; 495 writeTo += t[i].token.length; 496 } 497 return cast(string)r; 498 } 499 } 500 501 /// Checks if the brackets in a tokenlist are in correct order, and are closed 502 /// 503 /// In case not, returns false, and appends error to `errorLog` 504 package bool checkBrackets(TokenList tokens, LinkedList!CompileError errors){ 505 enum BracketType{ 506 Round, 507 Square, 508 Block 509 } 510 BracketType[Token.Type] brackOpenIdent = [ 511 Token.Type.ParanthesesOpen: BracketType.Round, 512 Token.Type.IndexBracketOpen: BracketType.Square, 513 Token.Type.BlockStart: BracketType.Block 514 ]; 515 BracketType[Token.Type] brackCloseIdent = [ 516 Token.Type.ParanthesesClose: BracketType.Round, 517 Token.Type.IndexBracketClose: BracketType.Square, 518 Token.Type.BlockEnd:BracketType.Block 519 ]; 520 Stack!BracketType bracks = new Stack!BracketType; 521 Stack!uinteger bracksStartIndex = new Stack!uinteger; 522 BracketType curType; 523 bool r = true; 524 for (uinteger lastInd = tokens.tokens.length-1, i = 0; i<=lastInd; i++){ 525 if (tokens.tokens[i].type in brackOpenIdent){ 526 bracks.push(brackOpenIdent[tokens.tokens[i].type]); 527 bracksStartIndex.push(i); 528 }else if (tokens.tokens[i].type in brackCloseIdent){ 529 bracksStartIndex.pop(); 530 if (bracks.pop != brackCloseIdent[tokens.tokens[i].type]){ 531 errors.append(CompileError(tokens.getTokenLine(i), 532 "brackets order is wrong; first opened must be last closed")); 533 r = false; 534 break; 535 } 536 }else if (i == lastInd && bracks.count > 0){ 537 // no bracket ending 538 i = -2; 539 errors.append(CompileError(tokens.getTokenLine(bracksStartIndex.pop), "bracket not closed")); 540 r = false; 541 break; 542 } 543 } 544 545 .destroy(bracks); 546 .destroy(bracksStartIndex); 547 return r; 548 } 549 550 /// Returns index of closing/openinig bracket of the provided bracket 551 /// 552 /// `forward` if true, then the search is in forward direction, i.e, the closing bracket is searched for 553 /// `tokens` is the array of tokens to search in 554 /// `index` is the index of the opposite bracket 555 /// 556 /// It only works correctly if the brackets are in correct order, and the closing bracket is present 557 /// so, before calling this, `compiler.misc.checkBrackets` should be called 558 package uinteger tokenBracketPos(bool forward=true)(Token[] tokens, uinteger index){ 559 Token.Type[] closingBrackets = [ 560 Token.Type.BlockEnd, 561 Token.Type.IndexBracketClose, 562 Token.Type.ParanthesesClose 563 ]; 564 Token.Type[] openingBrackets = [ 565 Token.Type.BlockStart, 566 Token.Type.IndexBracketOpen, 567 Token.Type.ParanthesesOpen 568 ]; 569 uinteger count; // stores how many closing/opening brackets before we reach the desired one 570 uinteger i = index; 571 for (uinteger lastInd = (forward ? tokens.length : 0); i != lastInd; (forward ? i ++: i --)){ 572 if ((forward ? openingBrackets : closingBrackets).hasElement(tokens[i].type)){ 573 count ++; 574 }else if ((forward ? closingBrackets : openingBrackets).hasElement(tokens[i].type)){ 575 count --; 576 } 577 if (count == 0){ 578 break; 579 } 580 } 581 return i; 582 } 583 /// 584 unittest{ 585 Token[] tokens; 586 tokens = [ 587 Token(Token.Type.Comma, ","), Token(Token.Type.BlockStart,"{"), Token(Token.Type.Comma,","), 588 Token(Token.Type.IndexBracketOpen,"["),Token(Token.Type.IndexBracketClose,"]"),Token(Token.Type.BlockEnd,"}") 589 ]; 590 assert(tokens.tokenBracketPos!true(1) == 5); 591 assert(tokens.tokenBracketPos!false(5) == 1); 592 assert(tokens.tokenBracketPos!true(3) == 4); 593 assert(tokens.tokenBracketPos!false(4) == 3); 594 } 595 596 /// removes "extra" whitespace from a string. i.e, if there are more than 1 consecutive spaces/tabs, one is removed 597 /// 598 /// `line` is the string to remove whitespace from 599 /// `commentStart` is the character that marks the start of a comment, if ==0, then comments are not not considered 600 package string removeWhitespace(string line, char commentStart=0){ 601 bool prevWasWhitespace = true; 602 string r; 603 foreach (c; line){ 604 if (prevWasWhitespace && (c == ' ' || c == '\t')){ 605 // do not add this one then 606 continue; 607 }else if (c != 0 && c == commentStart){ 608 break; 609 }else{ 610 prevWasWhitespace = false; 611 r ~= c; 612 } 613 } 614 return r; 615 }