1 /++
2 Some misc stuff used by the compiler
3 +/
4 module qscript.compiler.misc;
5 
6 import utils.misc;
7 import utils.lists;
8 
9 import std.range;
10 import std.conv : to;
11 
12 /// An array containing all chars that an identifier can contain
13 package const char[] IDENT_CHARS = iota('a', 'z'+1).array~iota('A', 'Z'+1).array~iota('0', '9'+1).array~[cast(int)'_'];
14 /// An array containing all keywords
15 package const string[] KEYWORDS = ["function", "return", "if", "else", "while", "for", "do", "void", "int", "char", "double"];
16 /// data types
17 package const string[] DATA_TYPES = ["void", "int", "double", "char"];
18 /// An array containing double-operand operators
19 package const string[] OPERATORS = [".", "/", "*", "+", "-", "%", "~", "<", ">", ">=", "<=", "==", "=", "&&", "||"];
20 /// single-operand operators
21 package const string[] SOPERATORS = ["!", "@"];
22 /// An array containing all bool-operators (operators that return true/false)
23 package const string[] BOOL_OPERATORS = ["<", ">", ">=", "<=", "==", "&&", "||"];
24 /// Inbuilt QScript functions (like `length(void[])`)
25 package Function[] INBUILT_FUNCTIONS = [
26 	/// length(@void[], int)
27 	Function("length", DataType(DataType.Type.Void), [DataType(DataType.Type.Void,	1, true), DataType(DataType.Type.Integer)]),
28 	/// length(void[])
29 	Function("length", DataType(DataType.Type.Integer), [DataType(DataType.Type.Void, 1, false)]),
30 	/// length (string)
31 	Function("length", DataType(DataType.Type.Integer), [DataType(DataType.Type.Char, 1, false)]),
32 
33 	/// toInt(string)
34 	Function("toInt", DataType(DataType.Type.Integer), [DataType(DataType.Type.Char, 1)]),
35 	/// toInt(double)
36 	Function("toInt", DataType(DataType.Type.Integer), [DataType(DataType.Type.Double)]),
37 	/// toDouble(string)
38 	Function("toDouble", DataType(DataType.Type.Double), [DataType(DataType.Type.Char, 1)]),
39 	/// toDouble(int)
40 	Function("toDouble", DataType(DataType.Type.Double), [DataType(DataType.Type.Integer)]),
41 	/// toString(int)
42 	Function("toStr", DataType(DataType.Type.Char, 1), [DataType(DataType.Type.Integer)]),
43 	/// toString(double)
44 	Function("toStr", DataType(DataType.Type.Char, 1), [DataType(DataType.Type.Double)]),
45 
46 	/// copy(void[], @void[])
47 	Function("copy", DataType(DataType.Type.Void), [DataType(DataType.Type.Void, 1), DataType(DataType.Type.Void, 1, true)]),
48 ];
49 
50 /// Used by compiler's functions to return error
51 public struct CompileError{
52 	string msg; /// The error stored in a string
53 	uinteger lineno; /// The line number on which the error is
54 	this(uinteger lineNumber, string errorMessage){
55 		lineno = lineNumber;
56 		msg = errorMessage;
57 	}
58 }
59 
60 /// To store information about a function
61 public struct Function{
62 	/// the name of the function
63 	string name;
64 	/// the data type of the value returned by this function
65 	DataType returnType;
66 	/// stores the data type of the arguments received by this function
67 	private DataType[] _argTypes;
68 	/// the data type of the arguments received by this function
69 	/// 
70 	/// if an argType is defined as void, with array dimensions=0, it means accept any type.
71 	/// if an argType is defined as void with array dimensions>0, it means array of any type of that dimensions
72 	@property ref DataType[] argTypes(){
73 		return _argTypes;
74 	}
75 	/// the data type of the arguments received by this function
76 	@property ref DataType[] argTypes(DataType[] newArray){
77 		return _argTypes = newArray.dup;
78 	}
79 	/// constructor
80 	this (string functionName, DataType functionReturnType, DataType[] functionArgTypes){
81 		name = functionName;
82 		returnType = functionReturnType;
83 		_argTypes = functionArgTypes.dup;
84 	}
85 }
86 
87 /// used to store data types for data at compile time
88 public struct DataType{
89 	/// enum defining all data types
90 	public enum Type{
91 		Void,
92 		Char,
93 		Integer,
94 		Double
95 	}
96 	/// the actual data type
97 	Type type = DataType.Type.Void;
98 	/// stores if it's an array. If type is `int`, it will be 0, if `int[]` it will be 1, if `int[][]`, then 2 ...
99 	uinteger arrayDimensionCount = 0;
100 	/// stores if it's a reference to a type
101 	bool isRef = false;
102 	/// returns: true if it's an array. Strings are arrays too (char[])
103 	@property bool isArray(){
104 		if (arrayDimensionCount > 0){
105 			return true;
106 		}
107 		return false;
108 	}
109 	/// constructor
110 	/// 
111 	/// dataType is the type to store
112 	/// arrayDimension is the number of nested arrays
113 	/// isRef is whether the type is a reference to the actual type
114 	this (DataType.Type dataType, uinteger arrayDimension = 0, bool isReference = false){
115 		type = dataType;
116 		arrayDimensionCount = arrayDimension;
117 		isRef = isReference;
118 	}
119 	
120 	/// constructor
121 	/// 
122 	/// `sType` is the type in string form
123 	this (string sType){
124 		fromString(sType);
125 	}
126 	/// constructor
127 	/// 
128 	/// `data` is the data to infer type from
129 	this (Token[] data){
130 		fromData(data);
131 	}
132 	/// reads DataType from a string, in case of failure or bad format in string, throws Exception
133 	void fromString(string s){
134 		isRef = false;
135 		string sType = null;
136 		uinteger indexCount = 0;
137 		// check if it's a ref
138 		if (s.length > 0 && s[0] == '@'){
139 			isRef = true;
140 			s = s.dup;
141 			s = s[1 .. s.length];
142 		}
143 		// read the type
144 		for (uinteger i = 0, lastInd = s.length-1; i < s.length; i ++){
145 			if (s[i] == '['){
146 				sType = s[0 .. i];
147 				break;
148 			}else if (i == lastInd){
149 				sType = s;
150 				break;
151 			}
152 		}
153 		// now read the index
154 		for (uinteger i = sType.length; i < s.length; i ++){
155 			if (s[i] == '['){
156 				// make sure next char is a ']'
157 				i ++;
158 				if (s[i] != ']'){
159 					throw new Exception("invalid data type format");
160 				}
161 				indexCount ++;
162 			}else{
163 				throw new Exception("invalid data type");
164 			}
165 		}
166 		// now check if the type was ok or not
167 		if (sType == "void"){
168 			type = DataType.Type.Void;
169 		}else if (sType == "char"){
170 			type = DataType.Type.Char;
171 		}else if (sType == "int"){
172 			type = DataType.Type.Integer;
173 		}else if (sType == "double"){
174 			type = DataType.Type.Double;
175 		}else{
176 			throw new Exception("invalid data type");
177 		}
178 		arrayDimensionCount = indexCount;
179 	}
180 
181 	/// identifies the data type from the actual data
182 	/// keep in mind, this won't be able to identify if the data type is a reference or not
183 	/// 
184 	/// throws Exception on failure
185 	void fromData(Token[] data){
186 		/// identifies type from data
187 		static DataType.Type identifyType(Token data, ref uinteger arrayDimension){
188 			if (data.type == Token.Type.String){
189 				arrayDimension ++;
190 				return DataType.Type.Char;
191 			}else if (data.type == Token.Type.Char){
192 				return DataType.Type.Char;
193 			}else if (data.type == Token.Type.Integer){
194 				return DataType.Type.Integer;
195 			}else if (data.type == Token.Type.Double){
196 				return DataType.Type.Double;
197 			}else{
198 				throw new Exception("failed to read data type");
199 			}
200 		}
201 		// keeps count of number of "instances" of this function currently being executed
202 		static uinteger callCount = 0;
203 		callCount ++;
204 
205 		if (callCount == 1){
206 			this.arrayDimensionCount = 0;
207 			this.type = DataType.Type.Void;
208 		}
209 		// check if is an array
210 		if (data.length > 1 &&
211 			data[0].type == Token.Type.IndexBracketOpen && data[data.length-1].type == Token.Type.IndexBracketClose){
212 			// is an array
213 			this.arrayDimensionCount ++;
214 			// if elements are arrays, do recursion, else, just identify types
215 			Token[][] elements = splitArray(data);
216 			if (elements.length == 0){
217 				this.type = DataType.Type.Void;
218 			}else{
219 				// determine the type using recursion
220 				// stores the arrayDimensionCount till here
221 				uinteger thisNestCount = this.arrayDimensionCount;
222 				// stores the nestCount for the preious element, -1 if no previous element
223 				integer prevNestCount = -1;
224 				// stores the data type of the last element, void if no last element
225 				DataType.Type prevType = DataType.Type.Void;
226 				// now do recursion, and make sure all types come out same
227 				foreach (element; elements){
228 					fromData(element);
229 					// now make sure the nestCount came out same
230 					if (prevNestCount != -1){
231 						if (prevNestCount != this.arrayDimensionCount){
232 							throw new Exception("inconsistent data types in array elements");
233 						}
234 					}else{
235 						// set new nestCount
236 						prevNestCount = this.arrayDimensionCount;
237 					}
238 					// now to make sure type came out same
239 					if (prevType != DataType.Type.Void){
240 						if (this.type != prevType){
241 							throw new Exception("inconsistent data types in array elements");
242 						}
243 					}else{
244 						prevType = this.type;
245 					}
246 					// re-set the nestCount for the next element
247 					this.arrayDimensionCount = thisNestCount;
248 				}
249 				// now set the nestCount
250 				this.arrayDimensionCount = prevNestCount;
251 			}
252 		}else if (data.length == 0){
253 			this.type = DataType.Type.Void;
254 		}else{
255 			// then it must be only one token, if is zero, then it's void
256 			assert(data.length == 1, "non-array data must be only one token in length");
257 			// now check the type, and set it
258 			this.type = identifyType(data[0], arrayDimensionCount);
259 		}
260 		callCount --;
261 	}
262 
263 	/// converts this DataType to string
264 	string getStr(){
265 		char[] r;
266 		if (type == DataType.Type.Void){
267 			r = cast(char[]) "void";
268 		}else if (type == DataType.Type.Double){
269 			r = cast(char[]) "double";
270 		}else if (type == DataType.Type.Integer){
271 			r = cast(char[]) "int";
272 		}else if (type == DataType.Type.Char){
273 			r = cast(char[]) "char";
274 		}else{
275 			throw new Exception("invalid type stored: "~to!string(type));
276 		}
277 		if (arrayDimensionCount > 0){
278 			uinteger i = r.length;
279 			r.length += arrayDimensionCount * 2;
280 			for (; i < r.length; i += 2){
281 				r[i .. i+2] = "[]";
282 			}
283 		}
284 		return cast(string) (isRef ? '@'~r : r);
285 	}
286 }
287 /// 
288 unittest{
289 	assert(DataType("int") == DataType(DataType.Type.Integer, 0));
290 	assert(DataType("char[][]") == DataType(DataType.Type.Char, 2));
291 	assert(DataType("double[][]") == DataType(DataType.Type.Double, 2));
292 	assert(DataType("void") == DataType(DataType.Type.Void, 0));
293 	// unittests for `fromData`
294 	import qscript.compiler.tokengen : stringToTokens;
295 	DataType dType;
296 	dType.fromData(["\"bla bla\""].stringToTokens);
297 	assert(dType == DataType("char[]"));
298 	dType.fromData(["20"].stringToTokens);
299 	assert(dType == DataType("int"));
300 	dType.fromData(["2.5"].stringToTokens);
301 	assert(dType == DataType("double"));
302 	dType.fromData(["[", "\"bla\"", ",", "\"bla\"", "]"].stringToTokens);
303 	assert(dType == DataType("char[][]"));
304 	dType.fromData(["[", "[", "25.0", ",", "2.5", "]", ",", "[", "15.0", ",", "25.0", "]", "]"].stringToTokens);
305 	assert(dType == DataType("double[][]"));
306 }
307 
308 /// splits an array in tokens format to it's elements
309 /// 
310 /// For example, splitArray("[a, b, c]") will return ["a", "b", "c"]
311 package Token[][] splitArray(Token[] array){
312 	assert(array[0].type == Token.Type.IndexBracketOpen &&
313 		array[array.length - 1].type == Token.Type.IndexBracketClose, "not a valid array");
314 	LinkedList!(Token[]) elements = new LinkedList!(Token[]);
315 	for (uinteger i = 1, readFrom = i; i < array.length; i ++){
316 		// skip any other brackets
317 		if (array[i].type == Token.Type.BlockStart || array[i].type == Token.Type.IndexBracketOpen ||
318 			array[i].type == Token.Type.ParanthesesOpen){
319 			i = tokenBracketPos!(true)(array, i);
320 			continue;
321 		}
322 		// check if comma is here
323 		if (array[i].type == Token.Type.Comma || array[i].type == Token.Type.IndexBracketClose){
324 			if ((readFrom > i || readFrom == i) && array[i].type == Token.Type.Comma){
325 				throw new Exception("syntax error");
326 			}
327 			elements.append(array[readFrom .. i]);
328 			readFrom = i + 1;
329 		}
330 	}
331 	Token[][] r = elements.toArray;
332 	.destroy(elements);
333 	return r;
334 }
335 
336 /// Returns the index of the quotation mark that ends a string
337 /// 
338 /// Returns -1 if not found
339 package integer strEnd(string s, uinteger i){
340 	const char end = s[i] == '\'' ? '\'' : '\"';
341 	for (i++;i<s.length;i++){
342 		if (s[i]=='\\'){
343 			i++;
344 			continue;
345 		}else if (s[i]==end){
346 			break;
347 		}
348 	}
349 	if (i==s.length){i=-1;}
350 	return i;
351 }
352 
353 /// decodes a string. i.e, converts \t to tab, \" to ", etc
354 /// The string must not be surrounded by quoation marks
355 /// 
356 /// throws Exception on error
357 string decodeString(string s){
358 	string r;
359 	for (uinteger i = 0; i < s.length; i ++){
360 		if (s[i] == '\\'){
361 			// read the next char
362 			if (i == s.length-1){
363 				throw new Exception("unexpected end of string");
364 			}
365 			char nextChar = s[i+1];
366 			if (nextChar == '"'){
367 				r ~= '"';
368 			}else if (nextChar == 'n'){
369 				r ~= '\n';
370 			}else if (nextChar == 't'){
371 				r ~= '\t';
372 			}else if (nextChar == '\\'){
373 				r ~= '\\';
374 			}else if (nextChar == '\''){
375 				r ~= '\'';
376 			}else{
377 				throw new Exception("\\"~nextChar~" is not an available character");
378 			}
379 			i ++;
380 			continue;
381 		}
382 		r ~= s[i];
383 	}
384 	return r;
385 }
386 
387 /// Generates a string containing Function Name, along with it's argument types.
388 /// Makes it easier to differentiate b/w function overloads
389 /// 
390 /// Arguments:
391 /// `name` is the function name  
392 /// `argTypes` is the array of it's arguments' Data Types
393 /// 
394 /// Returns: the byte code style function name
395 string encodeFunctionName (string name, DataType[] argTypes){
396 	string r = name ~ '/';
397 	foreach (argType; argTypes)
398 		r = r ~ argType.getStr~ '/';
399 	return r;
400 }
401 /// 
402 unittest{
403 	assert ("abcd".encodeFunctionName ([DataType(DataType.Type.Double,3),DataType(DataType.Type.Void)]) == 
404 			"abcd/double[][][]/void/"
405 		);
406 }
407 
408 /// matches argument types with defined argument types. Used by ASTGen
409 /// 
410 /// * `void` will match true against all types (arrays, and even references)
411 /// * `@void` will match true against only references of any type
412 /// * `@void[]` will match true against only references of any type of array
413 /// * `void[]` will match true against any type of array
414 /// 
415 /// returns: true if match successful, else, false
416 bool matchArguments(DataType[] definedTypes, DataType[] argTypes){
417 	if (argTypes.length != definedTypes.length)
418 		return false;
419 	for (uinteger i = 0; i < argTypes.length; i ++){
420 		if (definedTypes[i].isRef != argTypes[i].isRef)
421 			return false;
422 		if (definedTypes[i].type == DataType.Type.Void || argTypes[i].type == DataType.Type.Void){
423 			if (definedTypes[i].arrayDimensionCount != argTypes[i].arrayDimensionCount)
424 				return false;
425 			continue;
426 		}
427 		// check the array dimension
428 		if (definedTypes[i].arrayDimensionCount != argTypes[i].arrayDimensionCount)
429 			return false;
430 		if (argTypes[i].type != definedTypes[i].type)
431 			return false;
432 	}
433 	return true;
434 }
435 
436 /// Each token is stored as a `Token` with the type and the actual token
437 package struct Token{
438 	/// Specifies type of token
439 	/// 
440 	/// used only in `compiler.tokengen`
441 	enum Type{
442 		String,/// That the token is: `"SOME STRING"`
443 		Char, /// That the token is: `'C'` # Some character
444 		Integer,/// That the token an int
445 		Double, /// That the token is a double (floating point) value
446 		Identifier,/// That the token is an identifier. i.e token is a variable name or a function name.  For a token to be marked as Identifier, it doesn't need to be defined in `new()`
447 		DataType, /// the  token is a data type
448 		MemberSelector, /// a member selector operator
449 		AssignmentOperator, /// and assignmentOperator
450 		Operator,/// That the token is an operator, like `+`, `==` etc
451 		Keyword,/// A `function` or `var` ...
452 		Comma,/// That its a comma: `,`
453 		StatementEnd,/// A semicolon
454 		ParanthesesOpen,/// `(`
455 		ParanthesesClose,/// `)`
456 		IndexBracketOpen,/// `[`
457 		IndexBracketClose,///`]`
458 		BlockStart,///`{`
459 		BlockEnd,///`}`
460 	}
461 	Type type;/// type of token
462 	string token;/// token
463 	this(Type tType, string tToken){
464 		type = tType;
465 		token = tToken;
466 	}
467 }
468 /// To store Tokens with Types where the line number of each token is required
469 package struct TokenList{
470 	Token[] tokens; /// Stores the tokens
471 	uinteger[] tokenPerLine; /// Stores the number of tokens in each line
472 	/// Returns the line number a token is in by usin the index of the token in `tokens`
473 	/// 
474 	/// The returned value is the line-number, not index, it starts from 1, not zero
475 	uinteger getTokenLine(uinteger tokenIndex){
476 		uinteger i = 0, chars = 0;
477 		tokenIndex ++;
478 		for (; chars < tokenIndex && i < tokenPerLine.length; i ++){
479 			chars += tokenPerLine[i];
480 		}
481 		return i;
482 	}
483 	/// reads tokens into a string
484 	static string toString(Token[] t){
485 		char[] r;
486 		// set length
487 		uinteger length = 0;
488 		for (uinteger i = 0; i < t.length; i ++){
489 			length += t[i].token.length;
490 		}
491 		r.length = length;
492 		// read em all into r;
493 		for (uinteger i = 0, writeTo = 0; i < t.length; i ++){
494 			r[writeTo .. writeTo + t[i].token.length] = t[i].token;
495 			writeTo += t[i].token.length;
496 		}
497 		return cast(string)r;
498 	}
499 }
500 
501 /// Checks if the brackets in a tokenlist are in correct order, and are closed
502 /// 
503 /// In case not, returns false, and appends error to `errorLog`
504 package bool checkBrackets(TokenList tokens, LinkedList!CompileError errors){
505 	enum BracketType{
506 		Round,
507 		Square,
508 		Block
509 	}
510 	BracketType[Token.Type] brackOpenIdent = [
511 		Token.Type.ParanthesesOpen: BracketType.Round,
512 		Token.Type.IndexBracketOpen: BracketType.Square,
513 		Token.Type.BlockStart: BracketType.Block
514 	];
515 	BracketType[Token.Type] brackCloseIdent = [
516 		Token.Type.ParanthesesClose: BracketType.Round,
517 		Token.Type.IndexBracketClose: BracketType.Square,
518 		Token.Type.BlockEnd:BracketType.Block
519 	];
520 	Stack!BracketType bracks = new Stack!BracketType;
521 	Stack!uinteger bracksStartIndex = new Stack!uinteger;
522 	BracketType curType;
523 	bool r = true;
524 	for (uinteger lastInd = tokens.tokens.length-1, i = 0; i<=lastInd; i++){
525 		if (tokens.tokens[i].type in brackOpenIdent){
526 			bracks.push(brackOpenIdent[tokens.tokens[i].type]);
527 			bracksStartIndex.push(i);
528 		}else if (tokens.tokens[i].type in brackCloseIdent){
529 			bracksStartIndex.pop();
530 			if (bracks.pop != brackCloseIdent[tokens.tokens[i].type]){
531 				errors.append(CompileError(tokens.getTokenLine(i),
532 						"brackets order is wrong; first opened must be last closed"));
533 				r = false;
534 				break;
535 			}
536 		}else if (i == lastInd && bracks.count > 0){
537 			// no bracket ending
538 			i = -2;
539 			errors.append(CompileError(tokens.getTokenLine(bracksStartIndex.pop), "bracket not closed"));
540 			r = false;
541 			break;
542 		}
543 	}
544 
545 	.destroy(bracks);
546 	.destroy(bracksStartIndex);
547 	return r;
548 }
549 
550 /// Returns index of closing/openinig bracket of the provided bracket  
551 /// 
552 /// `forward` if true, then the search is in forward direction, i.e, the closing bracket is searched for
553 /// `tokens` is the array of tokens to search in
554 /// `index` is the index of the opposite bracket
555 /// 
556 /// It only works correctly if the brackets are in correct order, and the closing bracket is present  
557 /// so, before calling this, `compiler.misc.checkBrackets` should be called
558 package uinteger tokenBracketPos(bool forward=true)(Token[] tokens, uinteger index){
559 	Token.Type[] closingBrackets = [
560 		Token.Type.BlockEnd,
561 		Token.Type.IndexBracketClose,
562 		Token.Type.ParanthesesClose
563 	];
564 	Token.Type[] openingBrackets = [
565 		Token.Type.BlockStart,
566 		Token.Type.IndexBracketOpen,
567 		Token.Type.ParanthesesOpen
568 	];
569 	uinteger count; // stores how many closing/opening brackets before we reach the desired one
570 	uinteger i = index;
571 	for (uinteger lastInd = (forward ? tokens.length : 0); i != lastInd; (forward ? i ++: i --)){
572 		if ((forward ? openingBrackets : closingBrackets).hasElement(tokens[i].type)){
573 			count ++;
574 		}else if ((forward ? closingBrackets : openingBrackets).hasElement(tokens[i].type)){
575 			count --;
576 		}
577 		if (count == 0){
578 			break;
579 		}
580 	}
581 	return i;
582 }
583 ///
584 unittest{
585 	Token[] tokens;
586 	tokens = [
587 		Token(Token.Type.Comma, ","), Token(Token.Type.BlockStart,"{"), Token(Token.Type.Comma,","),
588 		Token(Token.Type.IndexBracketOpen,"["),Token(Token.Type.IndexBracketClose,"]"),Token(Token.Type.BlockEnd,"}")
589 	];
590 	assert(tokens.tokenBracketPos!true(1) == 5);
591 	assert(tokens.tokenBracketPos!false(5) == 1);
592 	assert(tokens.tokenBracketPos!true(3) == 4);
593 	assert(tokens.tokenBracketPos!false(4) == 3);
594 }
595 
596 /// removes "extra" whitespace from a string. i.e, if there are more than 1 consecutive spaces/tabs, one is removed
597 /// 
598 /// `line` is the string to remove whitespace from
599 /// `commentStart` is the character that marks the start of a comment, if ==0, then comments are not not considered
600 package string removeWhitespace(string line, char commentStart=0){
601 	bool prevWasWhitespace = true;
602 	string r;
603 	foreach (c; line){
604 		if (prevWasWhitespace && (c == ' ' || c == '\t')){
605 			// do not add this one then
606 			continue;
607 		}else if (c != 0 && c == commentStart){
608 			break;
609 		}else{
610 			prevWasWhitespace = false;
611 			r ~= c;
612 		}
613 	}
614 	return r;
615 }