diff options
Diffstat (limited to 'src/Compiler/TokenScanner.java')
-rw-r--r-- | src/Compiler/TokenScanner.java | 80 |
1 files changed, 68 insertions, 12 deletions
diff --git a/src/Compiler/TokenScanner.java b/src/Compiler/TokenScanner.java index 02bbbc0..ecb5ad3 100644 --- a/src/Compiler/TokenScanner.java +++ b/src/Compiler/TokenScanner.java @@ -5,6 +5,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +/** + * The lexical scanner, which takes the source code and extracts a list of token objects + */ public class TokenScanner { private String sourceCode; List<Token> tokens = new ArrayList<>(); @@ -12,23 +15,33 @@ public class TokenScanner { private int currentLoc=0; private int line=0; - //Extract tokens from the source code by reading character by character + /** + * Method for extracting tokens, checking one character at a time + * @param sourceCode the original source code as a string + * @return a list of tokens representing the source code + */ List<Token> extractTokens(String sourceCode){ this.sourceCode=sourceCode; + //Read until the end of the source code has been reached while (!checkEOF()){ tokenStart=currentLoc; readToken(); } + //Add an EOF token on the end tokens.add(new Token(TokenType.EOF, "", null,line)); return tokens; } - //Extract a single token + /** + * Extract a single token + */ private void readToken(){ + //Get the current character and find the matching token char checkChar = sourceCode.charAt(currentLoc); switch(checkChar){ case ' ':break; + //Advance line when line break found case '\n': line++; break; @@ -45,7 +58,7 @@ public class TokenScanner { case ';': createTokenNull(TokenType.SEMI_COLON); break; case ',': createTokenNull(TokenType.COMMA); break; - //Some tokens are multiple characters long (==, <=) etc + //Some tokens are multiple characters long //so need to check next char as well case '=': if (checkNextChar('=')){ @@ -55,6 +68,7 @@ public class TokenScanner { createTokenNull(TokenType.EQUALS); break; } + case ':': if (checkNextChar(':')){ createTokenNull(TokenType.DEFINE); @@ -63,6 +77,7 @@ public class TokenScanner { createTokenNull(TokenType.COLON); break; } + 
case '<': if (checkNextChar('=')){ createTokenNull(TokenType.LESS_EQUAL); @@ -71,6 +86,7 @@ public class TokenScanner { createTokenNull(TokenType.LESS); break; } + case '>': if (checkNextChar('=')){ createTokenNull(TokenType.GREATER_EQUAL); @@ -79,6 +95,7 @@ public class TokenScanner { createTokenNull(TokenType.GREATER); break; } + case '"': while(lookAhead()!='"' && !checkEOF()){ currentLoc++; @@ -90,6 +107,7 @@ public class TokenScanner { currentLoc++; createToken(TokenType.STRING, sourceCode.substring(tokenStart, currentLoc+1)); break; + case '.': if(checkIsAlpha(lookAhead())) while (checkIsAlpha(lookAhead())){ @@ -112,9 +130,11 @@ public class TokenScanner { } else { Language.displayError(line, "Expected '.' after logical expression"); } + + //Find tokens starting with alphanumeric characters default: - //Check for numer + //Check for numeric characters if (checkIsDigit(checkChar)){ String type = "int"; while (checkIsDigit(lookAhead())){ @@ -134,19 +154,24 @@ public class TokenScanner { createToken(TokenType.NUMBER, Integer.parseInt(sourceCode.substring(tokenStart, currentLoc+1))); } } + + //Check alphabetical character else if (checkIsAlpha(checkChar)){ while (checkIsAlpha(lookAhead())){ currentLoc++; } String text = sourceCode.substring(tokenStart, currentLoc+1); + + //Compare against a list of keywords in the language TokenType type = keywords.get(text); if(type == null){ createToken(TokenType.IDENTIFIER, text); } else{ createToken(type, text); } - + } else { + //Report an unknown character Language.displayError(line,"Unexpected Character"); } } @@ -154,23 +179,37 @@ public class TokenScanner { } - //Test for end of file + /** + * Method to check the end of the source code + * @return if the end of the source code has been reached + */ private boolean checkEOF(){ return currentLoc>=sourceCode.length(); } - //Create a token without a value + /** + * Create a token with a value of null + * @param type the token type + */ private void createTokenNull(TokenType 
type){ createToken(type, null); } - //Create token + /** + * Create a token and add to the list + * @param type the token type + * @param value the value of the token + */ private void createToken(TokenType type, Object value){ String tokenText = sourceCode.substring(tokenStart, currentLoc+1); tokens.add(new Token(type, tokenText, value, line)); } - //Check if the next char matches a given char + /** + * Compare the next character in the source code to a given character + * @param matchChar the character to compare against + * @return if the character matches + */ private boolean checkNextChar(char matchChar){ if (checkEOF()){ return false; @@ -182,7 +221,10 @@ public class TokenScanner { return false; } - //Look at the next char in the source code + /** + * gets the next character in the source code + * @return the next character + */ private char lookAhead(){ if (currentLoc+1>=sourceCode.length()){ return ' '; @@ -192,7 +234,10 @@ public class TokenScanner { } } - //Look 2 chars ahead in the source code + /** + * look at the character two ahead in the source code + * @return the character two ahead + */ private char lookTwoAhead(){ if (currentLoc+2>=sourceCode.length()){ return ' '; @@ -202,16 +247,26 @@ public class TokenScanner { } } - //Check if a given char is a digit + /** + * checks if a given character is numerical + * @param checkChar the character to check + * @return if the character is numerical + */ private boolean checkIsDigit(char checkChar){ return checkChar>='0' && checkChar<='9'; } + /** + * check if a character is alphabetical + * @param checkChar the character to check + * @return if the character is alphabetical + */ private boolean checkIsAlpha(char checkChar){ return ('a'<=checkChar && checkChar<='z')|| ('A'<=checkChar && checkChar<='Z'); } + //A hashmap of the keywords used in the language private static final Map<String, TokenType> keywords; static { @@ -232,5 +287,6 @@ public class TokenScanner { keywords.put("program", 
TokenType.PROGRAM); keywords.put("return", TokenType.RETURN); keywords.put("function", TokenType.FUNCTION); + keywords.put("subroutine", TokenType.SUBROUTINE); } } |