#include "lexer.h" #include "../bsh.h" #include "lexer_tools.h" extern struct shell *shell; struct lexer_token *lexer_token_free(struct lexer_token *token) { free(token->value); free(token); return NULL; } struct lexer *lexer_create(char *input) { struct lexer *lexer = calloc(1, sizeof(struct lexer)); lexer->input = input; lexer->tail = NULL; lexer->head = NULL; lexer->tokens = NULL; return lexer; } struct lexer_token *lexer_peek(struct lexer *lexer) { return lexer->head; } struct lexer_token *lexer_pop(struct lexer *lexer) { struct lexer_token *token = lexer->head; lexer->head = lexer->head->next; return token; } void lexer_append(struct lexer *lexer, struct lexer_token *token) { token->next = NULL; if (lexer->tail) { lexer->tail->next = token; lexer->tail = token; } else { lexer->tokens = token; lexer->tail = token; } } void lexer_free(struct lexer *lexer) { struct lexer_token *token = lexer->tokens; while (token) { struct lexer_token *next = token->next; lexer_token_free(token); token = next; } struct lexer_alias *alias = lexer->alias_list; while (alias) { struct lexer_alias *next = alias->next; free(alias->name); struct lexer_token *token = alias->value; while (token) { struct lexer_token *next = token->next; lexer_token_free(token); token = next; } free(alias); alias = next; } lexer->alias_list = NULL; lexer->head = NULL; lexer->tail = NULL; free(lexer); } static bool is_separator(char c) { return (c == ';' || c == '\n'); } static enum token_type get_separator(char c) { if (c == ';') return TOKEN_SEMICOLON; if (c == '\n') return TOKEN_NEWLINE; return TOKEN_ERROR; } static bool is_quote(char c) { return (c == '\'' || c == '\"' || c == '`'); } static enum token_type get_quote(char c) { if (c == '\'') return TOKEN_WORD_SINGLE_QUOTE; if (c == '\"') return TOKEN_WORD_DOUBLE_QUOTE; if (c == '`') return TOKEN_BACKTICK; return TOKEN_ERROR; } static void create_word_and_append(char *word, int word_pos, bool *in_cmd, struct lexer *lexer, enum token_type *word_type) { if (!word) return; word[word_pos] = 0; struct lexer_alias *alias = get_alias(word); if (alias && !lexer->alias) { lexer_append_alias(lexer, alias); free(word); return; } if (*word_type == TOKEN_WORD && (!strcmp(word, "alias") || !strcmp(word, "unalias"))) { lexer->alias_prev = lexer->tail; create_and_append_token( lexer, !strcmp(word, "alias") ? TOKEN_ALIAS : TOKEN_UNALIAS, NULL); lexer->alias = lexer->tail; free(word); *word_type = TOKEN_WORD; return; } if (*word_type == TOKEN_WORD && (!strcmp(word, "in")) && ((!lexer->in_for && lexer->found_for) || lexer->found_case)) { create_and_append_token(lexer, TOKEN_IN, NULL); if (lexer->found_for) lexer->in_for = true; free(word); return; } struct lexer_token *token = calloc(1, sizeof(struct lexer_token)); token->type = is_keyword(word) && !lexer->alias && (!(*in_cmd) || lexer->found_case || (lexer->found_for && !strcmp(word, "do"))) ? get_keyword(word) : *word_type; if (token->type >= TOKEN_WORD && !lexer->found_case) *in_cmd = true; if (token->type == TOKEN_FOR) lexer->found_for = true; if (token->type == TOKEN_CASE) lexer->found_case = true; if (token->type == TOKEN_ESAC) lexer->found_case = false; token->value = word; word = NULL; word_pos = 0; lexer_append(lexer, token); } static bool is_pipe(char c, char next) { return (c == '|' && next != '|'); } static bool is_redir(char c1) { return (c1 == '<' || c1 == '>'); } static char *get_redir(char c1, char c2) { char *res = calloc(3, sizeof(char)); if (c1 == '<') { res[0] = '<'; if (c2 == '&' || c2 == '>') res[1] = c2; } if (c1 == '>') { res[0] = '>'; if (c2 == '&' || c2 == '>' || c2 == '|') res[1] = c2; } return res; } static bool is_special(char c) { return (c == '(' || c == ')' || c == '{' || c == '}' || c == '$'); } static enum token_type get_special(char c) { if (c == '(') return TOKEN_PARENTHESIS_OPEN; if (c == ')') return TOKEN_PARENTHESIS_CLOSE; if (c == '{') return TOKEN_BRACE_OPEN; if (c == '}') return TOKEN_BRACE_CLOSE; if (c == '$') return TOKEN_DOLLAR; return TOKEN_ERROR; } static bool is_word_alphanum(char *word, int len) { for (int i = 0; i < len; i++) if (!((word[i] >= 'a' && word[i] <= 'z') || (word[i] >= 'A' && word[i] <= 'Z') || (word[i] >= '0' && word[i] <= '9') || word[i] == '_')) return false; return true; } static void word_lexer(struct lexer *lexer, char *input, bool *in_cmd, enum token_type *word_type) { int j = 0; char *word = NULL; int word_pos = 0; while (input[j]) { if (input[j] == '\\') { word = realloc(word, (word_pos + 3) * sizeof(char)); word[word_pos++] = input[j++]; if (input[j] == 0) break; word[word_pos++] = input[j++]; if (input[j] == 0) break; } if ((*word_type == TOKEN_WORD && is_separator(input[j])) || (is_pipe(input[j], input[j + 1]) && *word_type == TOKEN_WORD)) { if (word) { create_word_and_append(word, word_pos, in_cmd, lexer, word_type); word = NULL; word_pos = 0; } create_and_append_token( lexer, is_separator(input[j]) ? get_separator(input[j]) : TOKEN_PIPE, NULL); if (is_separator(input[j])) { if (lexer->alias != NULL && lexer->alias->next != lexer->tail) { if (lexer->alias->type == TOKEN_ALIAS) process_alias(lexer->alias_prev, lexer->alias, lexer); else process_unalias(lexer->alias_prev, lexer->alias, lexer); } else if (lexer->alias) { if (lexer->alias_prev) { lexer_token_free(lexer->alias_prev->next); lexer->alias_prev->next = lexer->tail; } else { lexer_token_free(lexer->alias); lexer->tokens = lexer->tail; } } if (input[j] == '\n') { struct lexer_alias *alias = lexer->alias_list; while (alias) { struct lexer_alias *next = alias->next; alias->next = shell->alias_list; shell->alias_list = alias; alias = next; } lexer->alias_list = NULL; } lexer->alias = NULL; lexer->in_for = false; lexer->found_for = false; } *in_cmd = false; } else if (*word_type == TOKEN_WORD && ((input[j] == '&' && input[j + 1] == '&') || (input[j] == '|' && input[j + 1] == '|'))) { if (word) { create_word_and_append(word, word_pos, in_cmd, lexer, word_type); word = NULL; word_pos = 0; } create_and_append_token( lexer, input[j] == '&' ? TOKEN_AND : TOKEN_OR, NULL); j++; } else if (*word_type == TOKEN_WORD && is_special(input[j])) { if (input[j] == '}' && lexer->in_variable) { word = realloc(word, (word_pos + 2) * sizeof(char)); word[word_pos++] = input[j]; lexer->in_variable = false; } else { if (word && (input[j] != '$' || input[j + 1] == '(')) { create_word_and_append(word, word_pos, in_cmd, lexer, word_type); word = NULL; word_pos = 0; } if (input[j] == '$') { if (input[j + 1] == '(') { *in_cmd = false; j++; create_and_append_token(lexer, TOKEN_SUBSTITUTION_OPEN, NULL); } else { word = realloc(word, (word_pos + 3) * sizeof(char)); word[word_pos++] = input[j]; if (input[j + 1] == '{' || input[j + 1] == '$') { word[word_pos++] = input[++j]; lexer->in_variable = true; } } } else { if (input[j] == '{' || input[j] == '(') *in_cmd = false; create_and_append_token(lexer, get_special(input[j]), NULL); } } } else if (*word_type == TOKEN_WORD && is_redir(input[j])) { if (word) { word[word_pos] = 0; if (is_int(word)) { create_and_append_token(lexer, TOKEN_IONUMBER, word); } else create_word_and_append(word, word_pos, in_cmd, lexer, word_type); word = NULL; word_pos = 0; } create_and_append_token(lexer, TOKEN_REDIR, get_redir(input[j], input[j + 1])); if (input[j + 1] != 0) j++; } else if (*word_type == TOKEN_WORD && input[j] == '=' && (!lexer->tail || lexer->tail->type != TOKEN_ASSIGNMENT_WORD) && is_word_alphanum(word, word_pos)) { if (word) { create_word_and_append(word, word_pos, in_cmd, lexer, word_type); word = NULL; word_pos = 0; lexer->tail->type = TOKEN_ASSIGNMENT_WORD; } } else if (is_quote(input[j]) && (*word_type == get_quote(input[j]) || *word_type == TOKEN_WORD)) { if (word) { create_word_and_append(word, word_pos, in_cmd, lexer, word_type); word = NULL; word_pos = 0; } if (lexer->alias) { j++; continue; } if (*word_type == TOKEN_WORD && input[j] != '`') *word_type = get_quote(input[j]); else if (*word_type == TOKEN_WORD && input[j] == '`') { create_and_append_token(lexer, TOKEN_BACKTICK, NULL); } else if (get_quote(input[j]) == *word_type) { *word_type = TOKEN_WORD; } } else { word = realloc(word, (word_pos + 2) * sizeof(char)); word[word_pos++] = input[j]; } j++; } if (word) { create_word_and_append(word, word_pos, in_cmd, lexer, word_type); word = NULL; word_pos = 0; } free(input); } void lexer_build(struct lexer *lexer) { bool in_cmd = false; char **words = split_in_words(lexer->input); enum token_type word_type = TOKEN_WORD; for (int i = 0; words[i]; i++) { word_lexer(lexer, words[i], &in_cmd, &word_type); create_and_append_token(lexer, TOKEN_SPACE, NULL); } if (word_type != TOKEN_WORD) { fprintf(stderr, "Error: quote <%c> is not terminated.\n", word_type == TOKEN_WORD_SINGLE_QUOTE ? '\'' : '\"'); shell->return_code = 2; shell->exit = true; } create_and_append_token(lexer, TOKEN_EOF, NULL); process_spaces(lexer); process_export(lexer); if (shell->verbose) lexer_print(lexer); free(words); lexer->head = lexer->tokens; } void lexer_go_back(struct lexer *lexer, struct lexer_token *token) { lexer->head = token; }