/* Lexer: turns the raw shell input into a linked list of tokens. */
#include "lexer.h"
|
|
|
|
#include "../bsh.h"
|
|
#include "lexer_tools.h"
|
|
|
|
extern struct shell *shell;
|
|
|
|
struct lexer_token *lexer_token_free(struct lexer_token *token)
|
|
{
|
|
free(token->value);
|
|
free(token);
|
|
return NULL;
|
|
}
|
|
|
|
struct lexer *lexer_create(char *input)
|
|
{
|
|
struct lexer *lexer = calloc(1, sizeof(struct lexer));
|
|
lexer->input = input;
|
|
lexer->tail = NULL;
|
|
lexer->head = NULL;
|
|
lexer->tokens = NULL;
|
|
return lexer;
|
|
}
|
|
|
|
struct lexer_token *lexer_peek(struct lexer *lexer)
|
|
{
|
|
return lexer->head;
|
|
}
|
|
|
|
struct lexer_token *lexer_pop(struct lexer *lexer)
|
|
{
|
|
struct lexer_token *token = lexer->head;
|
|
lexer->head = lexer->head->next;
|
|
return token;
|
|
}
|
|
|
|
void lexer_append(struct lexer *lexer, struct lexer_token *token)
|
|
{
|
|
token->next = NULL;
|
|
if (lexer->tail)
|
|
{
|
|
lexer->tail->next = token;
|
|
lexer->tail = token;
|
|
}
|
|
else
|
|
{
|
|
lexer->tokens = token;
|
|
lexer->tail = token;
|
|
}
|
|
}
|
|
|
|
void lexer_free(struct lexer *lexer)
|
|
{
|
|
struct lexer_token *token = lexer->tokens;
|
|
while (token)
|
|
{
|
|
struct lexer_token *next = token->next;
|
|
lexer_token_free(token);
|
|
token = next;
|
|
}
|
|
struct lexer_alias *alias = lexer->alias_list;
|
|
while (alias)
|
|
{
|
|
struct lexer_alias *next = alias->next;
|
|
free(alias->name);
|
|
struct lexer_token *token = alias->value;
|
|
while (token)
|
|
{
|
|
struct lexer_token *next = token->next;
|
|
lexer_token_free(token);
|
|
token = next;
|
|
}
|
|
free(alias);
|
|
alias = next;
|
|
}
|
|
lexer->alias_list = NULL;
|
|
lexer->head = NULL;
|
|
lexer->tail = NULL;
|
|
free(lexer);
|
|
}
|
|
|
|
/* Tell whether `c` ends a command: a semicolon or a newline. */
static bool is_separator(char c)
{
    switch (c)
    {
    case ';':
    case '\n':
        return true;
    default:
        return false;
    }
}
|
|
|
|
static enum token_type get_separator(char c)
|
|
{
|
|
if (c == ';')
|
|
return TOKEN_SEMICOLON;
|
|
if (c == '\n')
|
|
return TOKEN_NEWLINE;
|
|
return TOKEN_ERROR;
|
|
}
|
|
|
|
/* Tell whether `c` is a single quote, a double quote or a backtick. */
static bool is_quote(char c)
{
    switch (c)
    {
    case '\'':
    case '\"':
    case '`':
        return true;
    default:
        return false;
    }
}
|
|
|
|
static enum token_type get_quote(char c)
|
|
{
|
|
if (c == '\'')
|
|
return TOKEN_WORD_SINGLE_QUOTE;
|
|
if (c == '\"')
|
|
return TOKEN_WORD_DOUBLE_QUOTE;
|
|
if (c == '`')
|
|
return TOKEN_BACKTICK;
|
|
return TOKEN_ERROR;
|
|
}
|
|
|
|
static void create_word_and_append(char *word, int word_pos, bool *in_cmd,
|
|
struct lexer *lexer,
|
|
enum token_type *word_type)
|
|
{
|
|
if (!word)
|
|
return;
|
|
word[word_pos] = 0;
|
|
struct lexer_alias *alias = get_alias(word);
|
|
if (alias && !lexer->alias)
|
|
{
|
|
lexer_append_alias(lexer, alias);
|
|
free(word);
|
|
return;
|
|
}
|
|
if (*word_type == TOKEN_WORD
|
|
&& (!strcmp(word, "alias") || !strcmp(word, "unalias")))
|
|
{
|
|
lexer->alias_prev = lexer->tail;
|
|
create_and_append_token(
|
|
lexer, !strcmp(word, "alias") ? TOKEN_ALIAS : TOKEN_UNALIAS, NULL);
|
|
lexer->alias = lexer->tail;
|
|
free(word);
|
|
*word_type = TOKEN_WORD;
|
|
return;
|
|
}
|
|
if (*word_type == TOKEN_WORD && (!strcmp(word, "in"))
|
|
&& ((!lexer->in_for && lexer->found_for) || lexer->found_case))
|
|
{
|
|
create_and_append_token(lexer, TOKEN_IN, NULL);
|
|
if (lexer->found_for)
|
|
lexer->in_for = true;
|
|
free(word);
|
|
return;
|
|
}
|
|
struct lexer_token *token = calloc(1, sizeof(struct lexer_token));
|
|
token->type = is_keyword(word) && !lexer->alias
|
|
&& (!(*in_cmd) || lexer->found_case
|
|
|| (lexer->found_for && !strcmp(word, "do")))
|
|
? get_keyword(word)
|
|
: *word_type;
|
|
if (token->type >= TOKEN_WORD && !lexer->found_case)
|
|
*in_cmd = true;
|
|
if (token->type == TOKEN_FOR)
|
|
lexer->found_for = true;
|
|
if (token->type == TOKEN_CASE)
|
|
lexer->found_case = true;
|
|
if (token->type == TOKEN_ESAC)
|
|
lexer->found_case = false;
|
|
token->value = word;
|
|
word = NULL;
|
|
word_pos = 0;
|
|
lexer_append(lexer, token);
|
|
}
|
|
|
|
/* Tell whether `c` is a lone '|' (a pipe), as opposed to the start of "||". */
static bool is_pipe(char c, char next)
{
    if (c != '|')
        return false;
    return next != '|';
}
|
|
|
|
/* Tell whether `c1` starts a redirection operator ('<' or '>'). */
static bool is_redir(char c1)
{
    switch (c1)
    {
    case '<':
    case '>':
        return true;
    default:
        return false;
    }
}
|
|
|
|
/*
 * Build the redirection operator that starts with `c1`.
 *
 * c1 - first character; only '<' and '>' produce an operator.
 * c2 - following character; it is included when it extends the operator:
 *      '&' or '>' after '<', and '&', '>' or '|' after '>'.
 *
 * Returns a newly allocated, NUL-terminated string of 0 to 2 characters
 * that the caller owns, or NULL when the allocation fails.
 */
static char *get_redir(char c1, char c2)
{
    char *res = calloc(3, sizeof(*res));
    if (!res)
        return NULL;

    if (c1 == '<')
    {
        res[0] = '<';
        if (c2 == '&' || c2 == '>')
            res[1] = c2;
    }
    else if (c1 == '>')
    {
        res[0] = '>';
        if (c2 == '&' || c2 == '>' || c2 == '|')
            res[1] = c2;
    }
    return res;
}
|
|
|
|
/* Tell whether `c` is a parenthesis, a brace or a dollar sign. */
static bool is_special(char c)
{
    switch (c)
    {
    case '(':
    case ')':
    case '{':
    case '}':
    case '$':
        return true;
    default:
        return false;
    }
}
|
|
|
|
static enum token_type get_special(char c)
|
|
{
|
|
if (c == '(')
|
|
return TOKEN_PARENTHESIS_OPEN;
|
|
if (c == ')')
|
|
return TOKEN_PARENTHESIS_CLOSE;
|
|
if (c == '{')
|
|
return TOKEN_BRACE_OPEN;
|
|
if (c == '}')
|
|
return TOKEN_BRACE_CLOSE;
|
|
if (c == '$')
|
|
return TOKEN_DOLLAR;
|
|
return TOKEN_ERROR;
|
|
}
|
|
|
|
/*
 * Check that the first `len` characters of `word` are all ASCII letters,
 * digits or underscores. An empty range (len <= 0) is accepted, and in
 * that case `word` is never dereferenced.
 */
static bool is_word_alphanum(char *word, int len)
{
    int idx = 0;
    while (idx < len)
    {
        const char ch = word[idx];
        const bool lower = (ch >= 'a' && ch <= 'z');
        const bool upper = (ch >= 'A' && ch <= 'Z');
        const bool digit = (ch >= '0' && ch <= '9');
        if (!lower && !upper && !digit && ch != '_')
            return false;
        idx++;
    }
    return true;
}
|
|
|
|
static void word_lexer(struct lexer *lexer, char *input, bool *in_cmd,
|
|
enum token_type *word_type)
|
|
{
|
|
int j = 0;
|
|
char *word = NULL;
|
|
int word_pos = 0;
|
|
while (input[j])
|
|
{
|
|
if (input[j] == '\\')
|
|
{
|
|
word = realloc(word, (word_pos + 3) * sizeof(char));
|
|
word[word_pos++] = input[j++];
|
|
if (input[j] == 0)
|
|
break;
|
|
word[word_pos++] = input[j++];
|
|
if (input[j] == 0)
|
|
break;
|
|
}
|
|
if ((*word_type == TOKEN_WORD && is_separator(input[j]))
|
|
|| (is_pipe(input[j], input[j + 1]) && *word_type == TOKEN_WORD))
|
|
{
|
|
if (word)
|
|
{
|
|
create_word_and_append(word, word_pos, in_cmd, lexer,
|
|
word_type);
|
|
word = NULL;
|
|
word_pos = 0;
|
|
}
|
|
create_and_append_token(
|
|
lexer,
|
|
is_separator(input[j]) ? get_separator(input[j]) : TOKEN_PIPE,
|
|
NULL);
|
|
if (is_separator(input[j]))
|
|
{
|
|
if (lexer->alias != NULL && lexer->alias->next != lexer->tail)
|
|
{
|
|
if (lexer->alias->type == TOKEN_ALIAS)
|
|
process_alias(lexer->alias_prev, lexer->alias, lexer);
|
|
else
|
|
process_unalias(lexer->alias_prev, lexer->alias, lexer);
|
|
}
|
|
else if (lexer->alias)
|
|
{
|
|
if (lexer->alias_prev)
|
|
{
|
|
lexer_token_free(lexer->alias_prev->next);
|
|
lexer->alias_prev->next = lexer->tail;
|
|
}
|
|
else
|
|
{
|
|
lexer_token_free(lexer->alias);
|
|
lexer->tokens = lexer->tail;
|
|
}
|
|
}
|
|
if (input[j] == '\n')
|
|
{
|
|
struct lexer_alias *alias = lexer->alias_list;
|
|
while (alias)
|
|
{
|
|
struct lexer_alias *next = alias->next;
|
|
alias->next = shell->alias_list;
|
|
shell->alias_list = alias;
|
|
alias = next;
|
|
}
|
|
lexer->alias_list = NULL;
|
|
}
|
|
lexer->alias = NULL;
|
|
lexer->in_for = false;
|
|
lexer->found_for = false;
|
|
}
|
|
*in_cmd = false;
|
|
}
|
|
else if (*word_type == TOKEN_WORD
|
|
&& ((input[j] == '&' && input[j + 1] == '&')
|
|
|| (input[j] == '|' && input[j + 1] == '|')))
|
|
{
|
|
if (word)
|
|
{
|
|
create_word_and_append(word, word_pos, in_cmd, lexer,
|
|
word_type);
|
|
word = NULL;
|
|
word_pos = 0;
|
|
}
|
|
create_and_append_token(
|
|
lexer, input[j] == '&' ? TOKEN_AND : TOKEN_OR, NULL);
|
|
j++;
|
|
}
|
|
else if (*word_type == TOKEN_WORD && is_special(input[j]))
|
|
{
|
|
if (input[j] == '}' && lexer->in_variable)
|
|
{
|
|
word = realloc(word, (word_pos + 2) * sizeof(char));
|
|
word[word_pos++] = input[j];
|
|
lexer->in_variable = false;
|
|
}
|
|
else
|
|
{
|
|
if (word && (input[j] != '$' || input[j + 1] == '('))
|
|
{
|
|
create_word_and_append(word, word_pos, in_cmd, lexer,
|
|
word_type);
|
|
word = NULL;
|
|
word_pos = 0;
|
|
}
|
|
if (input[j] == '$')
|
|
{
|
|
if (input[j + 1] == '(')
|
|
{
|
|
*in_cmd = false;
|
|
j++;
|
|
create_and_append_token(lexer, TOKEN_SUBSTITUTION_OPEN,
|
|
NULL);
|
|
}
|
|
else
|
|
{
|
|
word = realloc(word, (word_pos + 3) * sizeof(char));
|
|
word[word_pos++] = input[j];
|
|
if (input[j + 1] == '{' || input[j + 1] == '$')
|
|
{
|
|
word[word_pos++] = input[++j];
|
|
lexer->in_variable = true;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (input[j] == '{' || input[j] == '(')
|
|
*in_cmd = false;
|
|
create_and_append_token(lexer, get_special(input[j]), NULL);
|
|
}
|
|
}
|
|
}
|
|
else if (*word_type == TOKEN_WORD && is_redir(input[j]))
|
|
{
|
|
if (word)
|
|
{
|
|
word[word_pos] = 0;
|
|
if (is_int(word))
|
|
{
|
|
create_and_append_token(lexer, TOKEN_IONUMBER, word);
|
|
}
|
|
else
|
|
create_word_and_append(word, word_pos, in_cmd, lexer,
|
|
word_type);
|
|
word = NULL;
|
|
word_pos = 0;
|
|
}
|
|
create_and_append_token(lexer, TOKEN_REDIR,
|
|
get_redir(input[j], input[j + 1]));
|
|
if (input[j + 1] != 0)
|
|
j++;
|
|
}
|
|
else if (*word_type == TOKEN_WORD && input[j] == '='
|
|
&& (!lexer->tail || lexer->tail->type != TOKEN_ASSIGNMENT_WORD)
|
|
&& is_word_alphanum(word, word_pos))
|
|
{
|
|
if (word)
|
|
{
|
|
create_word_and_append(word, word_pos, in_cmd, lexer,
|
|
word_type);
|
|
word = NULL;
|
|
word_pos = 0;
|
|
lexer->tail->type = TOKEN_ASSIGNMENT_WORD;
|
|
}
|
|
}
|
|
else if (is_quote(input[j])
|
|
&& (*word_type == get_quote(input[j])
|
|
|| *word_type == TOKEN_WORD))
|
|
{
|
|
if (word)
|
|
{
|
|
create_word_and_append(word, word_pos, in_cmd, lexer,
|
|
word_type);
|
|
word = NULL;
|
|
word_pos = 0;
|
|
}
|
|
if (lexer->alias)
|
|
{
|
|
j++;
|
|
continue;
|
|
}
|
|
if (*word_type == TOKEN_WORD && input[j] != '`')
|
|
*word_type = get_quote(input[j]);
|
|
else if (*word_type == TOKEN_WORD && input[j] == '`')
|
|
{
|
|
create_and_append_token(lexer, TOKEN_BACKTICK, NULL);
|
|
}
|
|
else if (get_quote(input[j]) == *word_type)
|
|
{
|
|
*word_type = TOKEN_WORD;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
word = realloc(word, (word_pos + 2) * sizeof(char));
|
|
word[word_pos++] = input[j];
|
|
}
|
|
j++;
|
|
}
|
|
if (word)
|
|
{
|
|
create_word_and_append(word, word_pos, in_cmd, lexer, word_type);
|
|
word = NULL;
|
|
word_pos = 0;
|
|
}
|
|
free(input);
|
|
}
|
|
|
|
void lexer_build(struct lexer *lexer)
|
|
{
|
|
bool in_cmd = false;
|
|
char **words = split_in_words(lexer->input);
|
|
enum token_type word_type = TOKEN_WORD;
|
|
for (int i = 0; words[i]; i++)
|
|
{
|
|
word_lexer(lexer, words[i], &in_cmd, &word_type);
|
|
create_and_append_token(lexer, TOKEN_SPACE, NULL);
|
|
}
|
|
if (word_type != TOKEN_WORD)
|
|
{
|
|
fprintf(stderr, "Error: quote <%c> is not terminated.\n",
|
|
word_type == TOKEN_WORD_SINGLE_QUOTE ? '\'' : '\"');
|
|
shell->return_code = 2;
|
|
shell->exit = true;
|
|
}
|
|
create_and_append_token(lexer, TOKEN_EOF, NULL);
|
|
process_spaces(lexer);
|
|
process_export(lexer);
|
|
if (shell->verbose)
|
|
lexer_print(lexer);
|
|
free(words);
|
|
lexer->head = lexer->tokens;
|
|
}
|
|
|
|
void lexer_go_back(struct lexer *lexer, struct lexer_token *token)
|
|
{
|
|
lexer->head = token;
|
|
}
|