algorembrant's picture
Upload 25 files
59da845 verified
use nom::{
branch::alt,
bytes::complete::{tag, take_while1, take_until, take_while},
character::complete::{alpha1, alphanumeric1, char, digit1, multispace0, multispace1, anychar},
combinator::{map, map_res, recognize, value, eof, opt},
multi::many0,
sequence::{delimited, pair, preceded, tuple},
IResult,
};
use crate::compiler::lexer::Token;
use std::str::FromStr;
/// Field-less namespace type for the lexer: every parser below is an
/// associated function, so no `Lexer` value is needed to call them.
pub struct Lexer;
impl Lexer {
pub fn tokenize(input: &str) -> IResult<&str, Vec<Token>> {
many0(preceded(
// Skip whitespace and comments
alt((
multispace1,
// Line comment //
recognize(pair(tag("//"), take_while(|c| c != '\n' && c != '\r'))),
// Block comment /* */
recognize(delimited(tag("/*"), take_until("*/"), tag("*/"))),
)),
// Parse tokens
alt((
// Keywords & Types
Self::parse_keyword,
// Literals
Self::parse_double_literal,
Self::parse_int_literal,
Self::parse_string_literal,
Self::parse_bool_literal,
// Identifiers
Self::parse_identifier,
// Operators & Punctuation (Order matters for prefix matching, e.g. == before =)
Self::parse_operator,
Self::parse_punctuation,
))
))(input)
}
fn parse_keyword(input: &str) -> IResult<&str, Token> {
alt((
value(Token::Int, tag("int")),
value(Token::Double, tag("double")),
value(Token::String, tag("string")),
value(Token::Bool, tag("bool")),
value(Token::Void, tag("void")),
value(Token::Input, tag("input")),
value(Token::If, tag("if")),
value(Token::Else, tag("else")),
value(Token::For, tag("for")),
value(Token::While, tag("while")),
value(Token::Return, tag("return")),
value(Token::MqlTick, tag("MqlTick")),
value(Token::MqlRates, tag("MqlRates")),
))(input)
}
fn parse_bool_literal(input: &str) -> IResult<&str, Token> {
alt((
value(Token::BoolLiteral(true), tag("true")),
value(Token::BoolLiteral(false), tag("false")),
))(input)
}
/// Parses an identifier: a leading run of ASCII letters or a single `_`,
/// followed by any number of alphanumeric characters or underscores.
/// The matched text is copied into `Token::Identifier`.
fn parse_identifier(input: &str) -> IResult<&str, Token> {
    let is_tail_char = |ch: char| ch == '_' || ch.is_alphanumeric();

    recognize(pair(alt((alpha1, tag("_"))), take_while(is_tail_char)))(input)
        .map(|(remaining, name)| (remaining, Token::Identifier(name.to_owned())))
}
fn parse_int_literal(input: &str) -> IResult<&str, Token> {
map_res(digit1, |s: &str| s.parse::<i64>().map(Token::IntLiteral))(input)
}
fn parse_double_literal(input: &str) -> IResult<&str, Token> {
map_res(
recognize(tuple((
digit1,
char('.'),
digit1
))),
|s: &str| s.parse::<f64>().map(Token::DoubleLiteral)
)(input)
}
/// Parses a double-quoted string literal into `Token::StringLiteral`.
///
/// Everything between the opening quote and the next `"` is taken verbatim;
/// no escape sequences are interpreted.
fn parse_string_literal(input: &str) -> IResult<&str, Token> {
    let contents = take_while(|ch: char| ch != '"');

    delimited(char('"'), contents, char('"'))(input)
        .map(|(remaining, text): (&str, &str)| {
            (remaining, Token::StringLiteral(text.to_owned()))
        })
}
/// Parses a comparison, logical, assignment or arithmetic operator.
fn parse_operator(input: &str) -> IResult<&str, Token> {
    // Two-character operators are tried first so that, for example, `==`
    // is never lexed as two `=` tokens.
    let two_char = alt((
        value(Token::Equals, tag("==")),
        value(Token::NotEquals, tag("!=")),
        value(Token::GreaterEq, tag(">=")),
        value(Token::LessEq, tag("<=")),
        value(Token::And, tag("&&")),
        value(Token::Or, tag("||")),
    ));

    let one_char = alt((
        value(Token::Assign, tag("=")),
        value(Token::Greater, tag(">")),
        value(Token::Less, tag("<")),
        value(Token::Plus, tag("+")),
        value(Token::Minus, tag("-")),
        value(Token::Star, tag("*")),
        value(Token::Slash, tag("/")),
        value(Token::Not, tag("!")),
    ));

    alt((two_char, one_char))(input)
}
/// Parses a single punctuation character: a bracket of any kind, or one of
/// the separators `,`, `;` and `.`.
fn parse_punctuation(input: &str) -> IResult<&str, Token> {
    let bracket = alt((
        value(Token::OpenParen, tag("(")),
        value(Token::CloseParen, tag(")")),
        value(Token::OpenBrace, tag("{")),
        value(Token::CloseBrace, tag("}")),
        value(Token::OpenBracket, tag("[")),
        value(Token::CloseBracket, tag("]")),
    ));

    let separator = alt((
        value(Token::Comma, tag(",")),
        value(Token::Semicolon, tag(";")),
        value(Token::Dot, tag(".")),
    ));

    alt((bracket, separator))(input)
}
}