use nom::{ branch::alt, bytes::complete::{tag, take_while1, take_until, take_while}, character::complete::{alpha1, alphanumeric1, char, digit1, multispace0, multispace1, anychar}, combinator::{map, map_res, recognize, value, eof, opt}, multi::many0, sequence::{delimited, pair, preceded, tuple}, IResult, }; use crate::compiler::lexer::Token; use std::str::FromStr; pub struct Lexer; impl Lexer { pub fn tokenize(input: &str) -> IResult<&str, Vec> { many0(preceded( // Skip whitespace and comments alt(( multispace1, // Line comment // recognize(pair(tag("//"), take_while(|c| c != '\n' && c != '\r'))), // Block comment /* */ recognize(delimited(tag("/*"), take_until("*/"), tag("*/"))), )), // Parse tokens alt(( // Keywords & Types Self::parse_keyword, // Literals Self::parse_double_literal, Self::parse_int_literal, Self::parse_string_literal, Self::parse_bool_literal, // Identifiers Self::parse_identifier, // Operators & Punctuation (Order matters for prefix matching, e.g. == before =) Self::parse_operator, Self::parse_punctuation, )) ))(input) } fn parse_keyword(input: &str) -> IResult<&str, Token> { alt(( value(Token::Int, tag("int")), value(Token::Double, tag("double")), value(Token::String, tag("string")), value(Token::Bool, tag("bool")), value(Token::Void, tag("void")), value(Token::Input, tag("input")), value(Token::If, tag("if")), value(Token::Else, tag("else")), value(Token::For, tag("for")), value(Token::While, tag("while")), value(Token::Return, tag("return")), value(Token::MqlTick, tag("MqlTick")), value(Token::MqlRates, tag("MqlRates")), ))(input) } fn parse_bool_literal(input: &str) -> IResult<&str, Token> { alt(( value(Token::BoolLiteral(true), tag("true")), value(Token::BoolLiteral(false), tag("false")), ))(input) } fn parse_identifier(input: &str) -> IResult<&str, Token> { map( recognize(pair( alt((alpha1, tag("_"))), take_while(|c: char| c.is_alphanumeric() || c == '_') )), |s: &str| Token::Identifier(s.to_string()) )(input) } fn parse_int_literal(input: &str) -> IResult<&str, Token> { map_res(digit1, |s: &str| s.parse::().map(Token::IntLiteral))(input) } fn parse_double_literal(input: &str) -> IResult<&str, Token> { map_res( recognize(tuple(( digit1, char('.'), digit1 ))), |s: &str| s.parse::().map(Token::DoubleLiteral) )(input) } fn parse_string_literal(input: &str) -> IResult<&str, Token> { map( delimited(char('"'), take_while(|c| c != '"'), char('"')), |s: &str| Token::StringLiteral(s.to_string()) )(input) } fn parse_operator(input: &str) -> IResult<&str, Token> { alt(( value(Token::Equals, tag("==")), value(Token::NotEquals, tag("!=")), value(Token::GreaterEq, tag(">=")), value(Token::LessEq, tag("<=")), value(Token::And, tag("&&")), value(Token::Or, tag("||")), value(Token::Assign, tag("=")), value(Token::Greater, tag(">")), value(Token::Less, tag("<")), value(Token::Plus, tag("+")), value(Token::Minus, tag("-")), value(Token::Star, tag("*")), value(Token::Slash, tag("/")), value(Token::Not, tag("!")), ))(input) } fn parse_punctuation(input: &str) -> IResult<&str, Token> { alt(( value(Token::OpenParen, tag("(")), value(Token::CloseParen, tag(")")), value(Token::OpenBrace, tag("{")), value(Token::CloseBrace, tag("}")), value(Token::OpenBracket, tag("[")), value(Token::CloseBracket, tag("]")), value(Token::Comma, tag(",")), value(Token::Semicolon, tag(";")), value(Token::Dot, tag(".")), ))(input) } }