File size: 4,675 Bytes
59da845
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
use nom::{
    branch::alt,
    bytes::complete::{tag, take_while1, take_until, take_while},
    character::complete::{alpha1, alphanumeric1, char, digit1, multispace0, multispace1, anychar},
    combinator::{map, map_res, recognize, value, eof, opt},
    multi::many0,
    sequence::{delimited, pair, preceded, tuple},
    IResult,
};
use crate::compiler::lexer::Token;
use std::str::FromStr;

/// Stateless namespace type for the lexer.
///
/// It carries no data; every piece of functionality is an associated
/// function, with `Lexer::tokenize` as the entry point.
#[derive(Debug, Clone, Copy, Default)]
pub struct Lexer;

impl Lexer {
    pub fn tokenize(input: &str) -> IResult<&str, Vec<Token>> {
        many0(preceded(
            // Skip whitespace and comments
            alt((
                multispace1,
                // Line comment //
                recognize(pair(tag("//"), take_while(|c| c != '\n' && c != '\r'))),
                // Block comment /* */
                recognize(delimited(tag("/*"), take_until("*/"), tag("*/"))),
            )),
            // Parse tokens
            alt((
                // Keywords & Types
                Self::parse_keyword,
                
                // Literals
                Self::parse_double_literal,
                Self::parse_int_literal,
                Self::parse_string_literal,
                Self::parse_bool_literal,
                
                // Identifiers
                Self::parse_identifier,
                
                // Operators & Punctuation (Order matters for prefix matching, e.g. == before =)
                Self::parse_operator,
                Self::parse_punctuation,
            ))
        ))(input)
    }

    fn parse_keyword(input: &str) -> IResult<&str, Token> {
        alt((
            value(Token::Int, tag("int")),
            value(Token::Double, tag("double")),
            value(Token::String, tag("string")),
            value(Token::Bool, tag("bool")),
            value(Token::Void, tag("void")),
            value(Token::Input, tag("input")),
            value(Token::If, tag("if")),
            value(Token::Else, tag("else")),
            value(Token::For, tag("for")),
            value(Token::While, tag("while")),
            value(Token::Return, tag("return")),
            value(Token::MqlTick, tag("MqlTick")),
            value(Token::MqlRates, tag("MqlRates")),
        ))(input)
    }
    
    fn parse_bool_literal(input: &str) -> IResult<&str, Token> {
        alt((
            value(Token::BoolLiteral(true), tag("true")),
            value(Token::BoolLiteral(false), tag("false")),
        ))(input)
    }

    fn parse_identifier(input: &str) -> IResult<&str, Token> {
        map(
            recognize(pair(
                alt((alpha1, tag("_"))),
                take_while(|c: char| c.is_alphanumeric() || c == '_')
            )),
            |s: &str| Token::Identifier(s.to_string())
        )(input)
    }

    fn parse_int_literal(input: &str) -> IResult<&str, Token> {
        map_res(digit1, |s: &str| s.parse::<i64>().map(Token::IntLiteral))(input)
    }

    fn parse_double_literal(input: &str) -> IResult<&str, Token> {
        map_res(
            recognize(tuple((
                digit1,
                char('.'),
                digit1
            ))),
            |s: &str| s.parse::<f64>().map(Token::DoubleLiteral)
        )(input)
    }

    fn parse_string_literal(input: &str) -> IResult<&str, Token> {
        map(
            delimited(char('"'), take_while(|c| c != '"'), char('"')),
            |s: &str| Token::StringLiteral(s.to_string())
        )(input)
    }

    fn parse_operator(input: &str) -> IResult<&str, Token> {
        alt((
            value(Token::Equals, tag("==")),
            value(Token::NotEquals, tag("!=")),
            value(Token::GreaterEq, tag(">=")),
            value(Token::LessEq, tag("<=")),
            value(Token::And, tag("&&")),
            value(Token::Or, tag("||")),
            value(Token::Assign, tag("=")),
            value(Token::Greater, tag(">")),
            value(Token::Less, tag("<")),
            value(Token::Plus, tag("+")),
            value(Token::Minus, tag("-")),
            value(Token::Star, tag("*")),
            value(Token::Slash, tag("/")),
            value(Token::Not, tag("!")),
        ))(input)
    }
    
    fn parse_punctuation(input: &str) -> IResult<&str, Token> {
        alt((
            value(Token::OpenParen, tag("(")),
            value(Token::CloseParen, tag(")")),
            value(Token::OpenBrace, tag("{")),
            value(Token::CloseBrace, tag("}")),
            value(Token::OpenBracket, tag("[")),
            value(Token::CloseBracket, tag("]")),
            value(Token::Comma, tag(",")),
            value(Token::Semicolon, tag(";")),
            value(Token::Dot, tag(".")),
        ))(input)
    }
}