use super::error::Position; #[derive(Debug, PartialEq, Eq)] pub enum Token { OpenParenthesis, Lambda, Dot, CloseParenthesis, Identifier(String), } pub fn peek<'a>(input: &'a str, beginning: usize) -> (Option, Position, &'a str, usize) { let mut characters = input.chars(); let (mut char_offset, mut byte_offset) = (0, 0); let next = loop { match characters.next() { Some(character) if character.is_whitespace() => { char_offset += 1; byte_offset += character.len_utf8(); } next => break next, } }; let (char_offset, byte_offset) = (char_offset, byte_offset); let (token, (char_end, byte_end)) = match next { Some(c @ '(') => ( Some(Token::OpenParenthesis), (char_offset + 1, byte_offset + c.len_utf8()), ), Some(c @ '\\') => ( Some(Token::Lambda), (char_offset + 1, byte_offset + c.len_utf8()), ), Some(c @ '.') => ( Some(Token::Dot), (char_offset + 1, byte_offset + c.len_utf8()), ), Some(c @ ')') => ( Some(Token::CloseParenthesis), (char_offset + 1, byte_offset + c.len_utf8()), ), Some(first) => { fn is_terminal(c: char) -> bool { c.is_whitespace() || match c { '\\' | '(' | ')' | '.' => true, _ => false, } } let mut matched = String::new(); let mut char_length = 0; matched.push(first); char_length += 1; loop { match characters.next() { Some(character) if !is_terminal(character) => { matched.push(character); char_length += 1; } _ => break, } } let char_length = char_length; let byte_length = matched.len(); ( Some(Token::Identifier(matched)), (char_offset + char_length, byte_offset + byte_length), ) } None => (None, (char_offset, byte_offset)), }; let position = match token { Some(_) => Position::At(beginning + char_offset), None => Position::End, }; return (token, position, &input[byte_end..], beginning + char_end); } #[cfg(test)] mod tests { use super::*; #[test] fn peek_open_parentheses() { assert_eq!( peek("(abc)", 0), (Some(Token::OpenParenthesis), Position::At(0), "abc)", 1) ); assert_eq!( peek(" ( hello", 0), (Some(Token::OpenParenthesis), Position::At(2), " hello", 3) ); assert_eq!( peek(" ((.", 5), (Some(Token::OpenParenthesis), Position::At(6), "(.", 7) ); } #[test] fn peek_lambdas() { assert_eq!( peek("\\x.x", 0), (Some(Token::Lambda), Position::At(0), "x.x", 1) ); assert_eq!( peek(" \\x y.x", 20), (Some(Token::Lambda), Position::At(24), "x y.x", 25) ); assert_eq!( peek(" \\\\", 2), (Some(Token::Lambda), Position::At(3), "\\", 4) ); } #[test] fn peek_identifiers() { assert_eq!( peek(" test a", 0), ( Some(Token::Identifier("test".to_owned())), Position::At(2), " a", 6 ) ); assert_eq!( peek(" a bb c", 5), ( Some(Token::Identifier("a".to_owned())), Position::At(6), " bb c", 7 ) ); assert_eq!( peek(" 東京 (a b)", 2), ( Some(Token::Identifier("東京".to_owned())), Position::At(3), " (a b)", 5 ) ) } #[test] fn peek_dots() { assert_eq!( peek(".a b", 0), (Some(Token::Dot), Position::At(0), "a b", 1) ); assert_eq!( peek(" . aaa", 40), (Some(Token::Dot), Position::At(42), " aaa", 43) ) } #[test] fn peek_close_parentheses() { assert_eq!( peek(" )))(", 1), (Some(Token::CloseParenthesis), Position::At(2), "))(", 3) ); } #[test] fn peek_none() { assert_eq!(peek(" ", 6), (None, Position::End, "", 9)); } }