Yume
token.hpp
#pragma once

#include "atom.hpp"
#include "util.hpp"
#include <algorithm>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

namespace llvm {
class raw_ostream;
}

namespace yume {

/// Represents a location in source code, as a range starting at a line and column and ending at some other line and
/// column of some file.
/**
 * Lines are 1-indexed, meaning a line number of 0 as either the beginning or end represents an unknown location.
 * The range is inclusive, meaning a location representing just a single character would have its begin line and column
 * be equal to its end line and column. There is no way to store a location representing "zero characters".
 */
struct Loc {
  int begin_line;
  int begin_col;
  int end_line;
  int end_col;
  const char* file;

  constexpr auto operator<=>(const Loc& other) const noexcept = default;

  /// \brief Create a new location representing the "union" of two locations.
  ///
  /// The new location will have the beginning line and column of whichever location has a beginning earlier in the
  /// file, and an ending line and column of whichever location has an ending later in the file.
  constexpr auto operator+(const Loc& other) const noexcept -> Loc {
    YUME_ASSERT(other.file == file, "Cannot add locations in different files");
    auto [min_begin_line, min_begin_col] =
        std::min(std::pair{begin_line, begin_col}, std::pair{other.begin_line, other.begin_col});
    auto [max_end_line, max_end_col] = std::max(std::pair{end_line, end_col}, std::pair{other.end_line, other.end_col});
    return Loc{min_begin_line, min_begin_col, max_end_line, max_end_col, file};
  }

  [[nodiscard]] auto to_string() const -> string {
    stringstream ss{};
    if (file != nullptr) {
      if (auto filename = string{file}; filename.front() == '<' && filename.back() == '>') {
        // This is a special "fake path", so we don't try to normalize it.
        ss << filename;
      } else {
        ss << fs::path(file).stem().native();
      }
    }

    if (!valid()) {
      ss << ":?";
    } else {
      ss << ':' << begin_line << ':' << begin_col;
      if (end_line != begin_line)
        ss << ' ' << end_line << ':' << end_col;
      else if (end_col != begin_col)
        ss << " :" << end_col;
    }
    return ss.str();
  }

  [[nodiscard]] auto valid() const -> bool { return begin_line > 0 && end_line > 0; }

  /// Return a new Loc which refers to the first character of the current Loc.
  [[nodiscard]] auto single() const -> Loc { return {begin_line, begin_col, begin_line, begin_col, file}; }
};
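
// Illustrative sketch, not part of the original header: combining two locations with operator+ and
// rendering the result. The helper name, file name and coordinates below are hypothetical placeholders.
inline auto example_merge_locations(const char* example_file) -> string {
  Loc first{3, 5, 3, 9, example_file};  // line 3, columns 5 through 9
  Loc second{4, 1, 4, 2, example_file}; // line 4, columns 1 through 2
  Loc merged = first + second;          // union: begins at 3:5, ends at 4:2
  // merged.single() would refer to just the first character, at 3:5.
  return merged.to_string();
}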

/// A categorized token in source code, created by the tokenizer. These tokens are consumed by the lexer.
/**
 * Each token has a type, an associated payload (usually the text the token was created from), and a location, \ref Loc.
 */
struct Token {
  enum struct Type {
    Word,      ///< Any form of keyword or identifier, essentially the "default" token type
    Skip,      ///< Tokens which should be ignored, i.e. insignificant whitespace
    Symbol,    ///< Special characters, such as those representing operators
    Literal,   ///< A string literal, enclosed in quotes
    Number,    ///< A number literal
    Char,      ///< A character literal, beginning with `?`
    Separator, ///< A newline or a semicolon `;`
    EndOfFile  ///< A token added at the very end of the file
  };
  static auto inline constexpr type_name(Type type) -> const char* {
    using enum Token::Type;
    switch (type) {
    case Word: return "Word";
    case Skip: return "Skip";
    case Symbol: return "Symbol";
    case Literal: return "Literal";
    case Number: return "Number";
    case Char: return "Char";
    case Separator: return "Separator";
    case EndOfFile: return "End of File";
    }
  }

  using Payload = optional<Atom>;

  Type type;
  Payload payload;
  int index = -1;
  Loc loc{};

  [[nodiscard]] auto is_a(const std::pair<Type, Atom>& type_atom) const -> bool {
    return type == type_atom.first && payload == type_atom.second;
  }

  explicit constexpr Token(Type type) : type(type) {}
  constexpr Token(Type type, Payload payload) noexcept : type(type), payload(payload) {}
  constexpr Token(Type type, Payload payload, int i, Loc loc) noexcept
      : type(type), payload(payload), index{i}, loc{loc} {}

  friend auto operator<<(llvm::raw_ostream& os, const Token& token) -> llvm::raw_ostream&;
};
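
// Illustrative sketch, not part of the original header: describing a token using type_name() and its
// source location. The helper name is hypothetical.
inline auto example_describe(const Token& token) -> string {
  stringstream ss{};
  ss << Token::type_name(token.type) << " at " << token.loc.to_string();
  return ss.str();
}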

/// Consume the contents of the input stream and create corresponding tokens, preserving every token, including
/// whitespace. This is usually undesired.
/// \sa tokenize
auto tokenize_preserve_skipped(std::istream& in, const string& source_file) -> vector<Token>;

/// Consume the contents of the input stream and create corresponding tokens, ignoring insignificant whitespace
auto tokenize(std::istream& in, const string& source_file) -> vector<Token>;
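
// Illustrative sketch, not part of the original header: tokenizing source text held in memory.
// The helper name is hypothetical; the angle-bracketed name mirrors the "fake path" convention
// recognized by Loc::to_string.
inline auto example_tokenize_string(const string& source) -> vector<Token> {
  std::istringstream in{source};
  return tokenize(in, "<string>");
}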
} // namespace yume