
#ifndef UTIL_TEX_ATOM_H
#define UTIL_TEX_ATOM_H

#include <vector>
#include <string>
#include <iostream>

// ---------------------------------------------------------------------
// To parse a line of TeX source.
// ---------------------------------------------------------------------

// The definitions here parse a line of TeX source to the limit of the
// programmer's empirical understanding of the TeX grammar.
//
// The programmer first wrote these definitions to parse LaTeX *.toc
// files.  Foreseeing that the definitions might someday find use in
// another setting, the programmer has tried to code cleanly for
// extensibility, but naturally has left many or most of the likely
// extensions themselves uncoded.  In other words, the extensions aren't
// there, but there's a proper place to put them if they come.  The
// code's principal limitation as it stands is that it handles only a
// single line of TeX source at a time.  This suffices to parse
// LaTeX *.toc files.  To bridge source lines is left until the actual
// need should arise; the task if undertaken should not prove too hard.
// (Were the task undertaken, the programmer suggests, or at least
// speculates, that the neatest way to bridge might be to preprocess the
// TeX source to combine lines before the present code ever saw the
// source.  The reason the programmer has not actually coded the bridge,
// besides to save time---admittedly the biggest reason---is that he has
// no use case in his way to test it on.  With no use case and assuming
// that the programmer does not go to the effort to search out or to
// contrive a suitably realistic one, the code remains unused.  Unused
// code tends to linger years in an undetectedly buggy state.  Better
// not to have written such code in the first place.)
//
//

namespace Util {

  // Forward declarations of the TeX atom classes, so that the free
  // functions below can name them before the classes are defined.
  class TeX_atom;
  class TeX_atom_terminal;
  class TeX_atom_nonterminal;

  // Option passed to tokenize_TeX().  The enumerator names indicate
  // that it selects whether no-break spaces are translated during
  // tokenization.
  // NOTE(review): what a no-break space is translated *to* is decided
  // in the implementation file, not here -- confirm there.
  enum Translate_nobreakspace {
    DO_NOT_TRANSLATE_NOBREAKSPACE = 0,
    TRANSLATE_NOBREAKSPACE        = 1
  };

  // Tokenizes one line of TeX source.
  //
  //   line                    the line of TeX source to tokenize
  //   tokens_ptr              pointer to the vector of token strings
  //                           (presumably the output container; whether
  //                           it is cleared first or appended to is not
  //                           visible in this header)
  //   translate_nobreakspace  see Translate_nobreakspace; translation
  //                           is off unless the caller asks for it
  void tokenize_TeX(
    const std::string        &line,
    std::vector<std::string> *tokens_ptr,
    Translate_nobreakspace    translate_nobreakspace
      = DO_NOT_TRANSLATE_NOBREAKSPACE
  );

  // Stream insertion for any TeX_atom, taken by reference to the
  // abstract base.  Returns a std::ostream reference, as stream
  // inserters conventionally do to allow chaining.
  std::ostream &operator<<( std::ostream &os, const TeX_atom &atom );

}

// Abstract base class of the TeX atom hierarchy.  Every atom reports
// whether it is terminal, yields its text through term(), and can be
// copied polymorphically through replicate().  All member functions
// other than the destructor are pure virtual.
class Util::TeX_atom {
  public:
    // Empty tag type usable as an exception.
    // NOTE(review): by its name, presumably thrown when the TeX source
    // has unbalanced grouping; the throw sites are not in this header
    // -- confirm in the implementation file.
    struct Exc_unbalanced {};
    // True for a terminal atom, false for a nonterminal one.
    virtual bool is_terminal() const = 0;
    // The atom's text, returned by value.
    virtual std::string term() const = 0;
    // Virtual so that an atom can be deleted through a TeX_atom*.
    virtual ~TeX_atom() {}
    // The interface provides a replicate() method to let a user copy
    // a TeX_atom in ignorance of its exact type.  (There may well exist
    // a neater way to let the user do this, but the way somehow does
    // not come to mind at the moment the code is written.)
    //
    // The overrides defined in this header return an object allocated
    // with new, so the caller presumably owns the result and must
    // delete it -- TODO confirm against callers.
    virtual TeX_atom *replicate() const = 0;
};

// A nonterminal TeX atom.  It is both a TeX_atom and, by public
// inheritance, a std::vector<TeX_atom*>, so the full vector interface
// over the child-atom pointers is available to users.
//
// NOTE(review): the class declares its own destructor, copy
// constructor and copy assignment, which suggests that it owns the
// pointed-to atoms and copies them deeply; the definitions are not in
// this header -- confirm in the implementation file.
class Util::TeX_atom_nonterminal
  : public TeX_atom, public std::vector<TeX_atom*>
{
  public:

    // --- Construction from TeX source --------------------------------

    // From a single line of TeX source text.
    explicit TeX_atom_nonterminal( const std::string &line );

    // From a whole vector of token strings.
    explicit TeX_atom_nonterminal(
      const std::vector<std::string> &tokens
    );

    // From the range [begin, end) of token strings.
    explicit TeX_atom_nonterminal(
      std::vector<std::string>::const_iterator begin,
      std::vector<std::string>::const_iterator end
    );

    // --- Construction from existing atoms ----------------------------

    // From a whole vector of atom pointers.
    explicit TeX_atom_nonterminal(
      const std::vector<TeX_atom*> &atoms
    );

    // From the range [begin, end) of atom pointers.
    explicit TeX_atom_nonterminal(
      std::vector<TeX_atom*>::const_iterator begin,
      std::vector<TeX_atom*>::const_iterator end
    );

    // --- Copy control and destruction --------------------------------

    TeX_atom_nonterminal( const TeX_atom_nonterminal &atom );
    TeX_atom_nonterminal &operator=( const TeX_atom_nonterminal &atom );
    ~TeX_atom_nonterminal();

    // --- TeX_atom interface ------------------------------------------

    // A nonterminal atom is never terminal.
    bool is_terminal() const {
      return false;
    }

    // The atom's text; defined out of line.
    std::string term() const;

    // Polymorphic copy: a new object made with the copy constructor,
    // returned through a pointer to the base class.
    TeX_atom *replicate() const {
      TeX_atom_nonterminal *const copy = new TeX_atom_nonterminal(*this);
      return copy;
    }

  private:

    // Helper taking a range [begin, end) of token strings.
    // NOTE(review): presumably shared by the token-based constructors
    // to do the actual parsing -- confirm in the implementation file.
    void init(
      std::vector<std::string>::const_iterator begin,
      std::vector<std::string>::const_iterator end
    );
};

// A terminal TeX atom: a leaf that stores a single string and hands
// it back unchanged from term().
class Util::TeX_atom_terminal : public TeX_atom {
  public:

    // Builds the atom from one token; the token is copied and stored.
    explicit TeX_atom_terminal( const std::string &token )
      : term1(token) {}

    // Copy constructor.  The text is fetched through the source
    // atom's term() rather than read from its member directly.
    TeX_atom_terminal( const TeX_atom_terminal &atom )
      : TeX_atom(), term1(atom.term()) {}

    // Copy assignment; defined out of line.
    TeX_atom_terminal &operator=( const TeX_atom_terminal &atom );

    // A terminal atom is always terminal.
    bool is_terminal() const {
      return true;
    }

    // The stored text, returned by value.
    std::string term() const {
      return term1;
    }

    // Polymorphic copy: a new object made with the copy constructor,
    // returned through a pointer to the base class.
    TeX_atom *replicate() const {
      TeX_atom_terminal *const copy = new TeX_atom_terminal(*this);
      return copy;
    }

  private:

    // The text this atom holds.
    std::string term1;
};

#endif

