chromium/third_party/hunspell/src/parsers/latexparser.cxx

/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Hunspell, based on MySpell.
 *
 * The Initial Developers of the Original Code are
 * Kevin Hendricks (MySpell) and Németh László (Hunspell).
 * Portions created by the Initial Developers are Copyright (C) 2002-2005
 * the Initial Developers. All Rights Reserved.
 *
 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <ctype.h>

#include "../hunspell/csutil.hxx"
#include "latexparser.hxx"

#ifndef W32
using namespace std;
#endif

static struct {
  const char* pat[2];
  int arg;
} PATTERN[] = {{{"\\(", "\\)"}, 0},
               {{"$$", "$$"}, 0},
               {{"$", "$"}, 0},
               {{"\\begin{math}", "\\end{math}"}, 0},
               {{"\\[", "\\]"}, 0},
               {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
               {{"\\begin{equation}", "\\end{equation}"}, 0},
               {{"\\begin{equation*}", "\\end{equation*}"}, 0},
               {{"\\cite", NULL}, 1},
               {{"\\nocite", NULL}, 1},
               {{"\\index", NULL}, 1},
               {{"\\label", NULL}, 1},
               {{"\\ref", NULL}, 1},
               {{"\\pageref", NULL}, 1},
               {{"\\autoref", NULL}, 1},
               {{"\\parbox", NULL}, 1},
               {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
               {{"\\verb+", "+"}, 0},
               {{"\\verb|", "|"}, 0},
               {{"\\verb#", "#"}, 0},
               {{"\\verb*", "*"}, 0},
               {{"\\documentstyle", "\\begin{document}"}, 0},
               {{"\\documentclass", "\\begin{document}"}, 0},
               //	{ { "\\documentclass", NULL } , 1 },
               {{"\\usepackage", NULL}, 1},
               {{"\\includeonly", NULL}, 1},
               {{"\\include", NULL}, 1},
               {{"\\input", NULL}, 1},
               {{"\\vspace", NULL}, 1},
               {{"\\setlength", NULL}, 2},
               {{"\\addtolength", NULL}, 2},
               {{"\\settowidth", NULL}, 2},
               {{"\\rule", NULL}, 2},
               {{"\\hspace", NULL}, 1},
               {{"\\vspace", NULL}, 1},
               {{"\\\\[", "]"}, 0},
               {{"\\pagebreak[", "]"}, 0},
               {{"\\nopagebreak[", "]"}, 0},
               {{"\\enlargethispage", NULL}, 1},
               {{"\\begin{tabular}", NULL}, 1},
               {{"\\addcontentsline", NULL}, 2},
               {{"\\begin{thebibliography}", NULL}, 1},
               {{"\\bibliography", NULL}, 1},
               {{"\\bibliographystyle", NULL}, 1},
               {{"\\bibitem", NULL}, 1},
               {{"\\begin", NULL}, 1},
               {{"\\end", NULL}, 1},
               {{"\\pagestyle", NULL}, 1},
               {{"\\pagenumbering", NULL}, 1},
               {{"\\thispagestyle", NULL}, 1},
               {{"\\newtheorem", NULL}, 2},
               {{"\\newcommand", NULL}, 2},
               {{"\\renewcommand", NULL}, 2},
               {{"\\setcounter", NULL}, 2},
               {{"\\addtocounter", NULL}, 1},
               {{"\\stepcounter", NULL}, 1},
               {{"\\selectlanguage", NULL}, 1},
               {{"\\inputencoding", NULL}, 1},
               {{"\\hyphenation", NULL}, 1},
               {{"\\definecolor", NULL}, 3},
               {{"\\color", NULL}, 1},
               {{"\\textcolor", NULL}, 1},
               {{"\\pagecolor", NULL}, 1},
               {{"\\colorbox", NULL}, 2},
               {{"\\fcolorbox", NULL}, 2},
               {{"\\declaregraphicsextensions", NULL}, 1},
               {{"\\psfig", NULL}, 1},
               {{"\\url", NULL}, 1},
               {{"\\eqref", NULL}, 1},
               {{"\\vskip", NULL}, 1},
               {{"\\vglue", NULL}, 1},
               {{"\'\'", NULL}, 1}};

#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))

LaTeXParser::LaTeXParser(const char* wordchars)
    : TextParser(wordchars)
    , pattern_num(0), depth(0), arg(0), opt(0) {
}

LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
    : TextParser(wordchars, len)
    , pattern_num(0), depth(0), arg(0), opt(0) {
}

LaTeXParser::~LaTeXParser() {}

int LaTeXParser::look_pattern(int col) {
  for (unsigned int i = 0; i < PATTERN_LEN; i++) {
    const char* j = line[actual].c_str() + head;
    const char* k = PATTERN[i].pat[col];
    if (!k)
      continue;
    while ((*k != '\0') && (tolower(*j) == *k)) {
      j++;
      k++;
    }
    if (*k == '\0')
      return i;
  }
  return -1;
}

/*
 * LaTeXParser
 *
 * state 0: not wordchar
 * state 1: wordchar
 * state 2: comments
 * state 3: commands
 * state 4: commands with arguments
 * state 5: % comment
 *
 */

bool LaTeXParser::next_token(std::string& t) {
  t.clear();
  int i;
  int slash = 0;
  int apostrophe;
  for (;;) {
    // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
    // %s\n",depth,state,arg,line[actual]+head);

    switch (state) {
      case 0:  // non word chars
        if ((pattern_num = look_pattern(0)) != -1) {
          if (PATTERN[pattern_num].pat[1]) {
            state = 2;
          } else {
            state = 4;
            depth = 0;
            arg = 0;
            opt = 1;
          }
          head += strlen(PATTERN[pattern_num].pat[0]) - 1;
        } else if (line[actual][head] == '%') {
          state = 5;
        } else if (is_wordchar(line[actual].c_str() + head)) {
          state = 1;
          token = head;
        } else if (line[actual][head] == '\\') {
          if (line[actual][head + 1] == '\\' ||   // \\ (linebreak)
              (line[actual][head + 1] == '$') ||  // \$ (dollar sign)
              (line[actual][head + 1] == '%')) {  // \% (percent)
            head++;
            break;
          }
          state = 3;
        }
        break;
      case 1:  // wordchar
        apostrophe = 0;
        if (!is_wordchar(line[actual].c_str() + head) ||
            (line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
             ++apostrophe)) {
          state = 0;
          bool ok = alloc_token(token, &head, t);
          if (apostrophe)
            head += 2;
          if (ok)
            return true;
        }
        break;
      case 2:  // comment, labels, etc
        if (((i = look_pattern(1)) != -1) &&
            (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
          state = 0;
          head += strlen(PATTERN[pattern_num].pat[1]) - 1;
        }
        break;
      case 3:  // command
        if ((tolower(line[actual][head]) < 'a') ||
            (tolower(line[actual][head]) > 'z')) {
          state = 0;
          head--;
        }
        break;
      case 4:  // command with arguments
        if (slash && (line[actual][head] != '\0')) {
          slash = 0;
          head++;
          break;
        } else if (line[actual][head] == '\\') {
          slash = 1;
        } else if ((line[actual][head] == '{') ||
                   ((opt) && (line[actual][head] == '['))) {
          depth++;
          opt = 0;
        } else if (line[actual][head] == '}') {
          depth--;
          if (depth == 0) {
            opt = 1;
            arg++;
          }
          if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
              (depth < 0)) {
            state = 0;  // XXX not handles the last optional arg.
          }
        } else if (line[actual][head] == ']')
          depth--;
    }  // case
    if (next_char(line[actual].c_str(), &head)) {
      if (state == 5)
        state = 0;
      return false;
    }
  }
}