/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Hunspell, based on MySpell.
*
* The Initial Developers of the Original Code are
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
* Portions created by the Initial Developers are Copyright (C) 2002-2005
* the Initial Developers. All Rights Reserved.
*
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <ctype.h>
#include "../hunspell/csutil.hxx"
#include "latexparser.hxx"
#ifndef W32
using namespace std;
#endif
/*
 * Table of LaTeX constructs whose contents are skipped by the parser.
 *
 * pat[0]  opening text, matched as a prefix of the input at the current
 *         position. look_pattern() lower-cases the input before comparing,
 *         so every pattern has to be written in lower case.
 * pat[1]  closing text of a delimited region (math, verbatim, preamble, ...),
 *         or NULL for a command that is followed by brace arguments.
 * arg     number of top-level {...} arguments to skip after the command;
 *         only consulted when pat[1] is NULL.
 *
 * look_pattern() walks the table from the top and returns the first entry
 * that matches, so an entry has to be listed before any shorter entry that is
 * a prefix of it (e.g. "\\begin{math}" before "\\begin", "\\includeonly"
 * before "\\include"). An entry listed after one of its own prefixes, or a
 * repeated entry, can never be selected.
 */
static struct {
  const char* pat[2];
  int arg;
} PATTERN[] = {{{"\\(", "\\)"}, 0},
               {{"$$", "$$"}, 0},
               {{"$", "$"}, 0},
               {{"\\begin{math}", "\\end{math}"}, 0},
               {{"\\[", "\\]"}, 0},
               {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
               {{"\\begin{equation}", "\\end{equation}"}, 0},
               {{"\\begin{equation*}", "\\end{equation*}"}, 0},
               {{"\\cite", NULL}, 1},
               {{"\\nocite", NULL}, 1},
               {{"\\index", NULL}, 1},
               {{"\\label", NULL}, 1},
               {{"\\ref", NULL}, 1},
               {{"\\pageref", NULL}, 1},
               {{"\\autoref", NULL}, 1},
               {{"\\parbox", NULL}, 1},
               {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
               {{"\\verb+", "+"}, 0},
               {{"\\verb|", "|"}, 0},
               {{"\\verb#", "#"}, 0},
               {{"\\verb*", "*"}, 0},
               {{"\\documentstyle", "\\begin{document}"}, 0},
               {{"\\documentclass", "\\begin{document}"}, 0},
               //	{ { "\\documentclass", NULL } , 1 },
               {{"\\usepackage", NULL}, 1},
               {{"\\includeonly", NULL}, 1},
               {{"\\include", NULL}, 1},
               {{"\\input", NULL}, 1},
               {{"\\vspace", NULL}, 1},
               {{"\\setlength", NULL}, 2},
               {{"\\addtolength", NULL}, 2},
               {{"\\settowidth", NULL}, 2},
               {{"\\rule", NULL}, 2},
               {{"\\hspace", NULL}, 1},
               // "\\vspace" used to be listed a second time here; the copy
               // was unreachable because the first match always wins.
               {{"\\\\[", "]"}, 0},
               {{"\\pagebreak[", "]"}, 0},
               {{"\\nopagebreak[", "]"}, 0},
               {{"\\enlargethispage", NULL}, 1},
               {{"\\begin{tabular}", NULL}, 1},
               {{"\\addcontentsline", NULL}, 2},
               {{"\\begin{thebibliography}", NULL}, 1},
               {{"\\bibliography", NULL}, 1},
               {{"\\bibliographystyle", NULL}, 1},
               {{"\\bibitem", NULL}, 1},
               {{"\\begin", NULL}, 1},
               {{"\\end", NULL}, 1},
               {{"\\pagestyle", NULL}, 1},
               {{"\\pagenumbering", NULL}, 1},
               {{"\\thispagestyle", NULL}, 1},
               {{"\\newtheorem", NULL}, 2},
               {{"\\newcommand", NULL}, 2},
               {{"\\renewcommand", NULL}, 2},
               {{"\\setcounter", NULL}, 2},
               {{"\\addtocounter", NULL}, 1},
               {{"\\stepcounter", NULL}, 1},
               {{"\\selectlanguage", NULL}, 1},
               {{"\\inputencoding", NULL}, 1},
               {{"\\hyphenation", NULL}, 1},
               {{"\\definecolor", NULL}, 3},
               {{"\\color", NULL}, 1},
               {{"\\textcolor", NULL}, 1},
               {{"\\pagecolor", NULL}, 1},
               {{"\\colorbox", NULL}, 2},
               {{"\\fcolorbox", NULL}, 2},
               {{"\\declaregraphicsextensions", NULL}, 1},
               {{"\\psfig", NULL}, 1},
               {{"\\url", NULL}, 1},
               {{"\\eqref", NULL}, 1},
               {{"\\vskip", NULL}, 1},
               {{"\\vglue", NULL}, 1},
               {{"\'\'", NULL}, 1}};

// Number of entries in PATTERN.
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
// Forwards `wordchars` to the TextParser base class and starts with all
// LaTeX-specific bookkeeping (matched pattern, brace depth, argument count,
// optional-argument flag) set to zero.
LaTeXParser::LaTeXParser(const char* wordchars)
    : TextParser(wordchars),
      pattern_num(0),
      depth(0),
      arg(0),
      opt(0) {}
// Same as the const char* constructor, but forwards a w_char array of `len`
// entries to the TextParser base class; the LaTeX-specific bookkeeping again
// starts at zero.
LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
    : TextParser(wordchars, len),
      pattern_num(0),
      depth(0),
      arg(0),
      opt(0) {}
// The destructor body is intentionally empty.
LaTeXParser::~LaTeXParser() {
}
/*
 * Finds the first PATTERN entry whose pat[col] text is a prefix of the
 * current line, starting at position `head`.
 *
 * col: 0 to look for an opening pattern, 1 to look for a closing pattern.
 *      Entries whose pat[col] is NULL are skipped.
 *
 * The input is lower-cased byte by byte before the comparison; the pattern
 * text is compared as written.
 *
 * Returns the index of the first matching PATTERN entry, or -1 if no entry
 * matches.
 */
int LaTeXParser::look_pattern(int col) {
  // The scan start is the same for every table entry.
  const char* const start = line[actual].c_str() + head;
  for (unsigned int i = 0; i < PATTERN_LEN; i++) {
    const char* k = PATTERN[i].pat[col];
    if (!k)
      continue;
    const char* j = start;
    // tolower() is only defined for values representable as unsigned char
    // (and EOF). A plain char holding a non-ASCII byte may be negative, so
    // it has to be converted before the call.
    // The loop cannot run past the input's terminating NUL: tolower(0) is 0,
    // which differs from every non-NUL pattern character.
    while ((*k != '\0') &&
           (tolower(static_cast<unsigned char>(*j)) == *k)) {
      j++;
      k++;
    }
    if (*k == '\0')
      return static_cast<int>(i);
  }
  return -1;
}
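/*
* LaTeXParser states
*
* state 0: outside of a word (non-word characters)
* state 1: inside a word
* state 2: inside a region opened by a pattern that has a closing
*          delimiter (math, verbatim, preamble, ...); skipped until
*          that delimiter is found
* state 3: command name (letters following a backslash)
* state 4: command with arguments (its {...} and [...] groups are skipped)
* state 5: % comment
*
*/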
/*
 * Extracts the next word from the current line, skipping LaTeX markup.
 *
 * Scanning starts at `head` in line[actual] and is driven by the `state`
 * member. The members state, pattern_num, depth, arg and opt are not reset
 * on entry, so they carry over from the previous call.
 *
 * t: receives the token; it is cleared on entry.
 *
 * Returns true as soon as alloc_token() reports a token in `t`. Returns
 * false when next_char() reports that the position cannot be advanced any
 * further (presumably the end of the line); a pending % comment (state 5) is
 * closed at that point.
 */
bool LaTeXParser::next_token(std::string& t) {
  t.clear();
  int i;
  int slash = 0;  // set in state 4 right after a backslash was seen
  int apostrophe;
  for (;;) {
    // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
    // %s\n",depth,state,arg,line[actual]+head);
    switch (state) {
      case 0:  // non word chars
        if ((pattern_num = look_pattern(0)) != -1) {
          if (PATTERN[pattern_num].pat[1]) {
            // delimited region: skip until the closing pattern
            state = 2;
          } else {
            // command followed by brace arguments
            state = 4;
            depth = 0;
            arg = 0;
            opt = 1;
          }
          // Leave head on the last character of the opening pattern; the
          // next_char() call below steps past it.
          head += strlen(PATTERN[pattern_num].pat[0]) - 1;
        } else if (line[actual][head] == '%') {
          state = 5;
        } else if (is_wordchar(line[actual].c_str() + head)) {
          state = 1;
          token = head;
        } else if (line[actual][head] == '\\') {
          if (line[actual][head + 1] == '\\' ||   // \\ (linebreak)
              (line[actual][head + 1] == '$') ||  // \$ (dollar sign)
              (line[actual][head + 1] == '%')) {  // \% (percent)
            head++;
            break;
          }
          state = 3;
        }
        break;
      case 1:  // wordchar
        apostrophe = 0;
        // A word ends at the first non-word character, or at a '' pair
        // (two apostrophes), which is then skipped.
        if (!is_wordchar(line[actual].c_str() + head) ||
            (line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
             ++apostrophe)) {
          state = 0;
          bool ok = alloc_token(token, &head, t);
          if (apostrophe)
            head += 2;
          if (ok)
            return true;
        }
        break;
      case 2:  // comment, labels, etc
        // Only the closing text that belongs to the opening pattern ends
        // the region.
        if (((i = look_pattern(1)) != -1) &&
            (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
          state = 0;
          head += strlen(PATTERN[pattern_num].pat[1]) - 1;
        }
        break;
      case 3: {  // command
        // tolower() must not be given a negative char value (undefined
        // behaviour for non-ASCII bytes on signed-char platforms), hence
        // the conversion through unsigned char.
        const int c = tolower(static_cast<unsigned char>(line[actual][head]));
        if ((c < 'a') || (c > 'z')) {
          // End of the command name: step back so that the current
          // character is examined again in state 0.
          state = 0;
          head--;
        }
        break;
      }
      case 4:  // command with arguments
        if (slash && (line[actual][head] != '\0')) {
          slash = 0;
          head++;
          break;
        } else if (line[actual][head] == '\\') {
          slash = 1;
        } else if ((line[actual][head] == '{') ||
                   ((opt) && (line[actual][head] == '['))) {
          depth++;
          opt = 0;
        } else if (line[actual][head] == '}') {
          depth--;
          if (depth == 0) {
            // one complete top-level argument has been consumed
            opt = 1;
            arg++;
          }
          if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
              (depth < 0)) {
            state = 0;  // XXX not handles the last optional arg.
          }
        } else if (line[actual][head] == ']')
          depth--;
    }  // case
    if (next_char(line[actual].c_str(), &head)) {
      if (state == 5)
        state = 0;
      return false;
    }
  }
}