#include <algorithm>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <sstream>
#include "csutil.hxx"
#include "atypes.hxx"
#include "langnum.hxx"
struct unicode_info { … };
#ifdef _WIN32
#include <windows.h>
#include <wchar.h>
#endif
#ifdef OPENOFFICEORG
#include <unicode/uchar.h>
#else
#ifndef MOZILLA_CLIENT
#include "utf_info.cxx"
#define UTF_LST_LEN …
#endif
#endif
#ifdef MOZILLA_CLIENT
#include "nsCOMPtr.h"
#include "nsIUnicodeEncoder.h"
#include "nsIUnicodeDecoder.h"
#include "nsUnicharUtils.h"
#include "mozilla/dom/EncodingUtils.h"
using mozilla::dom::EncodingUtils;
#endif
struct unicode_info2 { … };
static struct unicode_info2* utf_tbl = …;
static int utf_tbl_count = …;
void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mode)
{ … }
std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) { … }
int u8_u16(std::vector<w_char>& dest, const std::string& src) { … }
namespace {
class is_any_of { … };
}
std::string::const_iterator mystrsep(const std::string &str,
std::string::const_iterator& start) { … }
char* mystrdup(const char* s) { … }
void mychomp(std::string& s) { … }
std::vector<std::string> line_tok(const std::string& text, char breakchar) { … }
void line_uniq(std::string& text, char breakchar)
{ … }
void line_uniq_app(std::string& text, char breakchar) { … }
std::string& strlinecat(std::string& str, const std::string& apd) { … }
int fieldlen(const char* r) { … }
bool copy_field(std::string& dest,
const std::string& morph,
const std::string& var) { … }
std::string& mystrrep(std::string& str,
const std::string& search,
const std::string& replace) { … }
size_t reverseword(std::string& word) { … }
size_t reverseword_utf(std::string& word) { … }
void uniqlist(std::vector<std::string>& list) { … }
namespace {
unsigned char cupper(const struct cs_info* csconv, int nIndex) { … }
unsigned char clower(const struct cs_info* csconv, int nIndex) { … }
unsigned char ccase(const struct cs_info* csconv, int nIndex) { … }
}
w_char upper_utf(w_char u, int langnum) { … }
w_char lower_utf(w_char u, int langnum) { … }
std::string& mkallcap(std::string& s, const struct cs_info* csconv) { … }
std::string& mkallsmall(std::string& s, const struct cs_info* csconv) { … }
std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
int langnum) { … }
std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) { … }
std::string& mkinitcap(std::string& s, const struct cs_info* csconv) { … }
std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) { … }
std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) { … }
std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) { … }
void store_pointer(char* dest, char* source) { … }
char* get_stored_pointer(const char* s) { … }
#ifndef MOZILLA_CLIENT
static struct cs_info iso1_tbl[] = …;
static struct cs_info iso2_tbl[] = …;
static struct cs_info iso3_tbl[] = …;
static struct cs_info iso4_tbl[] = …;
static struct cs_info iso5_tbl[] = …;
static struct cs_info iso6_tbl[] = …;
static struct cs_info iso7_tbl[] = …;
static struct cs_info iso8_tbl[] = …;
static struct cs_info iso9_tbl[] = …;
static struct cs_info iso10_tbl[] = …;
static struct cs_info koi8r_tbl[] = …;
static struct cs_info koi8u_tbl[] = …;
static struct cs_info cp1251_tbl[] = …;
static struct cs_info iso13_tbl[] = …;
static struct cs_info iso14_tbl[] = …;
static struct cs_info iso15_tbl[] = …;
static struct cs_info iscii_devanagari_tbl[] = …;
static struct cs_info tis620_tbl[] = …;
struct enc_entry { … };
static struct enc_entry encds[] = …;
static void toAsciiLowerAndRemoveNonAlphanumeric(const char* pName,
char* pBuf) { … }
struct cs_info* get_current_cs(const std::string& es) { … }
#else
// Builds a 256-entry case table for the byte encoding labelled by `es`
// (MOZILLA_CLIENT build).
//
// The table is allocated with new[] and handed back as a raw pointer; this
// function never frees it.
//
// Every entry starts out as the identity mapping (clower == cupper == byte,
// ccase == false).  If the label does not resolve to an encoding, that
// identity table is returned unchanged.  Otherwise each non-zero byte is
// decoded to a UTF-16 unit, lower- and upper-cased, and encoded back; the
// entry is overwritten only when all three conversions succeed and each one
// consumes and produces exactly one unit.
struct cs_info* get_current_cs(const std::string& es) {
  struct cs_info* table = new cs_info[256];
  for (int b = 0; b < 256; ++b) {
    table[b].ccase = false;
    table[b].clower = b;
    table[b].cupper = b;
  }

  nsAutoCString label(es.c_str());
  nsAutoCString encoding;
  if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
    // Unknown label: fall back to the identity table.
    return table;
  }

  nsCOMPtr<nsIUnicodeEncoder> encoder;
  nsCOMPtr<nsIUnicodeDecoder> decoder;
  encoder = EncodingUtils::EncoderForEncoding(encoding);
  decoder = EncodingUtils::DecoderForEncoding(encoding);
  // Ask both converters to signal errors so that a byte which cannot be
  // converted is detected through the result code checked below.
  encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nullptr, '?');
  decoder->SetInputErrorBehavior(decoder->kOnError_Signal);

  // Byte 0 is never converted; it keeps the identity mapping stored above,
  // as does every byte for which a conversion step fails.
  for (unsigned int b = 1; b <= 0xff; ++b) {
    const char source = char(b);
    char16_t uni;
    // In/out lengths shared by all three Convert calls; each call must
    // leave both at exactly 1 for the byte to be accepted.
    int32_t charLength = 1;
    int32_t uniLength = 1;

    nsresult rv = decoder->Convert(&source, &charLength, &uni, &uniLength);
    if (rv != NS_OK || charLength != 1 || uniLength != 1)
      continue;

    char lower;
    char16_t lowered = ToLowerCase(uni);
    rv = encoder->Convert(&lowered, &uniLength, &lower, &charLength);
    if (rv != NS_OK || charLength != 1 || uniLength != 1)
      continue;

    char upper;
    char16_t raised = ToUpperCase(uni);
    rv = encoder->Convert(&raised, &uniLength, &upper, &charLength);
    if (rv != NS_OK || charLength != 1 || uniLength != 1)
      continue;

    table[b].cupper = upper;
    table[b].clower = lower;
    // The case flag is set exactly when lower-casing changes the byte.
    table[b].ccase = (table[b].clower != (unsigned char)b);
  }

  return table;
}
#endif
std::string get_casechars(const char* enc) { … }
struct lang_map { … };
static struct lang_map lang2enc[] = …;
int get_lang_num(const std::string& lang) { … }
#ifndef OPENOFFICEORG
#ifndef MOZILLA_CLIENT
void initialize_utf_tbl() {
utf_tbl_count++;
if (utf_tbl)
return;
utf_tbl = new unicode_info2[CONTSIZE];
for (size_t j = 0; j < CONTSIZE; ++j) {
utf_tbl[j].cletter = 0;
utf_tbl[j].clower = (unsigned short)j;
utf_tbl[j].cupper = (unsigned short)j;
}
for (size_t j = 0; j < UTF_LST_LEN; ++j) {
utf_tbl[utf_lst[j].c].cletter = 1;
utf_tbl[utf_lst[j].c].clower = utf_lst[j].clower;
utf_tbl[utf_lst[j].c].cupper = utf_lst[j].cupper;
}
}
#endif
#endif
void free_utf_tbl() { … }
unsigned short unicodetoupper(unsigned short c, int langnum) { … }
unsigned short unicodetolower(unsigned short c, int langnum) { … }
int unicodeisalpha(unsigned short c) { … }
int get_captype(const std::string& word, cs_info* csconv) { … }
int get_captype_utf8(const std::vector<w_char>& word, int langnum) { … }
size_t remove_ignored_chars_utf(std::string& word,
const std::vector<w_char>& ignored_chars) { … }
size_t remove_ignored_chars(std::string& word,
const std::string& ignored_chars) { … }
bool parse_string(const std::string& line, std::string& out, int ln) { … }
bool parse_array(const std::string& line,
std::string& out,
std::vector<w_char>& out_utf16,
int utf8,
int ln) { … }