//===- llvm/Support/Unicode.cpp - Unicode character properties -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements functions that allow querying certain properties of // Unicode characters. // //===----------------------------------------------------------------------===// #include "llvm/Support/Unicode.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/UnicodeCharRanges.h" namespace llvm { namespace sys { namespace unicode { /// Unicode code points of the categories L, M, N, P, S and Zs are considered /// printable. /// In addition, U+00AD SOFT HYPHEN is also considered printable, as /// it's actually displayed on most terminals. \return true if the character is /// considered printable. bool isPrintable(int UCS) { … } /// Unicode code points of the Cf category are considered /// formatting characters. bool isFormatting(int UCS) { … } /// Gets the number of positions a character is likely to occupy when output /// on a terminal ("character width"). This depends on the implementation of the /// terminal, and there's no standard definition of character width. /// The implementation defines it in a way that is expected to be compatible /// with a generic Unicode-capable terminal. /// \return Character width: /// * ErrorNonPrintableCharacter (-1) for non-printable characters (as /// identified by isPrintable); /// * 0 for non-spacing and enclosing combining marks; /// * 2 for CJK characters excluding halfwidth forms; /// * 1 for all remaining characters. static inline int charWidth(int UCS) { … } static bool isprintableascii(char c) { … } int columnWidthUTF8(StringRef Text) { … } } // namespace unicode } // namespace sys } // namespace llvm