#include "git-compat-util.h" #include "strbuf.h" #include "utf8.h" /* This code is originally from https://www.cl.cam.ac.uk/~mgk25/ucs/ */ static const char utf16_be_bom[] = …; static const char utf16_le_bom[] = …; static const char utf32_be_bom[] = …; static const char utf32_le_bom[] = …; struct interval { … }; size_t display_mode_esc_sequence_len(const char *s) { … } /* auxiliary function for binary search in interval table */ static int bisearch(ucs_char_t ucs, const struct interval *table, int max) { … } /* The following function defines the column width of an ISO 10646 * character as follows: * * - The null character (U+0000) has a column width of 0. * * - Other C0/C1 control characters and DEL will lead to a return * value of -1. * * - Non-spacing and enclosing combining characters (general * category code Mn or Me in the Unicode database) have a * column width of 0. * * - SOFT HYPHEN (U+00AD) has a column width of 1. * * - Other format characters (general category code Cf in the Unicode * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. * * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) * have a column width of 0. * * - Spacing characters in the East Asian Wide (W) or East Asian * Full-width (F) category as defined in Unicode Technical * Report #11 have a column width of 2. * * - All remaining characters (including all printable * ISO 8859-1 and WGL4 characters, Unicode control characters, * etc.) have a column width of 1. * * This implementation assumes that ucs_char_t characters are encoded * in ISO 10646. */ static int git_wcwidth(ucs_char_t ch) { … } /* * Pick one ucs character starting from the location *start points at, * and return it, while updating the *start pointer to point at the * end of that character. When remainder_p is not NULL, the location * holds the number of bytes remaining in the string that we are allowed * to pick from. 
Otherwise we are allowed to pick up to the NUL that * would eventually appear in the string. *remainder_p is also reduced * by the number of bytes we have consumed. * * If the string is not valid UTF-8, the *start pointer is set to NULL * and the return value is undefined. */ static ucs_char_t pick_one_utf8_char(const char **start, size_t *remainder_p) { … } /* * This function returns the number of columns occupied by the character * pointed to by the variable start. The pointer is updated to point at * the next character. When remainder_p is not NULL, it points at the * location that stores the number of remaining bytes we can use to pick * a character (see pick_one_utf8_char() above). */ int utf8_width(const char **start, size_t *remainder_p) { … } /* * Returns the total number of columns required by a null-terminated * string, assuming that the string is utf8. Returns strlen() instead * if the string does not look like a valid utf8 string. * * NOTE(review): the signature takes an explicit len, so the mentions of * a null-terminated string and of strlen() may be stale -- confirm * against the body. */ int utf8_strnwidth(const char *string, size_t len, int skip_ansi) { … } int utf8_strwidth(const char *string) { … } int is_utf8(const char *text) { … } static void strbuf_add_indented_text(struct strbuf *buf, const char *text, int indent, int indent2) { … } /* * Wrap the text, if necessary. The variable indent1 is the indent for the * first line, indent2 is the indent for all other lines. * If indent1 is negative, assume that -indent1 columns have already been * consumed (and no extra indent is necessary for the first line). */ void strbuf_add_wrapped_text(struct strbuf *buf, const char *text, int indent1, int indent2, int width) { … } void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len, int indent, int indent2, int width) { … } void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width, const char *subst) { … } /* * Returns true (1) if the src encoding name matches the dst encoding * name directly or one of its alternative names. E.g. UTF-16BE is the * same as UTF16BE. 
*/ static int same_utf_encoding(const char *src, const char *dst) { … } int is_encoding_utf8(const char *name) { … } int same_encoding(const char *src, const char *dst) { … } /* * Wrapper for fprintf that returns the total number of columns required * for the printed string, assuming that the string is utf8. */ int utf8_fprintf(FILE *stream, const char *format, ...) { … } /* * Given a buffer and its encoding, return it re-encoded * with iconv. If the conversion fails, returns NULL. */ #ifndef NO_ICONV #if defined(OLD_ICONV) || (defined(__sun__) && !defined(_XPG6)) typedef const char * iconv_ibp; #else /* NOTE(review): unlike the branch above, this branch shows no typedef keyword or type before iconv_ibp -- confirm nothing was lost here */ iconv_ibp; #endif char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, size_t bom_len, size_t *outsz_p) { … } static const char *fallback_encoding(const char *name) { … } char *reencode_string_len(const char *in, size_t insz, const char *out_encoding, const char *in_encoding, size_t *outsz) { … } #endif static int has_bom_prefix(const char *data, size_t len, const char *bom, size_t bom_len) { … } int has_prohibited_utf_bom(const char *enc, const char *data, size_t len) { … } int is_missing_required_utf_bom(const char *enc, const char *data, size_t len) { … } /* * Returns the length in bytes of the first character of multi-byte `text` * according to `encoding`. * * - The `text` pointer is updated to point at the next character. * - When `remainder_p` is not NULL, on entry `*remainder_p` is how many bytes * we can consume from text, and on exit `*remainder_p` is reduced by the * returned character length. Otherwise `text` is treated as limited by NUL. */ int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding) { … } /* * Pick the next char from the stream, ignoring codepoints an HFS+ would. * Note that this is _not_ complete by any means. It's just enough * to make is_hfs_dotgit() work, and should not be used otherwise. 
*/ static ucs_char_t next_hfs_char(const char **in) { … } static int is_hfs_dot_generic(const char *path, const char *needle, size_t needle_len) { … } /* * Inline wrapper to make sure the compiler resolves strlen() on literals at * compile time. */ static inline int is_hfs_dot_str(const char *path, const char *needle) { … } int is_hfs_dotgit(const char *path) { … } int is_hfs_dotgitmodules(const char *path) { … } int is_hfs_dotgitignore(const char *path) { … } int is_hfs_dotgitattributes(const char *path) { … } int is_hfs_dotmailmap(const char *path) { … } const char utf8_bom[] = …; int skip_utf8_bom(char **text, size_t len) { … } void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width, const char *s) { … }