// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************** * COPYRIGHT: * Copyright (c) 1996-2016, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************** * * ucnv_bld.cpp: * * Defines functions that are used in the creation/initialization/deletion * of converters and related structures. * uses uconv_io.h routines to access disk information * is used by ucnv.h to implement public API create/delete/flushCache routines * Modification History: * * Date Name Description * * 06/20/2000 helena OS/400 port changes; mostly typecast. * 06/29/2000 helena Major rewrite of the callback interface. */ #include "unicode/utypes.h" #if !UCONFIG_NO_CONVERSION #include "unicode/putil.h" #include "unicode/udata.h" #include "unicode/ucnv.h" #include "unicode/uloc.h" #include "mutex.h" #include "putilimp.h" #include "uassert.h" #include "utracimp.h" #include "ucnv_io.h" #include "ucnv_bld.h" #include "ucnvmbcs.h" #include "ucnv_ext.h" #include "ucnv_cnv.h" #include "ucnv_imp.h" #include "uhash.h" #include "umutex.h" #include "cstring.h" #include "cmemory.h" #include "ucln_cmn.h" #include "ustr_cnv.h" #if 0 #include <stdio.h> extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l); #define UCNV_DEBUG_LOG … #else #define UCNV_DEBUG_LOG(x,y,z) … #endif static const UConverterSharedData * const converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]= …; /* Please keep this in binary sorted order for getAlgorithmicTypeFromName. Also the name should be in lower case and all spaces, dashes and underscores removed */ static struct { … } const cnvNameType[] = …; /*initializes some global variables */ static UHashtable *SHARED_DATA_HASHTABLE = …; static icu::UMutex cnvCacheMutex; /* Note: the global mutex is used for */ /* reference count updates. */ static const char **gAvailableConverters = …; static uint16_t gAvailableConverterCount = …; static icu::UInitOnce gAvailableConvertersInitOnce { … }; #if !U_CHARSET_IS_UTF8 /* This contains the resolved converter name. So no further alias lookup is needed again. */ static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for nullptr */ static const char *gDefaultConverterName = nullptr; /* If the default converter is an algorithmic converter, this is the cached value. We don't cache a full UConverter and clone it because ucnv_clone doesn't have less overhead than an algorithmic open. We don't cache non-algorithmic converters because ucnv_flushCache must be able to unload the default converter and its table. */ static const UConverterSharedData *gDefaultAlgorithmicSharedData = nullptr; /* Does gDefaultConverterName have a converter option and require extra parsing? */ static UBool gDefaultConverterContainsOption; #endif /* !U_CHARSET_IS_UTF8 */ static const char DATA_TYPE[] = …; /* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup(). * If it is ever to be called from elsewhere, synchronization * will need to be considered. */ static void ucnv_flushAvailableConverterCache() { … } /* ucnv_cleanup - delete all storage held by the converter cache, except any */ /* in use by open converters. */ /* Not thread safe. */ /* Not supported API. */ static UBool U_CALLCONV ucnv_cleanup() { … } U_CAPI void U_EXPORT2 ucnv_enableCleanup() { … } static UBool U_CALLCONV isCnvAcceptable(void * /*context*/, const char * /*type*/, const char * /*name*/, const UDataInfo *pInfo) { … } /** * Un flatten shared data from a UDATA.. */ static UConverterSharedData* ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) { … } /*Takes an alias name gets an actual converter file name *goes to disk and opens it. *allocates the memory and returns a new UConverter object */ static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) { … } /*returns a converter type from a string */ static const UConverterSharedData * getAlgorithmicTypeFromName(const char *realName) { … } /* * Based on the number of known converters, this determines how many times larger * the shared data hash table should be. When on small platforms, or just a couple * of converters are used, this number should be 2. When memory is plentiful, or * when ucnv_countAvailable is ever used with a lot of available converters, * this should be 4. * Larger numbers reduce the number of hash collisions, but use more memory. */ #define UCNV_CACHE_LOAD_FACTOR … /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ /* Will always be called with the cnvCacheMutex already being held */ /* by the calling function. */ /* Stores the shared data in the SHARED_DATA_HASHTABLE * @param data The shared data */ static void ucnv_shareConverterData(UConverterSharedData * data) { … } /* Look up a converter name in the shared data cache. */ /* cnvCacheMutex must be held by the caller to protect the hash table. */ /* gets the shared data from the SHARED_DATA_HASHTABLE (might return nullptr if it isn't there) * @param name The name of the shared data * @return the shared data from the SHARED_DATA_HASHTABLE */ static UConverterSharedData * ucnv_getSharedConverterData(const char *name) { … } /*frees the string of memory blocks associates with a sharedConverter *if and only if the referenceCounter == 0 */ /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and * returns true, * otherwise returns false * @param sharedConverterData The shared data * @return if not it frees all the memory stemming from sharedConverterData and * returns true, otherwise returns false */ static UBool ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) { … } /** * Load a non-algorithmic converter. * If pkg==nullptr, then this function must be called inside umtx_lock(&cnvCacheMutex). */ UConverterSharedData * ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { … } /** * Unload a non-algorithmic converter. * It must be sharedData->isReferenceCounted * and this function must be called inside umtx_lock(&cnvCacheMutex). */ U_CAPI void ucnv_unload(UConverterSharedData *sharedData) { … } U_CFUNC void ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) { … } U_CFUNC void ucnv_incrementRefCount(UConverterSharedData *sharedData) { … } /* * *pPieces must be initialized. * The name without options will be copied to pPieces->cnvName. * The locale and options will be copied to pPieces only if present in inName, * otherwise the existing values in pPieces remain. * *pArgs will be set to the pPieces values. */ static void parseConverterOptions(const char *inName, UConverterNamePieces *pPieces, UConverterLoadArgs *pArgs, UErrorCode *err) { … } /*Logic determines if the converter is Algorithmic AND/OR cached *depending on that: * -we either go to get data from disk and cache it (Data=true, Cached=false) * -Get it from a Hashtable (Data=X, Cached=true) * -Call dataConverter initializer (Data=true, Cached=true) * -Call AlgorithmicConverter initializer (Data=false, Cached=true) */ U_CFUNC UConverterSharedData * ucnv_loadSharedData(const char *converterName, UConverterNamePieces *pPieces, UConverterLoadArgs *pArgs, UErrorCode * err) { … } U_CAPI UConverter * ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) { … } U_CFUNC UBool ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { … } UConverter * ucnv_createAlgorithmicConverter(UConverter *myUConverter, UConverterType type, const char *locale, uint32_t options, UErrorCode *err) { … } U_CFUNC UConverter* ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) { … } U_CFUNC UConverter* ucnv_createConverterFromSharedData(UConverter *myUConverter, UConverterSharedData *mySharedConverterData, UConverterLoadArgs *pArgs, UErrorCode *err) { … } /*Frees all shared immutable objects that aren't referred to (reference count = 0) */ U_CAPI int32_t U_EXPORT2 ucnv_flushCache () { … } /* available converters list --------------------------------------------------- */ static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { … } static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { … } U_CFUNC uint16_t ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { … } U_CFUNC const char * ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { … } /* default converter name --------------------------------------------------- */ #if !U_CHARSET_IS_UTF8 /* Copy the canonical converter name. ucnv_getDefaultName must be thread safe, which can call this function. ucnv_setDefaultName calls this function and it doesn't have to be thread safe because there is no reliable/safe way to reset the converter in use in all threads. If you did reset the converter, you would not be sure that retrieving a default converter for one string would be the same type of default converter for a successive string. Since the name is a returned via ucnv_getDefaultName without copying, you shouldn't be modifying or deleting the string from a separate thread. */ static inline void internalSetName(const char *name, UErrorCode *status) { UConverterNamePieces stackPieces; UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; int32_t length=(int32_t)(uprv_strlen(name)); UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != nullptr); const UConverterSharedData *algorithmicSharedData; stackArgs.name = name; if(containsOption) { stackPieces.cnvName[0] = 0; stackPieces.locale[0] = 0; stackPieces.options = 0; parseConverterOptions(name, &stackPieces, &stackArgs, status); if(U_FAILURE(*status)) { return; } } algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); umtx_lock(&cnvCacheMutex); gDefaultAlgorithmicSharedData = algorithmicSharedData; gDefaultConverterContainsOption = containsOption; uprv_memcpy(gDefaultConverterNameBuffer, name, length); gDefaultConverterNameBuffer[length]=0; /* gDefaultConverterName MUST be the last global var set by this function. */ /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ // But there is nothing here preventing that from being reordered, either by the compiler // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. // -- Andy gDefaultConverterName = gDefaultConverterNameBuffer; ucnv_enableCleanup(); umtx_unlock(&cnvCacheMutex); } #endif /* * In order to be really thread-safe, the get function would have to take * a buffer parameter and copy the current string inside a mutex block. * This implementation only tries to be really thread-safe while * setting the name. * It assumes that setting a pointer is atomic. */ U_CAPI const char* U_EXPORT2 ucnv_getDefaultName() { … } #if U_CHARSET_IS_UTF8 U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) { … } #else /* This function is not thread safe, and it can't be thread safe. See internalSetName or the API reference for details. */ U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *converterName) { if(converterName==nullptr) { /* reset to the default codepage */ gDefaultConverterName=nullptr; } else { UErrorCode errorCode = U_ZERO_ERROR; UConverter *cnv = nullptr; const char *name = nullptr; /* if the name is there, test it out and get the canonical name with options */ cnv = ucnv_open(converterName, &errorCode); if(U_SUCCESS(errorCode) && cnv != nullptr) { name = ucnv_getName(cnv, &errorCode); } if(U_SUCCESS(errorCode) && name!=nullptr) { internalSetName(name, &errorCode); } /* else this converter is bad to use. Don't change it to a bad value. */ /* The close may make the current name go away. */ ucnv_close(cnv); /* reset the converter cache */ u_flushDefaultConverter(); } } #endif /* data swapping ------------------------------------------------------------ */ /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ #if !UCONFIG_NO_LEGACY_CONVERSION U_CAPI int32_t U_EXPORT2 ucnv_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { … } #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ #endif