/*
 * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de.
 * Distributed under the terms of the MIT License.
 */
#include "ICUCollateData.h"
#include <string.h>
#include <strings.h>
#include <wchar.h>
#include <unicode/unistr.h>
#include <AutoDeleter.h>
namespace BPrivate {
namespace Libroot {
/*!	Creates collation data that does not own a collator yet.

	\param tlsKey Thread-local-storage key, passed on unchanged to the base
		class constructor.
*/
ICUCollateData::ICUCollateData(pthread_key_t tlsKey)
	:
	inherited(tlsKey),
	fCollator(NULL)
{
	// fCollator starts out as NULL so that the destructor and the setters
	// can delete it unconditionally.
}


/*!	Destroys the collator owned by this object, if there is one. */
ICUCollateData::~ICUCollateData()
{
	delete fCollator;
}
ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName)
status_t result = inherited::SetTo(locale, posixLocaleName);
if (result == B_OK) {
UErrorCode icuStatus = U_ZERO_ERROR;
delete fCollator;
fCollator = Collator::createInstance(fLocale, icuStatus);
if (!U_SUCCESS(icuStatus))
return B_NO_MEMORY;
return result;
/*!	Switches this object to the POSIX locale.

	Once the base class has switched successfully, the collator is destroyed
	and fCollator is reset to NULL; the comparison and transformation
	methods treat a NULL collator as "use plain POSIX semantics".

	\return The result of the base class' SetToPosix().
*/
status_t
ICUCollateData::SetToPosix()
{
	const status_t baseStatus = inherited::SetToPosix();
	if (baseStatus != B_OK)
		return baseStatus;

	delete fCollator;
	fCollator = NULL;

	return B_OK;
}
ICUCollateData::Strcoll(const char* a, const char* b, int& result)
if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
// handle POSIX here as the collator ICU uses for that (english) is
// incompatible in too many ways
result = strcmp(a, b);
for (const char* aIter = a; *aIter != 0; ++aIter) {
if (*aIter < 0)
return B_BAD_VALUE;
for (const char* bIter = b; *bIter != 0; ++bIter) {
if (*bIter < 0)
return B_BAD_VALUE;
return B_OK;
status_t status = B_OK;
UErrorCode icuStatus = U_ZERO_ERROR;
if (strcasecmp(fGivenCharset, "utf-8") == 0) {
UCharIterator aIter, bIter;
uiter_setUTF8(&aIter, a, -1);
uiter_setUTF8(&bIter, b, -1);
result = fCollator->compare(aIter, bIter, icuStatus);
} else {
UnicodeString unicodeA;
UnicodeString unicodeB;
if (_ToUnicodeString(a, unicodeA) != B_OK
|| _ToUnicodeString(b, unicodeB) != B_OK) {
status = B_BAD_VALUE;
result = fCollator->compare(unicodeA, unicodeB, icuStatus);
if (!U_SUCCESS(icuStatus))
status = B_BAD_VALUE;
return status;
ICUCollateData::Strxfrm(char* out, const char* in, size_t size, size_t& outSize)
if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
// handle POSIX here as the collator ICU uses for that (english) is
// incompatible in too many ways
outSize = strlcpy(out, in, size);
for (const char* inIter = in; *inIter != 0; ++inIter) {
if (*inIter < 0)
return B_BAD_VALUE;
return B_OK;
if (in == NULL) {
outSize = 0;
return B_OK;
UnicodeString unicodeIn;
if (_ToUnicodeString(in, unicodeIn) != B_OK)
return B_BAD_VALUE;
outSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, size);
return B_OK;
ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result)
if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
// handle POSIX here as the collator ICU uses for that (english) is
// incompatible in too many ways
result = wcscmp(a, b);
for (const wchar_t* aIter = a; *aIter != 0; ++aIter) {
if (*aIter > 127)
return B_BAD_VALUE;
for (const wchar_t* bIter = b; *bIter != 0; ++bIter) {
if (*bIter > 127)
return B_BAD_VALUE;
return B_OK;
UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1);
UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1);
UErrorCode icuStatus = U_ZERO_ERROR;
result = fCollator->compare(unicodeA, unicodeB, icuStatus);
if (!U_SUCCESS(icuStatus))
return B_BAD_VALUE;
return B_OK;
ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t size,
size_t& outSize)
if (in == NULL) {
outSize = 0;
return B_OK;
if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
// handle POSIX here as the collator ICU uses for that (english) is
// incompatible in too many ways
outSize = wcslcpy(out, in, size);
for (const wchar_t* inIter = in; *inIter != 0; ++inIter) {
if (*inIter > 127)
return B_BAD_VALUE;
return B_OK;
UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1);
size_t requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0);
uint8_t* buffer = (uint8_t*)out;
outSize = fCollator->getSortKey(unicodeIn, buffer, requiredSize);
// convert 1-byte characters to 4-byte wide characters:
for (size_t i = 0; i < outSize; ++i)
out[outSize - 1 - i] = buffer[outSize - 1 - i];
return B_OK;
/*!	Converts a multibyte string into a UnicodeString, using the converter
	supplied by _GetConverter().

	\param in String to convert; NULL and "" both produce an empty \a out.
	\param out Receives the converted text. It is emptied first, so it never
		keeps stale content on any return path.
	\return B_OK on success, the error of _GetConverter() if no converter is
		available, B_BAD_VALUE if ICU rejects the input, B_ERROR if ICU
		reports a negative length.
*/
status_t
ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out)
{
	out.remove();

	if (in == NULL)
		return B_OK;

	size_t inLen = strlen(in);
	if (inLen == 0)
		return B_OK;

	UConverter* converter;
	status_t result = _GetConverter(converter);
	if (result != B_OK)
		return result;

	// Preflight with an empty destination to learn the required length.
	// U_BUFFER_OVERFLOW_ERROR is the expected outcome of that call; any
	// other failure means the input could not be converted.
	UErrorCode icuStatus = U_ZERO_ERROR;
	int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus);
	if (icuStatus != U_BUFFER_OVERFLOW_ERROR && !U_SUCCESS(icuStatus))
		return B_BAD_VALUE;
	if (outLen < 0)
		return B_ERROR;
	if (outLen == 0)
		return B_OK;

	UChar* outBuf = out.getBuffer(outLen + 1);
	if (outBuf == NULL)
		return B_NO_MEMORY;

	icuStatus = U_ZERO_ERROR;
	outLen
		= ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus);
	if (!U_SUCCESS(icuStatus)) {
		// Every getBuffer() must be paired with releaseBuffer(); release
		// with length 0 so that out stays empty on failure.
		out.releaseBuffer(0);
		return B_BAD_VALUE;
	}

	out.releaseBuffer(outLen);

	return B_OK;
}
} // namespace Libroot
} // namespace BPrivate
// Static analysis warning (refers to the code above): V595 The 'in' pointer was utilized before it was verified against nullptr. Check lines: 119, 127.