/* *****************************************************************************
* kblib is a general utility library for C++14 and C++17, intended to provide
* performant high-level abstractions and more expressive ways to do simple
* things.
*
* Copyright (c) 2021 killerbee
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
* ****************************************************************************/
/**
* @file
* @brief Provides some basic interfaces to make using ICU smoother.
*
* @author killerbee
* @date 2019-2021
* @copyright GNU General Public Licence v3.0
*/
#ifndef KBLIB_ICU_H
#define KBLIB_ICU_H
#include "tdecl.h"
#include <ostream>
#include <string>
#include <typeinfo>
#include <unicode/unistr.h>
namespace KBLIB_NS {
/**
* @brief Convert a UnicodeString to a UTF-8 string.
*
* This functionality is present in ICU, however the interface is inelegant.
*
* @param s The string to convert.
* @return string The re-encoded result.
*/
template
auto toUTF8(const icu::UnicodeString& s) -> string {
string res;
return s.toUTF8String(res);
}
/**
* @brief Convert a UTF-8 string into a UnicodeString.
*
* This functionality is present in ICU, and is only provided here for
* consistency with the above.
*
* @param s The string to convert.
* @return icu::UnicodeString The re-encoded result.
*/
template
auto fromUTF8(string s) -> icu::UnicodeString {
return icu::UnicodeString::fromUTF8(s);
}
/**
* @brief Converts a UnicodeString to UTF-32.
*
* @param s The string to convert.
* @return string The re-encoded result.
*/
template
auto toUTF32(const icu::UnicodeString& s) -> string {
string res(s.countChar32(), '\0');
UErrorCode ec{U_ZERO_ERROR};
s.toUTF32(&res[0], res.size(), ec);
if (U_FAILURE(ec)) {
// silence warnings about ec not being a temporary
throw UErrorCode{ec};
}
return res;
}
/**
* @brief Converts a UTF-32 string into a UnicodeString.
*
* This functionality is present in ICU, and is only provided here for
* consistency with the above.
*
* @param s The string to convert.
* @return icu::UnicodeString The re-encoded result.
*/
template
auto fromUTF32(string s) -> icu::UnicodeString {
return icu::UnicodeString::fromUTF32(s.data(), s.length());
}
/**
* @warning Defining operators for external types is error-prone and may
* unexpectedly break in the future.
*/
namespace icu_str_ops {
/**
* @brief Stream insertion for UnicodeStrings: the text is transcoded to UTF-8
* and the resulting bytes are written to the stream.
*
* @param os The stream to insert to.
* @param str The string to output.
* @return std::ostream& A reference to os.
*/
inline auto operator<<(std::ostream& os, const icu::UnicodeString& str)
    -> std::ostream& {
    const auto utf8 = toUTF8(str);
    os << utf8;
    return os;
}
/**
* @brief Give the strange ICU interface for concatenating UTF-8 and
* UnicodeStrings a more idiomatic name in the form of operator+.
*
* @param lhs The UTF-8 text to append to. It is taken by value and used as
* the output buffer.
* @param str The string to transcode to UTF-8 and append to lhs.
* @return std::string lhs followed by the UTF-8 encoding of str.
*/
inline auto operator+(std::string lhs, const icu::UnicodeString& str)
    -> std::string {
    str.toUTF8String(lhs);
    // Return the parameter by name so it is moved out; returning the
    // reference from toUTF8String would force a copy.
    return lhs;
}
/**
* @brief Concatenate a UnicodeString with UTF-8 text, producing a
* UnicodeString.
*
* @param lhs The string to append to. It is taken by value and extended in
* place.
* @param rhs UTF-8 text, converted with fromUTF8 before being appended.
* @return icu::UnicodeString lhs followed by the converted contents of rhs.
*/
inline auto operator+(icu::UnicodeString lhs, const std::string& rhs)
    -> icu::UnicodeString {
    lhs += fromUTF8(rhs);
    // Return the parameter by name so it can be moved out; returning the
    // reference from operator+= would force a copy.
    return lhs;
}
} // namespace icu_str_ops
/**
* @brief Reencodes val to UTF-8 and then converts it to T using the primary
* overload.
*
* @param val A string holding data to convert.
* @param type A type name to be used in error messages.
* @return T The converted value.
*/
template
auto fromStr(const icu::UnicodeString& val, const char* type = typeid(T).name())
-> T {
return fromStr(toUTF8(val), type);
}
} // namespace KBLIB_NS
#endif // KBLIB_ICU_H