/* ***************************************************************************** * kblib is a general utility library for C++14 and C++17, intended to provide * performant high-level abstractions and more expressive ways to do simple * things. * * Copyright (c) 2021 killerbee * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * ****************************************************************************/ /** * @file * @brief Provides utilities for performing common operations on strings. * * @author killerbee * @date 2019-2021 * @copyright GNU General Public Licence v3.0 */ #ifndef KBLIB_STRINGOPS_H #define KBLIB_STRINGOPS_H #include "algorithm.h" #include "format.h" #include "tdecl.h" #include "traits.h" #include #include #include #include #include #include #include #if KBLIB_USE_CXX17 # include #endif namespace KBLIB_NS { #if true or KBLIB_USE_CXX17 /** * @brief Determine if the given type, ignoring const or reference qualifiers, * is a character type. * * Standard character types include char, wchar_t, char16_t, char32_t, and, * in C++20, char8_t. */ template struct is_character : contains_type, std::decay_t> { }; /** * @brief Equivalent to is_character::value. */ template constexpr bool is_character_v = is_character::value; namespace detail { /** * @brief Filter only arithmetic types. * * If T is an arithmetic type, provides the member type = T. Otherwise, type * = void. The primary template is for non-arithmetic types. */ template ::value> struct arithmetic_type { using type = void; }; /** * @brief Filter only arithmetic types. * * Provides the member type = T. This partial specialization is for * arithmetic types. */ template struct arithmetic_type { using type = T; }; /** * @brief Equivalent to typename arithmetic_type::type. */ template using arithmetic_type_t = typename arithmetic_type::type; /** * @brief Converts arithmetic types to strings, but provides the identity * transformation for all other types. * * This is primarily an implementation detail of concat, provided in the main * namespace because it might be generally useful. */ template > struct str_type { /** * @brief Arithmetic types can be converted into strings using the * standard library. */ using type = std::string; /** * @brief Forwards to std::to_string. * @param in A numeric value to convert to a string. * @return std::string A string representation of that number. */ KBLIB_NODISCARD static auto convert(T in) -> std::string { return std::to_string(in); } }; /** * @brief Performs a natural conversion to a stringlike type. * * A natural conversion for an arithmetic type is std::to_string. For any * other type, there is no assumed transformation, so they are passed through * unchanged. * * @note This is primarily an implementation detail of concat, provided in * the main namespace because it might be generally useful. This partial * specialization is for non-arithmetic types. */ template struct str_type { /** * @brief Non-arithmetic types are either already stringlike, or have no * natural conversion to std::string. */ using type = T; /** * @brief Returns the argument unchanged. */ KBLIB_NODISCARD static auto convert(T&& in) -> type { return std::forward(in); } }; /** * @brief Override for char to avoid conversion to integer */ template <> struct str_type { using type = char; KBLIB_NODISCARD static auto convert(char in) -> char { return in; } }; /** * @brief Override for wchar_t to avoid conversion to integer */ template <> struct str_type { using type = wchar_t; KBLIB_NODISCARD static auto convert(wchar_t in) -> wchar_t { return in; } }; /** * @brief Override for char16_t to avoid conversion to integer */ template <> struct str_type { using type = char16_t; KBLIB_NODISCARD static auto convert(char16_t in) -> char16_t { return in; } }; /** * @brief Override for char32_t to avoid conversion to integer */ template <> struct str_type { using type = char32_t; KBLIB_NODISCARD static auto convert(char32_t in) -> char32_t { return in; } }; # if __cpp_char8_t /** * @brief Override for char8_t to avoid conversion to integer */ template <> struct str_type { using type = char8_t; KBLIB_NODISCARD static auto convert(char8_t in) -> char8_t { return in; } }; # endif /** * @brief Provides the natural stringlike type for representing a T. */ template using str_type_t = typename str_type::type; } // namespace detail # if KBLIB_USE_CXX17 /** * @brief Determines the size in characters of any valid argument to concat or * append. * @param str A value of any stringlike or arithmetic type to count the * characters of. * @return std::size_t The number of characters needed to represent str. */ template KBLIB_NODISCARD auto strsize(Str&& str) -> std::size_t { if constexpr (std::is_array_v>) { return fakestd::size(str); } else if constexpr (std::is_pointer_v>) { return std::char_traits>::length(str); } else if constexpr (is_character_v>) { return 1; } else if constexpr (std::is_integral_v>) { return to_unsigned(count_digits(str)); } else if constexpr (std::is_floating_point_v>) { return to_unsigned(count_digits(str)); } else { return fakestd::size(str); } } template KBLIB_NODISCARD constexpr auto length(const CharT* str) noexcept -> std::size_t { return std::char_traits::length(str); } /** * @brief Given an object out of resizable stringlike type string, appends all * other arguments to it. * * Stringlike types and characters are simply appended, while arithmetic types * are first converted to strings using std::to_string. * * @param out The string to append to. * @param f The first value to append to out. * @param tail Any number of subsequent values to append to out. */ template auto append(string&& out, F&& f, S&&... tail) -> void { if constexpr (is_character_v>) { out.append(1, f); } else if constexpr (std::is_arithmetic_v>) { out.append(std::to_string(f)); } else { out.append(f); } if constexpr (sizeof...(S) > 0) { append(out, tail...); } return; } namespace detail { template struct value { T v; }; template struct values; template struct values, Ts...> : value... {}; template KBLIB_NODISCARD auto concat_impl(std::index_sequence, S&&... ins) -> string { values, detail::str_type_t...> buf{ {detail::str_type::convert(std::forward(ins))}...}; string ret; std::size_t size = (strsize(static_cast>&>(buf).v) + ... + 0); ret.reserve(size); append(ret, static_cast>&>(buf).v...); return ret; } } // namespace detail /** * @brief Returns a string consisting of the concatenation of all arguments. * * Arithmetic types are first converted by calling std::to_string. * * @param f The first argument to concatenate. * @param ins Any number of arguments to concatenate onto f. * @return string A string containing the concatenated values of all the * arguments. */ template KBLIB_NODISCARD auto concat(F&& f, S&&... ins) -> string { return detail::concat_impl( std::make_index_sequence<1 + sizeof...(S)>{}, std::forward(f), std::forward(ins)...); } /** * @brief Returns a string consisting of the concatenation of all elements of an * initializer list. * @param ins A series of values to concatenate together. * @return string A string containing the concatenated values of all the * arguments. */ template KBLIB_NODISCARD auto concat(std::initializer_list ins) -> string { string ret; ret.reserve(std::accumulate( ins.begin(), ins.end(), std::size_t{0}, [](std::size_t z, const str& s) { return z + strsize(s); })); for (auto&& s : ins) { append(ret, s); } return ret; } # endif KBLIB_NODISCARD inline auto isspace(char c) -> bool { return std::isspace(to_unsigned(c)); } KBLIB_NODISCARD inline auto isspace(wchar_t c) -> bool { return iswspace(to_unsigned(c)); } struct is_space { KBLIB_NODISCARD auto operator()(char c) -> bool { return isspace(c); } KBLIB_NODISCARD auto operator()(wchar_t c) -> bool { return isspace(c); } }; KBLIB_NODISCARD constexpr inline auto isAspace(char c) -> bool { for (auto v : " \t\r\n\f\v") { if (c == v) { return true; } } return false; } KBLIB_NODISCARD constexpr inline auto isAspace(wchar_t c) -> bool { for (auto v : L" \t\r\n\f\v") { if (c == v) { return true; } } return false; } /** * @brief Concatenates all elements of a range together with an optional joiner. * * range must support iteration and be supported by fakestd::size(). * * @param in A sequence of strings to concatenate. * @param joiner A string which will be inserted between every element of in. * @return string The joined string. */ template KBLIB_NODISCARD auto join(const range& in, const string& joiner = "") { if (fakestd::size(in) > 0) { auto len = kblib::accumulate( begin(in), end(in), std::size_t{}, [](std::size_t l, const auto& x) { return l + strsize(x); }); auto ret = *begin(in); try_reserve(ret, len); kblib::copy(next(begin(in)), end(in), consumer([&](const auto& x) { append(ret, joiner, x); })); return ret; } else { return typename value_type_linear::type{}; } } #endif // KBLIB_USE_CXX17 /** * @brief Split a string on all condensed delimiters. * * @param in The string to split * @param spacer A predicate which determines whether a character is a * delimiter. * @return Container A sequence container of all substrings in the split input. */ template , typename Predicate, typename String> KBLIB_NODISCARD auto split_tokens(const String& in, Predicate spacer) -> return_assert_t< is_callable::value, Container> { Container ret{}; bool delim_run = true; const char* begpos{}; auto endpos = begpos; for (const auto& c : in) { if (delim_run) { // keep begpos updated as long as in a delimiter run begpos = &c; } if (spacer(c) and not std::exchange(delim_run, true)) { // c is first of a run of delimiters ret.emplace_back(begpos, &c - begpos); } else if (not spacer(c)) { // c is not a delimiter delim_run = false; } endpos = &c; } if (not delim_run and begpos != endpos) { ret.emplace_back(begpos, endpos - begpos + 1); } return ret; } /** * @brief Split a string on all instances of whitespace. * * @param in The string to split * @return Container A sequence container of all substrings in the split input. */ template , typename String> KBLIB_NODISCARD auto split_tokens(const String& in) -> Container { return split_tokens(in, is_space{}); } /** * @brief Split a string on all instances of a delimiter. * * @param in The string to split * @param delim The character to split on. A run of delimiters is condensed. * @return Container A sequence container of all substrings in the split input. */ template , typename String> KBLIB_NODISCARD auto split_tokens( const String& in, typename Container::value_type::value_type delim) -> Container { Container ret{}; bool delim_run = true; using CharT = typename Container::value_type::value_type; const CharT* begpos{}; auto endpos = begpos; for (const CharT& c : in) { if (delim_run) { // keep begpos updated as long as in a delimiter run begpos = &c; } if (c == delim and not std::exchange(delim_run, true)) { // c is first of a run of delimiters ret.emplace_back(begpos, &c - begpos); } else if (c != delim) { // c is not a delimiter delim_run = false; } endpos = &c; } if (not delim_run and begpos != endpos) { ret.emplace_back(&*begpos, endpos - begpos + 1); } return ret; } template , typename String> KBLIB_NODISCARD auto kbsplit2(const String& in, char delim = ' ') -> Container { Container ret{""}; bool delim_run = true; for (char c : in) { if (c == delim and not std::exchange(delim_run, true)) { // c is first of a run of delimiters ret.emplace_back(); } else if (c != delim) { // c is not a delimiter delim_run = false; ret.back().push_back(c); } } if (ret.back().empty()) { ret.pop_back(); } return ret; } /** * @brief Split a string on all instances of delim. * * @param in The string to split * @param delim The character to split on. * @return Container A sequence container of all substrings in the split input. */ template , typename String> KBLIB_NODISCARD auto split_dsv(const String& str, char delim) -> Container { Container ret; for (std::size_t pos1{}, pos2{str.find(delim)}; pos1 != str.npos;) { ret.emplace_back(str, pos1, pos2 - pos1); pos1 = std::exchange(pos2, str.find(delim, pos2 + 1)); if (pos1 != str.npos) { ++pos1; } } return ret; } /** * @brief Split a string on all instances of delim. * * @param in The string to split * @param delim A predicate for delimiters. * @return Container A sequence container of all substrings in the split input. */ template , typename String, typename Predicate> KBLIB_NODISCARD auto split_dsv(const String& str, Predicate delim) -> return_assert_t< is_callable::value, Container> { Container ret; for (std::size_t pos1{}, pos2{str.find(delim)}; pos1 != str.npos;) { ret.emplace_back(str, pos1, pos2 - pos1); pos1 = std::exchange( pos2, kblib::find_in_if(str.begin() + pos1 + 1, str.end(), delim)); if (pos1 != str.npos) { ++pos1; } } return ret; } // TODO(killerbee13): figure out if any uses of reverseStr, toLower, toUpper // exist in current projects /** * @brief Reverses all the elements of its input. * * @attention This function will not behave correctly with multibyte character * encodings. * * @param val The string to reverse. * @return string The reversed range. */ template KBLIB_NODISCARD auto reverse_str(string val) -> string { std::reverse(val.begin(), val.end()); return val; } namespace detail { template KBLIB_NODISCARD inline auto to_int_type(CharT ch) { return std::char_traits::to_int_type(ch); } template KBLIB_NODISCARD inline auto to_char_type(IntT ch) { return std::char_traits::to_char_type(ch); } KBLIB_NODISCARD inline auto tolower(char ch) { return to_char_type(std::tolower(to_int_type(ch))); } KBLIB_NODISCARD inline auto towlower(wchar_t ch) { return to_char_type(std::towlower(to_int_type(ch))); } KBLIB_NODISCARD inline auto toupper(char ch) { return to_char_type(std::toupper(to_int_type(ch))); } KBLIB_NODISCARD inline auto towupper(wchar_t ch) { return to_char_type(std::towupper(to_int_type(ch))); } } // namespace detail /** * @brief Folds all characters in a string using the default execution character * set to lowercase. * @param str The string to case-fold. * @return string The case-folded string. */ template KBLIB_NODISCARD constexpr auto tolower(string str) -> string { std::transform(str.begin(), str.end(), str.begin(), [](auto c) { return detail::tolower(c); }); return str; } /** * @brief Folds all characters in a string using the default execution character * set to uppercase. * @param str The string to case-fold. * @return string The case-folded string. */ template KBLIB_NODISCARD auto toupper(string str) -> string { std::transform(str.begin(), str.end(), str.begin(), [](auto c) { return detail::toupper(c); }); return str; } /** * @brief Construct a string consisting of count copies of val concatenated * together. * * This function currently works greedily and will be inefficient for large * values of count. * * @param val * @param count * @todo Defer constrution of a string with a class. */ template KBLIB_NODISCARD auto repeat(string val, std::size_t count) -> string { string tmp; try_reserve(tmp, fakestd::size(val) * count); for (std::size_t i = 0; i < count; ++i) { tmp += val; } return tmp; } /** * @brief Construct a string consisting of count copies of val. * * This function is a trivial wrapper around a constructor of std::string * provided for symmetry with the above overload. * * @param val The character to be repeated. * @param count The number of times to repeat val. */ KBLIB_NODISCARD inline auto repeat(char val, std::size_t count) -> std::string { return std::string(count, val); } #if KBLIB_USE_STRING_VIEW /** * @brief Checks if a given string ends with a particular string. * @param haystack The string to be checked. * @param needle The suffix to check for. * @return bool If haystack ends with needle. */ KBLIB_NODISCARD inline auto ends_with(std::string_view haystack, std::string_view needle) -> bool { return haystack.size() >= needle.size() and haystack.compare(haystack.size() - needle.size(), std::string_view::npos, needle) == 0; } /** * @brief Checks if a given string ends with a particular string. * @param haystack The string to be checked. * @param needle The suffix to check for. * @return bool If haystack ends with needle. */ KBLIB_NODISCARD inline auto ends_with(std::string_view haystack, char needle) -> bool { return not haystack.empty() and haystack.back() == needle; } /** * @brief Checks if a given string starts with a particular string. * @param haystack The string to be checked. * @param needle The prefix to check for. * @return bool If haystack starts with needle. */ KBLIB_NODISCARD inline auto starts_with(std::string_view haystack, std::string_view needle) -> bool { return haystack.size() >= needle.size() and haystack.compare(0, needle.size(), needle) == 0; } /** * @brief Checks if a given string starts with a particular string. * @param haystack The string to be checked. * @param needle The prefix to check for. * @return bool If haystack starts with needle. */ KBLIB_NODISCARD inline auto starts_with(std::string_view haystack, char needle) -> bool { return not haystack.empty() and haystack.front() == needle; } #endif } // namespace KBLIB_NS #endif // KBLIB_STRINGOPS_H