/* *****************************************************************************
* kblib is a general utility library for C++14 and C++17, intended to provide
* performant high-level abstractions and more expressive ways to do simple
* things.
*
* Copyright (c) 2021 killerbee
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
* ****************************************************************************/
/**
* @file
* @brief Provides utilities for performing common operations on strings.
*
* @author killerbee
* @date 2019-2021
* @copyright GNU General Public Licence v3.0
*/
#ifndef KBLIB_STRINGOPS_H
#define KBLIB_STRINGOPS_H
#include "algorithm.h"
#include "format.h"
#include "tdecl.h"
#include "traits.h"
#include
#include
#include
#include
#include
#include
#include
#if KBLIB_USE_CXX17
# include
#endif
namespace KBLIB_NS {
#if true or KBLIB_USE_CXX17
/**
* @brief Determine if the given type, ignoring const or reference qualifiers,
* is a character type.
*
* Standard character types include char, wchar_t, char16_t, char32_t, and,
* in C++20, char8_t.
*
* The answer is inherited from the contains_type base class, so it is read
* through the usual nested ::value member.
*
* NOTE(review): the template parameter/argument lists of this declaration
* appear to be missing (angle-bracket contents lost); restore them from the
* upstream header before compiling.
*/
template
struct is_character
: contains_type,
std::decay_t> {
};
/**
* @brief Variable-template shorthand for is_character::value.
*
* Being a constexpr bool, it can be used directly as a compile-time condition.
*
* NOTE(review): the template parameter/argument lists appear to be missing
* here (angle-bracket contents lost); confirm against the upstream header.
*/
template
constexpr bool is_character_v = is_character::value;
namespace detail {
/**
* @brief Filter only arithmetic types.
*
* If T is an arithmetic type, provides the member type = T. Otherwise, type
* = void. The primary template is for non-arithmetic types.
*
* NOTE(review): the template parameter list is truncated in this file (only
* the trailing "::value>" of a defaulted parameter survives); restore it from
* the upstream header.
*/
template ::value>
struct arithmetic_type {
// Fallback result: nothing arithmetic to report.
using type = void;
};
/**
* @brief Filter only arithmetic types.
*
* Provides the member type = T. This partial specialization is for
* arithmetic types.
*
* NOTE(review): the specialization's argument list is missing here, which
* makes this read as a redefinition of the primary template; confirm against
* the upstream header.
*/
template
struct arithmetic_type {
// Arithmetic types pass through unchanged.
using type = T;
};
/**
* @brief Equivalent to typename arithmetic_type::type.
*
* Yields T for arithmetic types and void for everything else.
*/
template
using arithmetic_type_t = typename arithmetic_type::type;
/**
* @brief Converts arithmetic types to strings, but provides the identity
* transformation for all other types.
*
* This is primarily an implementation detail of concat; it is declared inside
* namespace detail.
*
* The primary template shown here is the arithmetic case: its type is
* std::string and convert() calls std::to_string.
*
* NOTE(review): the template parameter list is truncated in this file
* (angle-bracket contents lost); restore it from the upstream header.
*/
template >
struct str_type {
/**
* @brief Arithmetic types can be converted into strings using the
* standard library.
*/
using type = std::string;
/**
* @brief Forwards to std::to_string.
* @param in A numeric value to convert to a string.
* @return std::string A string representation of that number, formatted
* exactly as std::to_string formats it.
*/
KBLIB_NODISCARD static auto convert(T in) -> std::string {
return std::to_string(in);
}
};
/**
* @brief Performs a natural conversion to a stringlike type.
*
* A natural conversion for an arithmetic type is std::to_string. For any
* other type, there is no assumed transformation, so they are passed through
* unchanged.
*
* @note This is primarily an implementation detail of concat and is declared
* inside namespace detail. This partial specialization is for non-arithmetic
* types.
*
* NOTE(review): the specialization's argument list is missing in this file
* (angle-bracket contents lost); restore it from the upstream header.
*/
template
struct str_type {
/**
* @brief Non-arithmetic types are either already stringlike, or have no
* natural conversion to std::string.
*/
using type = T;
/**
* @brief Returns the argument unchanged.
* @param in The value to pass through; taken as T&& and handed to
* std::forward.
* @return type The forwarded argument, returned by value as T.
*/
KBLIB_NODISCARD static auto convert(T&& in) -> type {
return std::forward(in);
}
};
/**
* @brief Override for char to avoid conversion to integer
*
* convert() is the identity, so a char stays a single character instead of
* going through std::to_string.
*
* NOTE(review): the explicit specialization arguments are missing on each
* "struct str_type" below (angle-bracket contents lost); restore them from
* the upstream header.
*/
template <>
struct str_type {
using type = char;
KBLIB_NODISCARD static auto convert(char in) -> char { return in; }
};
/**
* @brief Override for wchar_t to avoid conversion to integer
*
* convert() is the identity.
*/
template <>
struct str_type {
using type = wchar_t;
KBLIB_NODISCARD static auto convert(wchar_t in) -> wchar_t { return in; }
};
/**
* @brief Override for char16_t to avoid conversion to integer
*
* convert() is the identity.
*/
template <>
struct str_type {
using type = char16_t;
KBLIB_NODISCARD static auto convert(char16_t in) -> char16_t {
return in;
}
};
/**
* @brief Override for char32_t to avoid conversion to integer
*
* convert() is the identity.
*/
template <>
struct str_type {
using type = char32_t;
KBLIB_NODISCARD static auto convert(char32_t in) -> char32_t {
return in;
}
};
// The char8_t override is only compiled when the compiler advertises char8_t
// support through the __cpp_char8_t feature-test macro.
# if __cpp_char8_t
/**
* @brief Override for char8_t to avoid conversion to integer
*
* convert() is the identity.
*/
template <>
struct str_type {
using type = char8_t;
KBLIB_NODISCARD static auto convert(char8_t in) -> char8_t { return in; }
};
# endif
/**
* @brief Provides the natural stringlike type for representing a T.
*
* Shorthand for the nested ::type of str_type.
*
* NOTE(review): the template parameter/argument lists appear to be missing
* here (angle-bracket contents lost); confirm against the upstream header.
*/
template
using str_type_t = typename str_type::type;
} // namespace detail
# if KBLIB_USE_CXX17
/**
* @brief Determines the size in characters of any valid argument to concat or
* append.
*
* The category of the argument is selected at compile time with an
* if-constexpr chain, tested in this order: array, pointer, character,
* integral, floating point, and finally anything else.
*
* @param str A value of any stringlike or arithmetic type to count the
* characters of.
* @return std::size_t The number of characters needed to represent str.
*
* NOTE(review): the template parameter list and every trait argument list in
* the body appear to be missing (angle-bracket contents lost); restore them
* from the upstream header.
*/
template
KBLIB_NODISCARD auto strsize(Str&& str) -> std::size_t {
if constexpr (std::is_array_v>) {
// Arrays report whatever fakestd::size returns for them.
// NOTE(review): for a string literal that would presumably include the
// terminating NUL - confirm against fakestd::size.
return fakestd::size(str);
} else if constexpr (std::is_pointer_v>) {
// Pointers are measured with char_traits::length.
return std::char_traits>::length(str);
} else if constexpr (is_character_v>) {
// A single character always occupies one position.
return 1;
} else if constexpr (std::is_integral_v>) {
// Numbers are sized by the digit count reported by count_digits.
return to_unsigned(count_digits(str));
} else if constexpr (std::is_floating_point_v>) {
// Same expression as the integral branch, applied to floating point.
return to_unsigned(count_digits(str));
} else {
// Everything else is asked for its size through fakestd::size.
return fakestd::size(str);
}
}
/**
* @brief Returns the length of a character sequence given by pointer.
*
* Thin constexpr, noexcept wrapper that forwards to
* std::char_traits::length.
*
* @param str Pointer to the first character of the sequence.
* @return std::size_t The value reported by std::char_traits::length.
*
* NOTE(review): the template parameter list and the char_traits argument
* appear to be missing (angle-bracket contents lost); confirm against the
* upstream header.
*/
template
KBLIB_NODISCARD constexpr auto length(const CharT* str) noexcept
-> std::size_t {
return std::char_traits::length(str);
}
/**
* @brief Given an object out of resizable stringlike type string, appends all
* other arguments to it.
*
* Stringlike types and characters are simply appended, while arithmetic types
* are first converted to strings using std::to_string. The character check is
* made before the arithmetic check, so characters are never formatted as
* numbers.
*
* @param out The string to append to.
* @param f The first value to append to out.
* @param tail Any number of subsequent values to append to out.
*
* NOTE(review): the template parameter list and the trait argument lists
* appear to be missing (angle-bracket contents lost); restore them from the
* upstream header.
*/
template
auto append(string&& out, F&& f, S&&... tail) -> void {
if constexpr (is_character_v>) {
// One copy of the character.
out.append(1, f);
} else if constexpr (std::is_arithmetic_v>) {
out.append(std::to_string(f));
} else {
out.append(f);
}
// Recurse on the remaining arguments; they are passed on as lvalues
// (no std::forward is applied here).
if constexpr (sizeof...(S) > 0) {
append(out, tail...);
}
return;
}
namespace detail {
/**
* @brief Simple holder for one value; used as a base class by values.
*
* NOTE(review): the template parameter lists throughout this namespace block
* appear to be missing (angle-bracket contents lost); restore them from the
* upstream header.
*/
template
struct value {
T v;
};
/**
* @brief Aggregate of several value holders. Only the partial specialization
* below is defined; it derives from one value base per pack element.
*/
template
struct values;
template
struct values, Ts...> : value... {};
/**
* @brief Implementation of concat.
*
* Converts every argument with str_type::convert into a temporary buffer,
* sums strsize over the converted values, reserves that much in the result
* and then appends all converted values in one append call.
*
* @param ins The values to concatenate.
* @return string The concatenation of all converted arguments.
*/
template
KBLIB_NODISCARD auto concat_impl(std::index_sequence, S&&... ins)
-> string {
// Each argument is converted exactly once and stored in its own base.
values, detail::str_type_t...> buf{
{detail::str_type::convert(std::forward(ins))}...};
string ret;
// Fold expression: total number of characters over all converted values.
std::size_t size
= (strsize(static_cast>&>(buf).v) + ...
+ 0);
ret.reserve(size);
append(ret, static_cast>&>(buf).v...);
return ret;
}
} // namespace detail
/**
* @brief Returns a string consisting of the concatenation of all arguments.
*
* Arithmetic types are first converted by calling std::to_string. The work is
* delegated to detail::concat_impl together with an index sequence covering
* all 1 + sizeof...(S) arguments.
*
* @param f The first argument to concatenate.
* @param ins Any number of arguments to concatenate onto f.
* @return string A string containing the concatenated values of all the
* arguments.
*
* NOTE(review): the template parameter list and the explicit template
* arguments in the body appear to be missing (angle-bracket contents lost);
* restore them from the upstream header.
*/
template
KBLIB_NODISCARD auto concat(F&& f, S&&... ins) -> string {
return detail::concat_impl(
std::make_index_sequence<1 + sizeof...(S)>{}, std::forward(f),
std::forward(ins)...);
}
/**
* @brief Returns a string consisting of the concatenation of all elements of an
* initializer list.
*
* The result is reserved up front to the sum of strsize over all elements,
* then each element is appended in order.
*
* @param ins A series of values to concatenate together.
* @return string A string containing the concatenated values of all the
* arguments.
*
* NOTE(review): the template parameter list and the initializer_list element
* type appear to be missing (angle-bracket contents lost); restore them from
* the upstream header.
*/
template
KBLIB_NODISCARD auto concat(std::initializer_list ins) -> string {
string ret;
// First pass: total length, so that appending does not have to regrow.
ret.reserve(std::accumulate(
ins.begin(), ins.end(), std::size_t{0},
[](std::size_t z, const str& s) { return z + strsize(s); }));
// Second pass: append the elements in order.
for (auto&& s : ins) {
append(ret, s);
}
return ret;
}
# endif
/**
 * @brief Tests whether a narrow character is whitespace according to
 * std::isspace.
 *
 * The character is passed through to_unsigned before it is handed to
 * std::isspace.
 *
 * @param c The character to classify.
 * @return bool True if std::isspace reports a nonzero result.
 */
KBLIB_NODISCARD inline auto isspace(char c) -> bool {
	const auto code = to_unsigned(c);
	return std::isspace(code) != 0;
}
/**
 * @brief Tests whether a wide character is whitespace according to iswspace.
 *
 * The character is passed through to_unsigned before it is handed to
 * iswspace.
 *
 * @param c The wide character to classify.
 * @return bool True if iswspace reports a nonzero result.
 */
KBLIB_NODISCARD inline auto isspace(wchar_t c) -> bool {
	const auto code = to_unsigned(c);
	return iswspace(code) != 0;
}
/**
 * @brief Stateless predicate object that reports whether a character is
 * whitespace.
 *
 * Each call operator forwards to the isspace overload for the same character
 * type. The operators are const so that the predicate can also be invoked
 * through a const object or a reference-to-const.
 */
struct is_space {
	/// @return bool The result of isspace(c) for a narrow character.
	KBLIB_NODISCARD auto operator()(char c) const -> bool {
		return isspace(c);
	}
	/// @return bool The result of isspace(c) for a wide character.
	KBLIB_NODISCARD auto operator()(wchar_t c) const -> bool {
		return isspace(c);
	}
};
/**
 * @brief Locale-independent, constexpr whitespace test for narrow characters.
 *
 * Recognizes exactly these six characters: space, horizontal tab, carriage
 * return, line feed, form feed and vertical tab.
 *
 * The NUL character is not whitespace. Iterating over a string literal with a
 * range-for also visits its terminating '\0', so the accepted set is spelled
 * out explicitly instead.
 *
 * @param c The character to classify.
 * @return bool True if c is one of the six characters listed above.
 */
KBLIB_NODISCARD constexpr inline auto isAspace(char c) -> bool {
	switch (c) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
	case '\f':
	case '\v':
		return true;
	default:
		return false;
	}
}
/**
 * @brief Locale-independent, constexpr whitespace test for wide characters.
 *
 * Recognizes exactly these six characters: space, horizontal tab, carriage
 * return, line feed, form feed and vertical tab.
 *
 * The NUL character is not whitespace. Iterating over a wide string literal
 * with a range-for also visits its terminating L'\0', so the accepted set is
 * spelled out explicitly instead.
 *
 * @param c The wide character to classify.
 * @return bool True if c is one of the six characters listed above.
 */
KBLIB_NODISCARD constexpr inline auto isAspace(wchar_t c) -> bool {
	switch (c) {
	case L' ':
	case L'\t':
	case L'\r':
	case L'\n':
	case L'\f':
	case L'\v':
		return true;
	default:
		return false;
	}
}
/**
* @brief Concatenates all elements of a range together with an optional joiner.
*
* range must support iteration and be supported by fakestd::size().
*
* For a non-empty range the result starts as a copy of the first element and
* every later element is appended preceded by joiner. For an empty range a
* value-initialized object is returned.
*
* @param in A sequence of strings to concatenate.
* @param joiner A string which will be inserted between every element of in.
* Defaults to the empty string.
* @return string The joined string.
*
* NOTE(review): the template parameter list and the argument of
* value_type_linear appear to be missing (angle-bracket contents lost);
* restore them from the upstream header.
*/
template
KBLIB_NODISCARD auto join(const range& in, const string& joiner = "") {
if (fakestd::size(in) > 0) {
// Capacity hint: sum of the element sizes. The joiner occurrences are
// not included in this figure.
auto len = kblib::accumulate(
begin(in), end(in), std::size_t{},
[](std::size_t l, const auto& x) { return l + strsize(x); });
// Seed the result with a copy of the first element.
auto ret = *begin(in);
try_reserve(ret, len);
// Append "joiner + element" for every element after the first.
kblib::copy(next(begin(in)), end(in),
consumer([&](const auto& x) { append(ret, joiner, x); }));
return ret;
} else {
// Nothing to join.
return typename value_type_linear::type{};
}
}
#endif // KBLIB_USE_CXX17
/**
 * @brief Split a string on all condensed delimiters.
 *
 * Consecutive delimiters count as a single separator, and delimiters at the
 * very start or end of the input do not produce empty tokens. Tokens are
 * built from pointers to the characters of in, so in must store its
 * characters contiguously.
 *
 * @param in The string to split
 * @param spacer A predicate which determines whether a character is a
 * delimiter. It is called exactly once per character of in.
 * @return Container A sequence container of all substrings in the split input.
 *
 * NOTE(review): the template parameter list and the is_callable arguments
 * appear to be missing (angle-bracket contents lost); restore them from the
 * upstream header.
 */
template , typename Predicate,
typename String>
KBLIB_NODISCARD auto split_tokens(const String& in, Predicate spacer)
-> return_assert_t<
is_callable::value,
Container> {
	Container ret{};
	// True while the scan is inside (or before the first character after) a
	// run of delimiters.
	bool delim_run = true;
	const char* begpos{};
	auto endpos = begpos;
	for (const auto& c : in) {
		if (delim_run) {
			// keep begpos updated as long as in a delimiter run, so that it
			// ends up on the first character of the next token
			begpos = &c;
		}
		const bool is_delim = spacer(c);
		if (is_delim) {
			if (not std::exchange(delim_run, true)) {
				// c is first of a run of delimiters: the token ends before it
				ret.emplace_back(begpos, &c - begpos);
			}
		} else {
			// c is not a delimiter
			delim_run = false;
		}
		endpos = &c;
	}
	// If the input did not end on a delimiter, the last token is still open.
	// It spans [begpos, endpos]; begpos == endpos is a valid one-character
	// token and must not be dropped.
	if (not delim_run) {
		ret.emplace_back(begpos, endpos - begpos + 1);
	}
	return ret;
}
/**
* @brief Split a string on all instances of whitespace.
*
* Convenience overload that forwards to the predicate overload of
* split_tokens with an is_space object as the delimiter test.
*
* @param in The string to split
* @return Container A sequence container of all substrings in the split input.
*
* NOTE(review): the template parameter list appears to be truncated
* (angle-bracket contents lost); restore it from the upstream header.
*/
template , typename String>
KBLIB_NODISCARD auto split_tokens(const String& in) -> Container {
return split_tokens(in, is_space{});
}
/**
 * @brief Split a string on all instances of a delimiter.
 *
 * Consecutive delimiters count as a single separator, and delimiters at the
 * very start or end of the input do not produce empty tokens. Tokens are
 * built from pointers to the characters of in, so in must store its
 * characters contiguously.
 *
 * @param in The string to split
 * @param delim The character to split on. A run of delimiters is condensed.
 * @return Container A sequence container of all substrings in the split input.
 *
 * NOTE(review): the template parameter list appears to be truncated
 * (angle-bracket contents lost); restore it from the upstream header.
 */
template , typename String>
KBLIB_NODISCARD auto split_tokens(
const String& in, typename Container::value_type::value_type delim)
-> Container {
	Container ret{};
	// True while the scan is inside (or before the first character after) a
	// run of delimiters.
	bool delim_run = true;
	using CharT = typename Container::value_type::value_type;
	const CharT* begpos{};
	auto endpos = begpos;
	for (const CharT& c : in) {
		if (delim_run) {
			// keep begpos updated as long as in a delimiter run, so that it
			// ends up on the first character of the next token
			begpos = &c;
		}
		if (c == delim) {
			if (not std::exchange(delim_run, true)) {
				// c is first of a run of delimiters: the token ends before it
				ret.emplace_back(begpos, &c - begpos);
			}
		} else {
			// c is not a delimiter
			delim_run = false;
		}
		endpos = &c;
	}
	// If the input did not end on a delimiter, the last token is still open.
	// It spans [begpos, endpos]; begpos == endpos is a valid one-character
	// token and must not be dropped.
	if (not delim_run) {
		ret.emplace_back(begpos, endpos - begpos + 1);
	}
	return ret;
}
/**
 * @brief Split a string on a delimiter character, building each token one
 * character at a time.
 *
 * Runs of delimiters are condensed, and leading or trailing delimiters do not
 * produce empty tokens.
 *
 * @param in The string to split.
 * @param delim The character to split on; defaults to a space.
 * @return Container A sequence container of all tokens in the input.
 *
 * NOTE(review): the template parameter list appears to be truncated
 * (angle-bracket contents lost); restore it from the upstream header.
 */
template , typename String>
KBLIB_NODISCARD auto kbsplit2(const String& in, char delim = ' ') -> Container {
	// Start with one empty token that characters are pushed onto.
	Container tokens{""};
	bool in_gap = true;
	for (char ch : in) {
		if (ch != delim) {
			// ordinary character: extend the current token
			in_gap = false;
			tokens.back().push_back(ch);
			continue;
		}
		// delimiter: open a fresh token only at the start of a delimiter run
		if (not std::exchange(in_gap, true)) {
			tokens.emplace_back();
		}
	}
	// A trailing delimiter (or empty input) leaves an unused empty token.
	if (tokens.back().empty()) {
		tokens.pop_back();
	}
	return tokens;
}
/**
 * @brief Split a string on all instances of delim.
 *
 * Every occurrence of delim ends a field, so adjacent delimiters produce
 * empty fields and an empty input yields a single empty field.
 *
 * @param str The string to split
 * @param delim The character to split on.
 * @return Container A sequence container of all substrings in the split input.
 *
 * NOTE(review): the template parameter list appears to be truncated
 * (angle-bracket contents lost); restore it from the upstream header.
 */
template , typename String>
KBLIB_NODISCARD auto split_dsv(const String& str, char delim) -> Container {
	Container fields;
	std::size_t field_begin = 0;
	while (true) {
		const std::size_t field_end = str.find(delim, field_begin);
		// For the last field field_end is npos, so the count covers the rest
		// of the string.
		fields.emplace_back(str, field_begin, field_end - field_begin);
		if (field_end == str.npos) {
			break;
		}
		// Resume just past the delimiter.
		field_begin = field_end + 1;
	}
	return fields;
}
/**
 * @brief Split a string on every character for which delim returns true.
 *
 * Behaves like the single-character overload: every delimiter ends a field,
 * adjacent delimiters produce empty fields, and an empty input yields a
 * single empty field.
 *
 * The search is done with std::find_if over the iterators of str, because
 * string find members take a character or substring rather than a predicate.
 * Positions are always measured from str.begin().
 *
 * @param str The string to split. Its iterators must support iterator
 * arithmetic (difference and + 1).
 * @param delim A predicate for delimiters.
 * @return Container A sequence container of all substrings in the split input.
 *
 * NOTE(review): the template parameter list and the is_callable arguments
 * appear to be missing (angle-bracket contents lost); restore them from the
 * upstream header.
 */
template , typename String,
typename Predicate>
KBLIB_NODISCARD auto split_dsv(const String& str, Predicate delim)
-> return_assert_t<
is_callable::value,
Container> {
	Container ret;
	const auto first = str.begin();
	const auto last = str.end();
	auto field_begin = first;
	for (;;) {
		const auto field_end = std::find_if(field_begin, last, delim);
		// Same (source, position, count) construction as the char overload.
		ret.emplace_back(str, static_cast<std::size_t>(field_begin - first),
		                 static_cast<std::size_t>(field_end - field_begin));
		if (field_end == last) {
			break;
		}
		// Resume just past the delimiter.
		field_begin = field_end + 1;
	}
	return ret;
}
// TODO(killerbee13): figure out if any uses of reverseStr, toLower, toUpper
// exist in current projects
/**
* @brief Reverses all the elements of its input.
*
* The argument is taken by value, reversed in place with std::reverse and
* returned, so the caller's object is left untouched.
*
* @attention This function will not behave correctly with multibyte character
* encodings.
*
* @param val The string to reverse.
* @return string The reversed range.
*
* NOTE(review): the template parameter list appears to be missing
* (angle-bracket contents lost); restore it from the upstream header.
*/
template
KBLIB_NODISCARD auto reverse_str(string val) -> string {
std::reverse(val.begin(), val.end());
return val;
}
namespace detail {
/**
* @brief Converts a character to its integer representation through
* std::char_traits::to_int_type.
*
* NOTE(review): the template parameter lists and the explicit template
* arguments throughout this namespace block appear to be missing
* (angle-bracket contents lost); restore them from the upstream header.
*/
template
KBLIB_NODISCARD inline auto to_int_type(CharT ch) {
return std::char_traits::to_int_type(ch);
}
/**
* @brief Converts an integer representation back to a character through
* std::char_traits::to_char_type.
*/
template
KBLIB_NODISCARD inline auto to_char_type(IntT ch) {
return std::char_traits::to_char_type(ch);
}
/**
* @brief Lowercases one narrow character with std::tolower, converting to the
* integer representation and back around the call.
*/
KBLIB_NODISCARD inline auto tolower(char ch) {
return to_char_type(std::tolower(to_int_type(ch)));
}
/**
* @brief Lowercases one wide character with std::towlower, converting to the
* integer representation and back around the call.
*/
KBLIB_NODISCARD inline auto towlower(wchar_t ch) {
return to_char_type(std::towlower(to_int_type(ch)));
}
/**
* @brief Uppercases one narrow character with std::toupper, converting to the
* integer representation and back around the call.
*/
KBLIB_NODISCARD inline auto toupper(char ch) {
return to_char_type(std::toupper(to_int_type(ch)));
}
/**
* @brief Uppercases one wide character with std::towupper, converting to the
* integer representation and back around the call.
*/
KBLIB_NODISCARD inline auto towupper(wchar_t ch) {
return to_char_type(std::towupper(to_int_type(ch)));
}
} // namespace detail
/**
* @brief Folds all characters in a string using the default execution character
* set to lowercase.
* @param str The string to case-fold.
* @return string The case-folded string.
*/
template
KBLIB_NODISCARD constexpr auto tolower(string str) -> string {
std::transform(str.begin(), str.end(), str.begin(),
[](auto c) { return detail::tolower(c); });
return str;
}
/**
 * @brief Folds all characters in a string using the default execution character
 * set to uppercase.
 *
 * The argument is taken by value; every element is replaced by the result of
 * detail::toupper and the modified copy is returned.
 *
 * @param str The string to case-fold.
 * @return string The case-folded string.
 *
 * NOTE(review): the template parameter list appears to be missing
 * (angle-bracket contents lost); restore it from the upstream header.
 */
template
KBLIB_NODISCARD auto toupper(string str) -> string {
	for (auto&& ch : str) {
		ch = detail::toupper(ch);
	}
	return str;
}
/**
 * @brief Construct a string consisting of count copies of val concatenated
 * together.
 *
 * This function currently works greedily and will be inefficient for large
 * values of count.
 *
 * @param val The string to be repeated.
 * @param count The number of times to repeat val.
 * @return string The repeated string; empty when count is 0.
 * @todo Defer construction of a string with a class.
 *
 * NOTE(review): the template parameter list appears to be missing
 * (angle-bracket contents lost); restore it from the upstream header.
 */
template
KBLIB_NODISCARD auto repeat(string val, std::size_t count) -> string {
	string result;
	// Ask for the full final size up front.
	try_reserve(result, fakestd::size(val) * count);
	for (auto remaining = count; remaining != 0; --remaining) {
		result += val;
	}
	return result;
}
/**
 * @brief Construct a string consisting of count copies of val.
 *
 * This function is a trivial wrapper around the fill constructor of
 * std::string, provided for symmetry with the string overload of repeat.
 *
 * @param val The character to be repeated.
 * @param count The number of times to repeat val.
 * @return std::string A string of length count in which every character is
 * val.
 */
KBLIB_NODISCARD inline auto repeat(char val, std::size_t count) -> std::string {
	std::string filled(count, val);
	return filled;
}
#if KBLIB_USE_STRING_VIEW
/**
 * @brief Checks if a given string ends with a particular string.
 *
 * An empty needle is a suffix of every haystack.
 *
 * @param haystack The string to be checked.
 * @param needle The suffix to check for.
 * @return bool If haystack ends with needle.
 */
KBLIB_NODISCARD inline auto ends_with(std::string_view haystack,
                                      std::string_view needle) -> bool {
	// A needle longer than the haystack can never be a suffix.
	if (needle.size() > haystack.size()) {
		return false;
	}
	const auto tail = haystack.substr(haystack.size() - needle.size());
	return tail == needle;
}
/**
 * @brief Checks if a given string ends with a particular character.
 * @param haystack The string to be checked.
 * @param needle The final character to check for.
 * @return bool If haystack is non-empty and its last character is needle.
 */
KBLIB_NODISCARD inline auto ends_with(std::string_view haystack, char needle)
    -> bool {
	// An empty haystack has no last character to compare.
	if (haystack.empty()) {
		return false;
	}
	return haystack.back() == needle;
}
/**
 * @brief Checks if a given string starts with a particular string.
 *
 * An empty needle is a prefix of every haystack.
 *
 * @param haystack The string to be checked.
 * @param needle The prefix to check for.
 * @return bool If haystack starts with needle.
 */
KBLIB_NODISCARD inline auto starts_with(std::string_view haystack,
                                        std::string_view needle) -> bool {
	// A needle longer than the haystack can never be a prefix.
	if (needle.size() > haystack.size()) {
		return false;
	}
	const auto head = haystack.substr(0, needle.size());
	return head == needle;
}
/**
 * @brief Checks if a given string starts with a particular character.
 * @param haystack The string to be checked.
 * @param needle The first character to check for.
 * @return bool If haystack is non-empty and its first character is needle.
 */
KBLIB_NODISCARD inline auto starts_with(std::string_view haystack, char needle)
    -> bool {
	// An empty haystack has no first character to compare.
	if (haystack.empty()) {
		return false;
	}
	return haystack.front() == needle;
}
#endif
} // namespace KBLIB_NS
#endif // KBLIB_STRINGOPS_H