kblib  0.2.3
General utilities library for modern C++
stringops.h
Go to the documentation of this file.
1 /* *****************************************************************************
2  * kblib is a general utility library for C++14 and C++17, intended to provide
3  * performant high-level abstractions and more expressive ways to do simple
4  * things.
5  *
6  * Copyright (c) 2021 killerbee
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  * ****************************************************************************/
21 
31 #ifndef KBLIB_STRINGOPS_H
32 #define KBLIB_STRINGOPS_H
33 
34 #include "algorithm.h"
35 #include "format.h"
36 #include "tdecl.h"
37 #include "traits.h"
38 
39 #include <algorithm>
40 #include <cctype>
41 #include <cwctype>
42 #include <initializer_list>
43 #include <numeric>
44 #include <string>
45 #include <type_traits>
46 
47 #if KBLIB_USE_CXX17
48 # include <string_view>
49 #endif
50 
51 namespace kblib {
52 
53 #if true or KBLIB_USE_CXX17
61 template <typename C>
62 struct is_character
63  : contains_type<std::tuple<char, wchar_t, char16_t, char32_t
64 # if __cpp_char8_t
65  ,
66  char8_t
67 # endif
68  >,
69  std::decay_t<C>> {
70 };
71 
75 template <typename C>
76 constexpr bool is_character_v = is_character<C>::value;
77 
namespace detail {

    /**
     * @brief Yields T itself when T is arithmetic and void otherwise.
     *
     * The bool parameter exists only to carry the result of
     * std::is_arithmetic<T>; callers are expected to leave it defaulted.
     */
    template <typename T, bool IsArithmetic = std::is_arithmetic<T>::value>
    struct arithmetic_type : std::conditional<IsArithmetic, T, void> {};

    /**
     * @brief Shorthand for arithmetic_type<T>::type.
     */
    template <typename T>
    using arithmetic_type_t = typename arithmetic_type<T>::type;

    /**
     * @brief Describes how a value of type T is turned into something that
     * can be appended to a string.
     *
     * Primary template: chosen for arithmetic types that have no more
     * specific specialization. The value is rendered with std::to_string.
     */
    template <typename T, typename = arithmetic_type_t<T>>
    struct str_type {
        /// Result type of convert().
        using type = std::string;
        /// Formats the number as a std::string.
        KBLIB_NODISCARD static auto convert(T in) -> std::string {
            return std::to_string(in);
        }
    };

    /**
     * @brief Non-arithmetic types are passed through untouched (forwarded).
     */
    template <typename T>
    struct str_type<T, void> {
        /// Result type of convert(): T, unchanged.
        using type = T;
        /// Forwards the argument without converting it.
        KBLIB_NODISCARD static auto convert(T&& in) -> type {
            return std::forward<T>(in);
        }
    };

    /**
     * @brief Shared implementation for the character specializations below:
     * a character converts to itself.
     */
    template <typename CharT>
    struct char_passthrough {
        using type = CharT;
        KBLIB_NODISCARD static auto convert(CharT in) -> CharT { return in; }
    };

    /// char is kept as a character instead of being formatted as a number.
    template <>
    struct str_type<char, char> : char_passthrough<char> {};

    /// wchar_t is kept as a character.
    template <>
    struct str_type<wchar_t, wchar_t> : char_passthrough<wchar_t> {};

    /// char16_t is kept as a character.
    template <>
    struct str_type<char16_t, char16_t> : char_passthrough<char16_t> {};

    /// char32_t is kept as a character.
    template <>
    struct str_type<char32_t, char32_t> : char_passthrough<char32_t> {};

# if __cpp_char8_t
    /// char8_t is kept as a character.
    template <>
    struct str_type<char8_t, char8_t> : char_passthrough<char8_t> {};
# endif

    /**
     * @brief Shorthand for str_type<T>::type.
     */
    template <typename T>
    using str_type_t = typename str_type<T>::type;

} // namespace detail
207 
208 # if KBLIB_USE_CXX17
216 template <typename Str>
217 KBLIB_NODISCARD auto strsize(Str&& str) -> std::size_t {
218  if constexpr (std::is_array_v<std::remove_reference_t<Str>>) {
219  return fakestd::size(str);
220  } else if constexpr (std::is_pointer_v<std::decay_t<Str>>) {
221  return std::char_traits<std::decay_t<decltype(*str)>>::length(str);
222  } else if constexpr (is_character_v<std::decay_t<Str>>) {
223  return 1;
224  } else if constexpr (std::is_integral_v<std::decay_t<Str>>) {
225  return to_unsigned(count_digits(str));
226  } else if constexpr (std::is_floating_point_v<std::decay_t<Str>>) {
227  return to_unsigned(count_digits(str));
228  } else {
229  return fakestd::size(str);
230  }
231 }
232 
/**
 * @brief Length of a null-terminated character string, as computed by
 * std::char_traits<CharT>::length.
 *
 * @param str Pointer to the first character of a null-terminated string.
 * @return Number of characters before the terminator.
 */
template <typename CharT>
KBLIB_NODISCARD constexpr auto length(const CharT* str) noexcept
    -> std::size_t {
    using traits = std::char_traits<CharT>;
    return traits::length(str);
}
238 
250 template <typename string, typename F, typename... S>
251 auto append(string&& out, F&& f, S&&... tail) -> void {
252  if constexpr (is_character_v<std::decay_t<F>>) {
253  out.append(1, f);
254  } else if constexpr (std::is_arithmetic_v<std::decay_t<F>>) {
255  out.append(std::to_string(f));
256  } else {
257  out.append(f);
258  }
259  if constexpr (sizeof...(S) > 0) {
260  append(out, tail...);
261  }
262  return;
263 }
264 
265 namespace detail {
266 
267  template <std::size_t I, typename T>
268  struct value {
269  T v;
270  };
271 
272  template <class Idxs, class... Ts>
273  struct values;
274 
275  template <std::size_t... Idxs, typename... Ts>
276  struct values<std::index_sequence<Idxs...>, Ts...> : value<Idxs, Ts>... {};
277 
278  template <typename string, typename... S, std::size_t... I>
279  KBLIB_NODISCARD auto concat_impl(std::index_sequence<I...>, S&&... ins)
280  -> string {
281  values<std::index_sequence<I...>, detail::str_type_t<S>...> buf{
282  {detail::str_type<S>::convert(std::forward<S>(ins))}...};
283  string ret;
284  std::size_t size
285  = (strsize(static_cast<value<I, detail::str_type_t<S>>&>(buf).v) + ...
286  + 0);
287  ret.reserve(size);
288  append(ret, static_cast<value<I, detail::str_type_t<S>>&>(buf).v...);
289  return ret;
290  }
291 
292 } // namespace detail
293 
304 template <typename string = std::string, typename F, typename... S>
305 KBLIB_NODISCARD auto concat(F&& f, S&&... ins) -> string {
306  return detail::concat_impl<string>(
307  std::make_index_sequence<1 + sizeof...(S)>{}, std::forward<F>(f),
308  std::forward<S>(ins)...);
309 }
310 
318 template <typename string = std::string, typename str>
319 KBLIB_NODISCARD auto concat(std::initializer_list<str> ins) -> string {
320  string ret;
321  ret.reserve(std::accumulate(
322  ins.begin(), ins.end(), std::size_t{0},
323  [](std::size_t z, const str& s) { return z + strsize(s); }));
324  for (auto&& s : ins) {
325  append(ret, s);
326  }
327  return ret;
328 }
329 # endif
330 
331 KBLIB_NODISCARD inline auto isspace(char c) -> bool {
332  return std::isspace(to_unsigned(c));
333 }
334 KBLIB_NODISCARD inline auto isspace(wchar_t c) -> bool {
335  return iswspace(to_unsigned(c));
336 }
337 
/**
 * @brief Function object that reports whether a character is whitespace by
 * calling isspace on it.
 *
 * The call operators are const-qualified: the object has no state, and
 * const-qualification lets it be invoked through a const object or const
 * reference.
 */
struct is_space {
    /// @return The result of isspace(c) for a narrow character.
    KBLIB_NODISCARD auto operator()(char c) const -> bool {
        return isspace(c);
    }
    /// @return The result of isspace(c) for a wide character.
    KBLIB_NODISCARD auto operator()(wchar_t c) const -> bool {
        return isspace(c);
    }
};
342 
/**
 * @brief Locale-independent whitespace test for char.
 *
 * Recognises exactly space, horizontal tab, carriage return, line feed,
 * form feed and vertical tab. The null character is not whitespace: the
 * comparison is written out explicitly rather than iterating over a string
 * literal, because such an iteration also visits the literal's terminator.
 *
 * @param c The character to classify.
 * @return true if c is one of the six characters listed above.
 */
KBLIB_NODISCARD constexpr inline auto isAspace(char c) -> bool {
    return c == ' ' or c == '\t' or c == '\r' or c == '\n' or c == '\f'
           or c == '\v';
}
/**
 * @brief Locale-independent whitespace test for wchar_t.
 *
 * Recognises exactly space, horizontal tab, carriage return, line feed,
 * form feed and vertical tab. The null character is not whitespace: the
 * comparison is written out explicitly rather than iterating over a string
 * literal, because such an iteration also visits the literal's terminator.
 *
 * @param c The wide character to classify.
 * @return true if c is one of the six characters listed above.
 */
KBLIB_NODISCARD constexpr inline auto isAspace(wchar_t c) -> bool {
    return c == L' ' or c == L'\t' or c == L'\r' or c == L'\n' or c == L'\f'
           or c == L'\v';
}
359 
369 template <typename range, typename string = std::string>
370 KBLIB_NODISCARD auto join(const range& in, const string& joiner = "") {
371  if (fakestd::size(in) > 0) {
372 
373  auto len = kblib::accumulate(
374  begin(in), end(in), std::size_t{},
375  [](std::size_t l, const auto& x) { return l + strsize(x); });
376  auto ret = *begin(in);
377  try_reserve(ret, len);
378  kblib::copy(next(begin(in)), end(in),
379  consumer([&](const auto& x) { append(ret, joiner, x); }));
380  return ret;
381  } else {
382  return typename value_type_linear<range>::type{};
383  }
384 }
385 #endif // KBLIB_USE_CXX17
386 
395 template <typename Container = std::vector<std::string>, typename Predicate,
396  typename String>
397 KBLIB_NODISCARD auto split_tokens(const String& in, Predicate spacer)
398  -> return_assert_t<
399  is_callable<Predicate,
400  typename Container::value_type::value_type>::value,
401  Container> {
402  Container ret{};
403  bool delim_run = true;
404  const char* begpos{};
405  auto endpos = begpos;
406  for (const auto& c : in) {
407  if (delim_run) {
408  // keep begpos updated as long as in a delimiter run
409  begpos = &c;
410  }
411  if (spacer(c) and not std::exchange(delim_run, true)) {
412  // c is first of a run of delimiters
413  ret.emplace_back(begpos, &c - begpos);
414  } else if (not spacer(c)) {
415  // c is not a delimiter
416  delim_run = false;
417  }
418  endpos = &c;
419  }
420  if (not delim_run and begpos != endpos) {
421  ret.emplace_back(begpos, endpos - begpos + 1);
422  }
423  return ret;
424 }
425 
432 template <typename Container = std::vector<std::string>, typename String>
433 KBLIB_NODISCARD auto split_tokens(const String& in) -> Container {
434  return split_tokens(in, is_space{});
435 }
436 
/**
 * @brief Splits a string into tokens separated by runs of a single
 * delimiter character.
 *
 * Consecutive delimiters are condensed, and leading or trailing delimiters
 * produce no empty tokens. Tokens are built from a pointer to their first
 * character plus a length, so the characters of in are assumed to be stored
 * contiguously.
 *
 * A final token is emitted whenever the input does not end in a delimiter,
 * including when that token is a single character.
 *
 * @param in The string to split.
 * @param delim The delimiter character.
 * @return A Container holding the tokens in order of appearance.
 */
template <typename Container = std::vector<std::string>, typename String>
KBLIB_NODISCARD auto split_tokens(
    const String& in, typename Container::value_type::value_type delim)
    -> Container {
    using CharT = typename Container::value_type::value_type;
    Container ret{};
    // True while the previous character was a delimiter (or at the start).
    bool delim_run = true;
    const CharT* begpos{};
    const CharT* endpos{};
    for (const CharT& c : in) {
        if (delim_run) {
            // keep begpos updated as long as in a delimiter run
            begpos = &c;
        }
        const bool is_delim = (c == delim);
        if (is_delim and not delim_run) {
            // c is first of a run of delimiters: the token ends before it
            ret.emplace_back(begpos, &c - begpos);
        }
        delim_run = is_delim;
        endpos = &c;
    }
    if (not delim_run) {
        // Input ended inside a token; endpos is its last character.
        ret.emplace_back(begpos, endpos - begpos + 1);
    }
    return ret;
}
472 
/**
 * @brief Splits a string on runs of delim, building each token character by
 * character.
 *
 * Runs of delimiters are condensed; leading and trailing delimiters do not
 * produce empty tokens.
 *
 * @param in The string to split.
 * @param delim The delimiter character; a space unless specified.
 * @return A Container holding the tokens in order of appearance.
 */
template <typename Container = std::vector<std::string>, typename String>
KBLIB_NODISCARD auto kbsplit2(const String& in, char delim = ' ') -> Container {
    // Start with one empty token that the first characters are added to.
    Container tokens{""};
    bool in_delims = true;
    for (char ch : in) {
        if (ch != delim) {
            in_delims = false;
            tokens.back().push_back(ch);
            continue;
        }
        if (not in_delims) {
            // First delimiter after a token: open the next token.
            tokens.emplace_back();
        }
        in_delims = true;
    }
    // A trailing delimiter (or empty input) leaves an unused empty token.
    if (tokens.back().empty()) {
        tokens.pop_back();
    }
    return tokens;
}
492 
/**
 * @brief Splits a string on every occurrence of delim.
 *
 * Delimiters are not condensed: two neighbouring delimiters produce an
 * empty field between them, and an empty input produces one empty field.
 *
 * @param str The string to split.
 * @param delim The delimiter character.
 * @return A Container holding the fields in order of appearance.
 */
template <typename Container = std::vector<std::string>, typename String>
KBLIB_NODISCARD auto split_dsv(const String& str, char delim) -> Container {
    Container fields;
    std::size_t field_begin = 0;
    while (true) {
        const std::size_t delim_pos = str.find(delim, field_begin);
        // When no delimiter is left the count runs to the end of str.
        fields.emplace_back(str, field_begin, delim_pos - field_begin);
        if (delim_pos == str.npos) {
            break;
        }
        field_begin = delim_pos + 1;
    }
    return fields;
}
512 
520 template <typename Container = std::vector<std::string>, typename String,
521  typename Predicate>
522 KBLIB_NODISCARD auto split_dsv(const String& str, Predicate delim)
523  -> return_assert_t<
524  is_callable<Predicate,
525  typename Container::value_type::value_type>::value,
526  Container> {
527  Container ret;
528  for (std::size_t pos1{}, pos2{str.find(delim)}; pos1 != str.npos;) {
529  ret.emplace_back(str, pos1, pos2 - pos1);
530  pos1 = std::exchange(
531  pos2, kblib::find_in_if(str.begin() + pos1 + 1, str.end(), delim));
532  if (pos1 != str.npos) {
533  ++pos1;
534  }
535  }
536  return ret;
537 }
538 
539 // TODO(killerbee13): figure out if any uses of reverseStr, toLower, toUpper
540 // exist in current projects
541 
/**
 * @brief Returns a copy of val with its elements in reverse order.
 *
 * The argument is taken by value, reversed in place and returned.
 *
 * @param val The string to reverse.
 * @return The reversed string.
 */
template <typename string>
KBLIB_NODISCARD auto reverse_str(string val) -> string {
    const auto first = val.begin();
    const auto last = val.end();
    std::reverse(first, last);
    return val;
}
556 
namespace detail {

    /**
     * @brief Converts a character to the integer type of its character
     * traits, via std::char_traits<CharT>::to_int_type.
     *
     * The character classification functions of the C library take this
     * integer type rather than the character type itself.
     */
    template <typename CharT>
    KBLIB_NODISCARD inline auto to_int_type(CharT ch) {
        return std::char_traits<CharT>::to_int_type(ch);
    }

    /**
     * @brief Converts an integer back to CharT, via
     * std::char_traits<CharT>::to_char_type.
     */
    template <typename CharT, typename IntT>
    KBLIB_NODISCARD inline auto to_char_type(IntT ch) {
        return std::char_traits<CharT>::to_char_type(ch);
    }

    /// Applies std::tolower to a char and returns the result as a char.
    KBLIB_NODISCARD inline auto tolower(char ch) {
        return to_char_type<char>(std::tolower(to_int_type(ch)));
    }

    /// Applies std::towlower to a wchar_t and returns the result as wchar_t.
    KBLIB_NODISCARD inline auto towlower(wchar_t ch) {
        return to_char_type<wchar_t>(std::towlower(to_int_type(ch)));
    }

    /// Applies std::toupper to a char and returns the result as a char.
    KBLIB_NODISCARD inline auto toupper(char ch) {
        return to_char_type<char>(std::toupper(to_int_type(ch)));
    }

    /// Applies std::towupper to a wchar_t and returns the result as wchar_t.
    KBLIB_NODISCARD inline auto towupper(wchar_t ch) {
        return to_char_type<wchar_t>(std::towupper(to_int_type(ch)));
    }
} // namespace detail
590 template <typename string>
591 KBLIB_NODISCARD constexpr auto tolower(string str) -> string {
592  std::transform(str.begin(), str.end(), str.begin(),
593  [](auto c) { return detail::tolower(c); });
594  return str;
595 }
596 
603 template <typename string>
604 KBLIB_NODISCARD auto toupper(string str) -> string {
605  std::transform(str.begin(), str.end(), str.begin(),
606  [](auto c) { return detail::toupper(c); });
607  return str;
608 }
609 
621 template <typename string>
622 KBLIB_NODISCARD auto repeat(string val, std::size_t count) -> string {
623  string tmp;
624  try_reserve(tmp, fakestd::size(val) * count);
625  for (std::size_t i = 0; i < count; ++i) {
626  tmp += val;
627  }
628  return tmp;
629 }
/**
 * @brief Builds a std::string consisting of count copies of a character.
 *
 * @param val The character to repeat.
 * @param count The length of the resulting string.
 * @return A string of count characters, all equal to val.
 */
KBLIB_NODISCARD inline auto repeat(char val, std::size_t count) -> std::string {
    std::string filled(count, val);
    return filled;
}
642 
643 #if KBLIB_USE_STRING_VIEW
644 
/**
 * @brief Checks whether haystack ends with needle.
 *
 * @param haystack The string to inspect.
 * @param needle The suffix to look for.
 * @return true if the last needle.size() characters of haystack equal
 * needle; an empty needle always matches.
 */
KBLIB_NODISCARD inline auto ends_with(std::string_view haystack,
                                      std::string_view needle) -> bool {
    if (needle.size() > haystack.size()) {
        return false;
    }
    const auto tail = haystack.substr(haystack.size() - needle.size());
    return tail == needle;
}
658 
/**
 * @brief Checks whether the last character of haystack is needle.
 *
 * @param haystack The string to inspect.
 * @param needle The character to look for.
 * @return true if haystack is non-empty and its last character equals
 * needle.
 */
KBLIB_NODISCARD inline auto ends_with(std::string_view haystack, char needle)
    -> bool {
    if (haystack.empty()) {
        return false;
    }
    return haystack.back() == needle;
}
669 
/**
 * @brief Checks whether haystack begins with needle.
 *
 * @param haystack The string to inspect.
 * @param needle The prefix to look for.
 * @return true if the first needle.size() characters of haystack equal
 * needle; an empty needle always matches.
 */
KBLIB_NODISCARD inline auto starts_with(std::string_view haystack,
                                        std::string_view needle) -> bool {
    if (needle.size() > haystack.size()) {
        return false;
    }
    const auto head = haystack.substr(0, needle.size());
    return head == needle;
}
681 
/**
 * @brief Checks whether the first character of haystack is needle.
 *
 * @param haystack The string to inspect.
 * @param needle The character to look for.
 * @return true if haystack is non-empty and its first character equals
 * needle.
 */
KBLIB_NODISCARD inline auto starts_with(std::string_view haystack, char needle)
    -> bool {
    if (haystack.empty()) {
        return false;
    }
    return haystack.front() == needle;
}
692 
693 #endif
694 
695 } // namespace kblib
696 
697 #endif // KBLIB_STRINGOPS_H
Provides general-purpose algorithms, similar to the <algorithm> header.
Contains some utilities for manipulating and querying string representations.
auto to_char_type(IntT ch)
Definition: stringops.h:564
auto toupper(char ch)
Definition: stringops.h:576
auto towlower(wchar_t ch)
Definition: stringops.h:572
auto to_int_type(CharT ch)
Definition: stringops.h:560
auto tolower(char ch)
Definition: stringops.h:568
auto towupper(wchar_t ch)
Definition: stringops.h:580
constexpr auto size(const C &c) -> decltype(c.size())
Definition: fakestd.h:365
The main namespace in which all entities from kblib are defined.
Definition: algorithm.h:44
auto split_tokens(const String &in, Predicate spacer) -> return_assert_t< is_callable< Predicate, typename Container::value_type::value_type >::value, Container >
Split a string on all condensed delimiters.
Definition: stringops.h:397
constexpr auto exchange(T &obj, U &&new_value) -> T
Definition: fakestd.h:718
constexpr auto size(const C &c) -> decltype(c.size())
Definition: fakestd.h:1069
auto consumer(F f) -> consume_iterator< F >
Creates a consume_iterator of deduced type F.
Definition: iterators.h:1599
auto kbsplit2(const String &in, char delim=' ') -> Container
Definition: stringops.h:474
constexpr auto tolower(string str) -> string
Folds all characters in a string using the default execution character set to lowercase.
Definition: stringops.h:591
auto try_reserve(C &c, std::size_t s) noexcept(noexcept(c.reserve(s))) -> void
Attempt to reserve capacity in a container. No-op if unsupported.
Definition: traits.h:239
constexpr auto range(Value min, Value max, Delta step=0) -> range_t< Value, Delta >
Constructs a range from beginning, end, and step amount. The range is half-open, that is, min is in the range but max is not.
Definition: iterators.h:620
constexpr auto ends_with(BidirIt1 begin1, BidirIt1 end1, BidirIt2 begin2, BidirIt2 end2, BinaryPred pred={}) -> enable_if_t<(is_bidirectional_iterator_v< BidirIt1 > and is_bidirectional_iterator_v< BidirIt2 >) and not(is_random_access_iterator_v< BidirIt1 > and is_random_access_iterator_v< BidirIt2 >), bool >
Checks if a given range ends with a particular subrange.
Definition: algorithm.h:781
constexpr auto starts_with(InputIt1 begin1, EndIt1 end1, InputIt2 begin2, EndIt2 end2, BinaryPred pred) -> enable_if_t<(is_input_iterator_v< InputIt1 > and is_input_iterator_v< InputIt2 >) and not(is_random_access_iterator_v< InputIt1 > and is_random_access_iterator_v< InputIt2 >), bool >
Checks if a given range starts with a particular subrange.
Definition: algorithm.h:738
auto reverse_str(string val) -> string
Reverses all the elements of its input.
Definition: stringops.h:552
auto to_string(Int num) -> std::string
Definition: convert.h:71
typename std::decay< T >::type decay_t
Definition: fakestd.h:57
auto split_dsv(const String &str, char delim) -> Container
Split a string on all instances of delim.
Definition: stringops.h:501
constexpr auto count_digits(Number val) -> enable_if_t< std::is_floating_point< Number >::value, int >
Calculates the number of decimal digits needed to represent a number, plus one for negative numbers.
Definition: format.h:50
constexpr auto accumulate(InputIt first, InputIt last, T init) -> T
A constexpr version of std::accumulate.
Definition: algorithm.h:162
auto toupper(string str) -> string
Folds all characters in a string using the default execution character set to uppercase.
Definition: stringops.h:604
constexpr auto copy(InputIt first, EndIt last, OutputIt out) -> OutputIt
Copies all elements of [first, last) to out. It also allows for a sentinel end iterator.
Definition: algorithm.h:1322
typename return_assert< V, T >::type return_assert_t
Definition: fakestd.h:542
constexpr auto find_in_if(ForwardIt begin, EndIt end, UnaryPredicate pred) noexcept(noexcept(kblib::invoke(pred, *begin))) -> size_t
Find the offset of the first element for which pred returns true. It also allows for a sentinel end iterator.
Definition: algorithm.h:474
constexpr auto to_unsigned(I x) -> std::make_unsigned_t< I >
Cast integral argument to corresponding unsigned type.
Definition: fakestd.h:585
constexpr auto repeat(std::size_t N, Callable func) noexcept(noexcept(func())) -> return_assert_t< is_invocable< Callable >::value, void >
Invoke a function N times.
Definition: algorithm.h:53
constexpr auto transform(InputIt first, EndIt last, OutputIt d_first, UnaryOperation unary_op) -> OutputIt
transform applies the given function to a range and stores the result in another range,...
Definition: algorithm.h:1631
Definition: bits.h:714
Provides macros and basic templates used by the rest of kblib.
#define KBLIB_NODISCARD
This internal macro is used to provide a fallback for [[nodiscard]] in C++14.
Definition: tdecl.h:81
Contains some type traits not in the standard library that are useful in the implementation of kblib.