#include #include #include #include #include #include #include #include #include #include #include #include #include #include std::vector setThenSort(std::string in) { std::istringstream fin(in); std::vector words; std::string word; while (fin >> word) { std::transform(word.begin(), word.end(), word.begin(), [](char c) { return std::tolower(c); }); if (find(words.begin(), words.end(), word) == words.end()) words.push_back(word); } std::sort(words.begin(), words.end()); // for (int i = 0; i < words.size(); i++) { // std::cout << words[i] << ",\t"; // } // std::cout << "\n\n"; return words; } std::vector sortThenSet(std::string in) { std::istringstream fin(in); std::vector words; std::string word; while (fin >> word) { std::transform(word.begin(), word.end(), word.begin(), [](char c) { return std::tolower(c); }); words.push_back(word); } std::sort(words.begin(), words.end()); std::vector rwords; for (int i = 0; i < words.size(); i++) { if (find(words.begin(), words.end(), words[i]) == words.end()) { // std::cout << words[i] << ",\t"; rwords.push_back(words[i]); } } // std::cout << "\n\n"; return rwords; } std::vector sortThenSet_fixed(std::string in) { std::istringstream fin(in); std::vector words; std::string word; while (fin >> word) { std::transform(word.begin(), word.end(), word.begin(), [](char c) { return std::tolower(c); }); words.push_back(word); } std::sort(words.begin(), words.end()); words.erase(std::unique(words.begin(), words.end()), words.end()); // for (auto w : words) { // std::cout << w << ",\t"; // } // std::cout << "\n\n"; return words; } std::set allSet(std::string in) { std::istringstream fin(in); std::set words; std::string word; while (fin >> word) { std::transform(word.begin(), word.end(), word.begin(), [](char c) { return std::tolower(c); }); words.insert(word); } // for (auto w : words) { // std::cout << w << ",\t"; // } // std::cout << "\n\n"; return words; } auto allSetU(std::string in) { std::istringstream fin(in); std::unordered_set words; std::string word; while (fin >> word) { std::transform(word.begin(), word.end(), word.begin(), [](char c) { return std::tolower(c); }); words.insert(word); } std::vector rwords(words.begin(), words.end()); std::sort(rwords.begin(), rwords.end()); // for (auto w : rwords) { // std::cout << w << ",\t"; // } // std::cout << "\n\n"; return rwords; } auto flatSet(std::string in) { std::istringstream fin(in); boost::container::flat_set words; std::string word; while (fin >> word) { std::transform(word.begin(), word.end(), word.begin(), [](char c) { return std::tolower(c); }); words.insert(word); } // for (auto w : words) { // std::cout << w << ",\t"; // } // std::cout << "\n\n"; return words; } int uniq_main(int argc, char** argv) { std::string buffer; if (argc == 1) { std::ostringstream ss; ss << std::cin.rdbuf(); buffer = ss.str(); } else if (argc >= 2) { auto r = kblib::get_file_contents(argv[1]); if (!r) { std::cerr << "Error reading file " << std::quoted(argv[1]) << '\n'; return 1; } buffer = *r; } auto test = [&](auto f, auto id) { std::cout << id << ":\n"; auto s = std::chrono::steady_clock::now(); auto results = f(buffer); auto e = std::chrono::steady_clock::now(); std::cout << '\t' << results.size() << " words, " << std::chrono::duration_cast(e - s) .count() << "us\n"; }; { test( [](std::string in) { std::istringstream fin(in); std::vector words; std::string word; while (fin >> word) { std::transform(word.begin(), word.end(), word.begin(), [](char c) { return std::tolower(c); }); words.push_back(word); } return words; }, "no-op"); test(setThenSort, "!find() then sort()"); // test(sortThenSet, "sort then !find()"); test(sortThenSet_fixed, "sort then unique()"); test(allSet, "set"); test(allSetU, "unordered_set"); test(flatSet, "boost::flat_set"); } return 0; auto results = std::tuple{setThenSort(buffer), sortThenSet(buffer), sortThenSet_fixed(buffer), allSet(buffer), allSetU(buffer)}; std::cout << "!find() then sort(): " << std::get<0>(results).size() << " words\n" << "sort() then !find(): " << std::get<1>(results).size() << " words\n" << "sort() then unique(): " << std::get<2>(results).size() << " words\n" << "set: " << std::get<3>(results).size() << " words\n" << "unordered_set: " << std::get<4>(results).size() << " words\n"; return 0; }