From 841bb4e5bd319e30187245e4d677fe88e6d78c50 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 22 Jul 2018 15:33:07 -0400 Subject: [PATCH 1/3] string_util: Remove unnecessary std::string instance in TabsToSpaces() We can just use the variant of std::string's replace() function that can replace an occurrence with N copies of the same character, eliminating the need to allocate a std::string containing a buffer of spaces. --- src/common/string_util.cpp | 13 ++++++------- src/common/string_util.h | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index 80f9985491..f7ce3d93c6 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -236,15 +236,14 @@ void SplitString(const std::string& str, const char delim, std::vector& output); From f34dde32d10413d0540c8cd91bf192267a619697 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 22 Jul 2018 15:36:30 -0400 Subject: [PATCH 2/3] string_util: Use emplace_back() in SplitString() instead of push_back() This is equivalent to doing: push_back(std::string("")); which is likely not to cause issues, assuming a decent std::string implementation with small-string optimizations implemented in its design; however, it's still a little unnecessary to copy that buffer regardless. Instead, we can use emplace_back() to directly construct the empty string within the std::vector instance, eliminating any possible overhead from the copy. 
--- src/common/string_util.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index f7ce3d93c6..670c857ddc 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -230,8 +230,9 @@ void SplitString(const std::string& str, const char delim, std::vector Date: Sun, 22 Jul 2018 15:47:37 -0400 Subject: [PATCH 3/3] string_util: Get rid of separate resize() in CPToUTF16(), UTF16ToUTF8(), CodeToUTF8() and UTF8ToUTF16() There's no need to perform the resize separately here, since the constructor allows presizing the buffer. Also move the empty string check before the construction of the string to make the early out more straightforward. --- src/common/string_util.cpp | 42 ++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index 670c857ddc..27f37b86d2 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -288,31 +288,37 @@ std::u16string UTF8ToUTF16(const std::string& input) { } static std::wstring CPToUTF16(u32 code_page, const std::string& input) { - auto const size = + const auto size = MultiByteToWideChar(code_page, 0, input.data(), static_cast(input.size()), nullptr, 0); - std::wstring output; - output.resize(size); + if (size == 0) { + return L""; + } - if (size == 0 || - size != MultiByteToWideChar(code_page, 0, input.data(), static_cast(input.size()), - &output[0], static_cast(output.size()))) + std::wstring output(size, L'\0'); + + if (size != MultiByteToWideChar(code_page, 0, input.data(), static_cast(input.size()), + &output[0], static_cast(output.size()))) { output.clear(); + } return output; } std::string UTF16ToUTF8(const std::wstring& input) { - auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), static_cast(input.size()), + const auto size = WideCharToMultiByte(CP_UTF8, 0, input.data(), static_cast(input.size()), nullptr, 
0, nullptr, nullptr); + if (size == 0) { + return ""; + } - std::string output; - output.resize(size); + std::string output(size, '\0'); - if (size == 0 || - size != WideCharToMultiByte(CP_UTF8, 0, input.data(), static_cast(input.size()), - &output[0], static_cast(output.size()), nullptr, nullptr)) + if (size != WideCharToMultiByte(CP_UTF8, 0, input.data(), static_cast(input.size()), + &output[0], static_cast(output.size()), nullptr, + nullptr)) { output.clear(); + } return output; } @@ -333,8 +339,6 @@ std::string CP1252ToUTF8(const std::string& input) { template static std::string CodeToUTF8(const char* fromcode, const std::basic_string& input) { - std::string result; - iconv_t const conv_desc = iconv_open("UTF-8", fromcode); if ((iconv_t)(-1) == conv_desc) { LOG_ERROR(Common, "Iconv initialization failure [{}]: {}", fromcode, strerror(errno)); @@ -346,8 +350,7 @@ static std::string CodeToUTF8(const char* fromcode, const std::basic_string& // Multiply by 4, which is the max number of bytes to encode a codepoint const size_t out_buffer_size = 4 * in_bytes; - std::string out_buffer; - out_buffer.resize(out_buffer_size); + std::string out_buffer(out_buffer_size, '\0'); auto src_buffer = &input[0]; size_t src_bytes = in_bytes; @@ -372,6 +375,7 @@ static std::string CodeToUTF8(const char* fromcode, const std::basic_string& } } + std::string result; out_buffer.resize(out_buffer_size - dst_bytes); out_buffer.swap(result); @@ -381,8 +385,6 @@ static std::string CodeToUTF8(const char* fromcode, const std::basic_string& } std::u16string UTF8ToUTF16(const std::string& input) { - std::u16string result; - iconv_t const conv_desc = iconv_open("UTF-16LE", "UTF-8"); if ((iconv_t)(-1) == conv_desc) { LOG_ERROR(Common, "Iconv initialization failure [UTF-8]: {}", strerror(errno)); @@ -394,8 +396,7 @@ std::u16string UTF8ToUTF16(const std::string& input) { // Multiply by 4, which is the max number of bytes to encode a codepoint const size_t out_buffer_size = 4 * sizeof(char16_t) * 
in_bytes; - std::u16string out_buffer; - out_buffer.resize(out_buffer_size); + std::u16string out_buffer(out_buffer_size, char16_t{}); char* src_buffer = const_cast(&input[0]); size_t src_bytes = in_bytes; @@ -420,6 +421,7 @@ std::u16string UTF8ToUTF16(const std::string& input) { } } + std::u16string result; out_buffer.resize(out_buffer_size - dst_bytes); out_buffer.swap(result);