// Action Engine
// unindent.h
1#ifndef ACTIONENGINE_REDIS_CHUNK_STORE_OPS_UNINDENT_H_
2#define ACTIONENGINE_REDIS_CHUNK_STORE_OPS_UNINDENT_H_
3
4// Taken from: https://stackoverflow.com/a/75105367
5
6#include <algorithm>
7#include <cstddef>
8#include <iterator>
9#include <ranges>
10#include <string_view>
11#include <utility>
12#include <vector>
13
14namespace multiline_raw_string {
// Shorthand for std::basic_string_view over an arbitrary character type,
// using the default character traits.
template <class CharT>
using string_view = std::basic_string_view<CharT>;
17
18// characters that are considered space
19// we need this because std::isspace is not constexpr
20template <class char_type>
21constexpr string_view<char_type> space_chars =
22 std::declval<string_view<char_type>>();
23template <>
24constexpr inline string_view<char> space_chars<char> = " \f\n\r\t\v";
25template <>
26constexpr inline string_view<wchar_t> space_chars<wchar_t> = L" \f\n\r\t\v";
27template <>
28constexpr inline string_view<char8_t> space_chars<char8_t> = u8" \f\n\r\t\v";
29template <>
30constexpr inline string_view<char16_t> space_chars<char16_t> = u" \f\n\r\t\v";
31template <>
32constexpr inline string_view<char32_t> space_chars<char32_t> = U" \f\n\r\t\v";
33
34// list of all potential line endings that could be encountered
35template <class char_type>
36constexpr string_view<char_type> potential_line_endings[] =
37 std::declval<string_view<char_type>[]>();
38
39template <>
40constexpr inline string_view<char> potential_line_endings<char>[] = {
41 "\r\n", "\r", "\n"};
42template <>
43constexpr inline string_view<wchar_t> potential_line_endings<wchar_t>[] = {
44 L"\r\n", L"\r", L"\n"};
45template <>
46constexpr inline string_view<char8_t> potential_line_endings<char8_t>[] = {
47 u8"\r\n", u8"\r", u8"\n"};
48template <>
49constexpr inline string_view<char16_t> potential_line_endings<char16_t>[] = {
50 u"\r\n", u"\r", u"\n"};
51template <>
52constexpr inline string_view<char32_t> potential_line_endings<char32_t>[] = {
53 U"\r\n", U"\r", U"\n"};
54
55// null-terminator for the different character types
56template <class char_type>
57constexpr char_type null_char = std::declval<char_type>();
58template <>
59constexpr inline char null_char<char> = '\0';
60template <>
61constexpr inline wchar_t null_char<wchar_t> = L'\0';
62template <>
63constexpr inline char8_t null_char<char8_t> = static_cast<char8_t>('\0');
64template <>
65constexpr inline char16_t null_char<char16_t> = u'\0';
66template <>
67constexpr inline char32_t null_char<char32_t> = U'\0';
68
69// detects the line ending used within a string.
70// e.g. detect_line_ending("foo\nbar\nbaz") -> "\n"
71template <class char_type>
72consteval string_view<char_type> detect_line_ending(
73 string_view<char_type> str) {
74 return *std::ranges::max_element(potential_line_endings<char_type>, {},
75 [str](string_view<char_type> line_ending) {
76 // count the number of lines we would get with line_ending
77 auto view =
78 std::views::split(str, line_ending);
79 return std::ranges::distance(view);
80 });
81}
82
83// returns a view to the leading sequence of space characters within a string
84// e.g. get_leading_space_sequence(" \t foo") -> " \t "
85template <class char_type>
86consteval string_view<char_type> get_leading_space_sequence(
87 string_view<char_type> line) {
88 return line.substr(0, line.find_first_not_of(space_chars<char_type>));
89}
90
91// checks if a line consists purely out of space characters
92// e.g. is_line_empty(" \t") -> true
93// is_line_empty(" foo") -> false
94template <class char_type>
95consteval bool is_line_empty(string_view<char_type> line) {
96 return get_leading_space_sequence(line).size() == line.size();
97}
98
99// splits a string into individual lines
100// and removes the first & last line if they are empty
101// e.g. split_lines("\na\nb\nc\n", "\n") -> {"a", "b", "c"}
102template <class char_type>
103consteval std::vector<string_view<char_type>> split_lines(
104 string_view<char_type> str, string_view<char_type> line_ending) {
105 std::vector<string_view<char_type>> lines;
106
107 for (auto line : std::views::split(str, line_ending)) {
108 lines.emplace_back(line.begin(), line.end());
109 }
110
111 // remove first/last lines in case they are completely empty
112 if (lines.size() > 1 && is_line_empty(lines[0])) {
113 lines.erase(lines.begin());
114 }
115 if (lines.size() > 1 && is_line_empty(lines[lines.size() - 1])) {
116 lines.erase(lines.end() - 1);
117 }
118
119 return lines;
120}
121
122// determines the longest possible sequence of space characters
123// that we can remove from each line.
124// e.g. determine_common_space_prefix_sequence({" \ta", " foo", " \t\tbar"}) -> " "
125template <class char_type>
126consteval string_view<char_type> determine_common_space_prefix_sequence(
127 std::vector<string_view<char_type>> const& lines) {
128 std::vector<string_view<char_type>> space_sequences = {
129 string_view<char_type>{} // empty string
130 };
131
132 for (string_view<char_type> line : lines) {
133 string_view<char_type> spaces = get_leading_space_sequence(line);
134 for (std::size_t len = 1; len <= spaces.size(); len++) {
135 space_sequences.emplace_back(spaces.substr(0, len));
136 }
137
138 // remove duplicates
139 std::ranges::sort(space_sequences);
140 auto [first, last] = std::ranges::unique(space_sequences);
141 space_sequences.erase(first, last);
142 }
143
144 // only consider space prefix sequences that apply to all lines
145 // (ignoring completely blank lines)
146 auto shared_prefixes = std::views::filter(
147 space_sequences, [&lines](string_view<char_type> prefix) {
148 return std::ranges::all_of(
149 lines, [&prefix](string_view<char_type> line) {
150 return line.empty() || line.starts_with(prefix);
151 });
152 });
153
154 // select the longest possible space prefix sequence
155 return *std::ranges::max_element(shared_prefixes, {},
156 &string_view<char_type>::size);
157}
158
159// unindents the individual lines of a raw string literal
160// e.g. unindent_string(" \n a\n b\n c\n") -> "a\nb\nc"
161template <class char_type>
162consteval std::vector<char_type> unindent_string(string_view<char_type> str) {
163 string_view<char_type> line_ending = detect_line_ending(str);
164 std::vector<string_view<char_type>> lines = split_lines(str, line_ending);
165 string_view<char_type> common_space_sequence =
166 determine_common_space_prefix_sequence(lines);
167
168 std::vector<char_type> new_string;
169 bool is_first = true;
170 for (auto line : lines) {
171 // append newline
172 if (is_first) {
173 is_first = false;
174 } else {
175 new_string.insert(new_string.end(), line_ending.begin(),
176 line_ending.end());
177 }
178
179 // append unindented line
180 if (line.empty()) {
181 continue;
182 }
183 auto unindented = line.substr(common_space_sequence.size());
184 new_string.insert(new_string.end(), unindented.begin(), unindented.end());
185 }
186
187 // add null terminator
188 new_string.push_back(null_char<char_type>);
189
190 return new_string;
191}
192
193// returns the size required for the unindented string
194template <class char_type>
195consteval std::size_t unindent_string_size(string_view<char_type> str) {
196 return unindent_string(str).size();
197}
198
199// simple type that stores a raw string
200// we need this to get around the limitation that string literals
201// are not considered valid non-type template arguments.
202template <class _char_type, std::size_t size>
203struct string_wrapper {
204 using char_type = _char_type;
205
206 consteval string_wrapper(const char_type (&arr)[size]) {
207 std::ranges::copy(arr, str);
208 }
209
210 char_type str[size];
211};
212
213// used for sneakily creating and storing
214// the unindented string in a template parameter.
215template <string_wrapper sw>
216struct unindented_string_wrapper {
217 using char_type = typename decltype(sw)::char_type;
218 static constexpr std::size_t buffer_size =
219 unindent_string_size<char_type>(sw.str);
220 using array_ref = const char_type (&)[buffer_size];
221
222 consteval unindented_string_wrapper(int) {
223 auto newstr = unindent_string<char_type>(sw.str);
224 std::ranges::copy(newstr, buffer);
225 }
226
227 consteval array_ref get() const { return buffer; }
228
229 char_type buffer[buffer_size];
230};
231
232// uses a defaulted template argument that depends on the str
233// to initialize the unindented string within a template parameter.
234// this enables us to return a reference to the unindented string.
235template <string_wrapper str, unindented_string_wrapper<str> unindented = 0>
236consteval decltype(auto) do_unindent() {
237 return unindented.get();
238}
239
240// the actual user-defined string literal operator
241template <string_wrapper str>
242consteval decltype(auto) operator"" _unindent() {
243 return do_unindent<str>();
244}
245} // namespace multiline_raw_string
246
247using multiline_raw_string::operator"" _unindent;
248
249#endif // ACTIONENGINE_REDIS_CHUNK_STORE_OPS_UNINDENT_H_