Line | Branch | Exec | Source |
---|---|---|---|
1 | // | ||
2 | // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) | ||
3 | // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com) | ||
4 | // | ||
5 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
6 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
7 | // | ||
8 | // Official repository: https://github.com/boostorg/url | ||
9 | // | ||
10 | |||
11 | |||
12 | #include <boost/url/detail/config.hpp> | ||
13 | #include <boost/url/decode_view.hpp> | ||
14 | #include "decode.hpp" | ||
15 | #include <boost/url/segments_encoded_view.hpp> | ||
16 | #include <boost/url/grammar/ci_string.hpp> | ||
17 | #include <boost/assert.hpp> | ||
18 | #include <boost/core/ignore_unused.hpp> | ||
19 | #include <cstring> | ||
20 | #include "normalize.hpp" | ||
21 | |||
22 | namespace boost { | ||
23 | namespace urls { | ||
24 | namespace detail { | ||
25 | |||
26 | void | ||
27 | 7280 | pop_encoded_front( | |
28 | core::string_view& s, | ||
29 | char& c, | ||
30 | std::size_t& n) noexcept | ||
31 | { | ||
32 |
2/2✓ Branch 1 taken 7190 times.
✓ Branch 2 taken 90 times.
|
7280 | if(s.front() != '%') |
33 | { | ||
34 | 7190 | c = s.front(); | |
35 | 7190 | s.remove_prefix(1); | |
36 | } | ||
37 | else | ||
38 | { | ||
39 | 90 | detail::decode_unsafe( | |
40 | &c, | ||
41 | &c + 1, | ||
42 | s.substr(0, 3)); | ||
43 | 90 | s.remove_prefix(3); | |
44 | } | ||
45 | 7280 | ++n; | |
46 | 7280 | } | |
47 | |||
48 | int | ||
49 | 77 | compare_encoded( | |
50 | core::string_view lhs, | ||
51 | core::string_view rhs) noexcept | ||
52 | { | ||
53 | 77 | std::size_t n0 = 0; | |
54 | 77 | std::size_t n1 = 0; | |
55 | 77 | char c0 = 0; | |
56 | 77 | char c1 = 0; | |
57 | 205 | while( | |
58 |
4/4✓ Branch 1 taken 253 times.
✓ Branch 2 taken 29 times.
✓ Branch 3 taken 240 times.
✓ Branch 4 taken 42 times.
|
535 | !lhs.empty() && |
59 |
2/2✓ Branch 1 taken 240 times.
✓ Branch 2 taken 13 times.
|
253 | !rhs.empty()) |
60 | { | ||
61 | 240 | pop_encoded_front(lhs, c0, n0); | |
62 | 240 | pop_encoded_front(rhs, c1, n1); | |
63 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 220 times.
|
240 | if (c0 < c1) |
64 | 20 | return -1; | |
65 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 205 times.
|
220 | if (c1 < c0) |
66 | 15 | return 1; | |
67 | } | ||
68 | 42 | n0 += detail::decode_bytes_unsafe(lhs); | |
69 | 42 | n1 += detail::decode_bytes_unsafe(rhs); | |
70 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
|
42 | if (n0 == n1) |
71 | 21 | return 0; | |
72 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 13 times.
|
21 | if (n0 < n1) |
73 | 8 | return -1; | |
74 | 13 | return 1; | |
75 | } | ||
76 | |||
77 | void | ||
78 | 1216 | digest_encoded( | |
79 | core::string_view s, | ||
80 | fnv_1a& hasher) noexcept | ||
81 | { | ||
82 | 1216 | char c = 0; | |
83 | 1216 | std::size_t n = 0; | |
84 |
2/2✓ Branch 1 taken 508 times.
✓ Branch 2 taken 1216 times.
|
1724 | while(!s.empty()) |
85 | { | ||
86 | 508 | pop_encoded_front(s, c, n); | |
87 | 508 | hasher.put(c); | |
88 | } | ||
89 | 1216 | } | |
90 | |||
91 | int | ||
92 | 159 | ci_compare_encoded( | |
93 | core::string_view lhs, | ||
94 | core::string_view rhs) noexcept | ||
95 | { | ||
96 | 159 | std::size_t n0 = 0; | |
97 | 159 | std::size_t n1 = 0; | |
98 | 159 | char c0 = 0; | |
99 | 159 | char c1 = 0; | |
100 | 2105 | while ( | |
101 |
4/4✓ Branch 1 taken 2121 times.
✓ Branch 2 taken 143 times.
✓ Branch 3 taken 2115 times.
✓ Branch 4 taken 149 times.
|
4385 | !lhs.empty() && |
102 |
2/2✓ Branch 1 taken 2115 times.
✓ Branch 2 taken 6 times.
|
2121 | !rhs.empty()) |
103 | { | ||
104 | 2115 | pop_encoded_front(lhs, c0, n0); | |
105 | 2115 | pop_encoded_front(rhs, c1, n1); | |
106 | 2115 | c0 = grammar::to_lower(c0); | |
107 | 2115 | c1 = grammar::to_lower(c1); | |
108 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2107 times.
|
2115 | if (c0 < c1) |
109 | 8 | return -1; | |
110 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2105 times.
|
2107 | if (c1 < c0) |
111 | 2 | return 1; | |
112 | } | ||
113 | 149 | n0 += detail::decode_bytes_unsafe(lhs); | |
114 | 149 | n1 += detail::decode_bytes_unsafe(rhs); | |
115 |
2/2✓ Branch 0 taken 142 times.
✓ Branch 1 taken 7 times.
|
149 | if (n0 == n1) |
116 | 142 | return 0; | |
117 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 6 times.
|
7 | if (n0 < n1) |
118 | 1 | return -1; | |
119 | 6 | return 1; | |
120 | } | ||
121 | |||
122 | void | ||
123 | 304 | ci_digest_encoded( | |
124 | core::string_view s, | ||
125 | fnv_1a& hasher) noexcept | ||
126 | { | ||
127 | 304 | char c = 0; | |
128 | 304 | std::size_t n = 0; | |
129 |
2/2✓ Branch 1 taken 2062 times.
✓ Branch 2 taken 304 times.
|
2366 | while(!s.empty()) |
130 | { | ||
131 | 2062 | pop_encoded_front(s, c, n); | |
132 | 2062 | c = grammar::to_lower(c); | |
133 | 2062 | hasher.put(c); | |
134 | } | ||
135 | 304 | } | |
136 | |||
137 | int | ||
138 | 46 | compare( | |
139 | core::string_view lhs, | ||
140 | core::string_view rhs) noexcept | ||
141 | { | ||
142 | 46 | auto rlen = (std::min)(lhs.size(), rhs.size()); | |
143 |
2/2✓ Branch 0 taken 79 times.
✓ Branch 1 taken 25 times.
|
104 | for (std::size_t i = 0; i < rlen; ++i) |
144 | { | ||
145 | 79 | char c0 = lhs[i]; | |
146 | 79 | char c1 = rhs[i]; | |
147 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 66 times.
|
79 | if (c0 < c1) |
148 | 13 | return -1; | |
149 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 58 times.
|
66 | if (c1 < c0) |
150 | 8 | return 1; | |
151 | } | ||
152 |
2/2✓ Branch 2 taken 4 times.
✓ Branch 3 taken 21 times.
|
25 | if ( lhs.size() == rhs.size() ) |
153 | 4 | return 0; | |
154 |
2/2✓ Branch 2 taken 8 times.
✓ Branch 3 taken 13 times.
|
21 | if ( lhs.size() < rhs.size() ) |
155 | 8 | return -1; | |
156 | 13 | return 1; | |
157 | } | ||
158 | |||
159 | int | ||
160 | 196 | ci_compare( | |
161 | core::string_view lhs, | ||
162 | core::string_view rhs) noexcept | ||
163 | { | ||
164 | 196 | auto rlen = (std::min)(lhs.size(), rhs.size()); | |
165 |
2/2✓ Branch 0 taken 800 times.
✓ Branch 1 taken 189 times.
|
989 | for (std::size_t i = 0; i < rlen; ++i) |
166 | { | ||
167 | 800 | char c0 = grammar::to_lower(lhs[i]); | |
168 | 800 | char c1 = grammar::to_lower(rhs[i]); | |
169 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 794 times.
|
800 | if (c0 < c1) |
170 | 6 | return -1; | |
171 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 793 times.
|
794 | if (c1 < c0) |
172 | 1 | return 1; | |
173 | } | ||
174 |
2/2✓ Branch 2 taken 182 times.
✓ Branch 3 taken 7 times.
|
189 | if ( lhs.size() == rhs.size() ) |
175 | 182 | return 0; | |
176 |
2/2✓ Branch 2 taken 6 times.
✓ Branch 3 taken 1 times.
|
7 | if ( lhs.size() < rhs.size() ) |
177 | 6 | return -1; | |
178 | 1 | return 1; | |
179 | } | ||
180 | |||
181 | void | ||
182 | 304 | ci_digest( | |
183 | core::string_view s, | ||
184 | fnv_1a& hasher) noexcept | ||
185 | { | ||
186 |
2/2✓ Branch 2 taken 730 times.
✓ Branch 3 taken 304 times.
|
1034 | for (char c: s) |
187 | { | ||
188 | 730 | c = grammar::to_lower(c); | |
189 | 730 | hasher.put(c); | |
190 | } | ||
191 | 304 | } | |
192 | |||
193 | /* Check if a string ends with the specified suffix (decoded comparison) | ||
194 | |||
195 | This function determines if a string ends with the specified suffix | ||
196 | when the string and suffix are compared after percent-decoding. | ||
197 | |||
198 | @param str The string to check (percent-encoded) | ||
199 | @param suffix The suffix to check for (percent-decoded) | ||
200 | @return The number of encoded chars consumed in the string | ||
201 | */ | ||
202 | std::size_t | ||
203 | 2136 | path_ends_with( | |
204 | core::string_view str, | ||
205 | core::string_view suffix) noexcept | ||
206 | { | ||
207 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
|
2136 | BOOST_ASSERT(!str.empty()); |
208 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
|
2136 | BOOST_ASSERT(!suffix.empty()); |
209 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
|
2136 | BOOST_ASSERT(!suffix.contains("%2F")); |
210 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
|
2136 | BOOST_ASSERT(!suffix.contains("%2f")); |
211 | 5848 | auto consume_last = []( | |
212 | core::string_view::iterator& it, | ||
213 | core::string_view::iterator& end, | ||
214 | char& c) | ||
215 | { | ||
216 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5848 times.
|
5848 | BOOST_ASSERT(end > it); |
217 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5848 times.
|
5848 | BOOST_ASSERT(it != end); |
218 |
4/4✓ Branch 0 taken 3960 times.
✓ Branch 1 taken 1888 times.
✓ Branch 2 taken 5800 times.
✓ Branch 3 taken 48 times.
|
9808 | if ((end - it) < 3 || |
219 |
2/2✓ Branch 1 taken 3912 times.
✓ Branch 2 taken 48 times.
|
3960 | *(std::prev(end, 3)) != '%') |
220 | { | ||
221 | 5800 | c = *--end; | |
222 | 5800 | return false; | |
223 | } | ||
224 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
48 | detail::decode_unsafe( |
225 | &c, | ||
226 | &c + 1, | ||
227 | core::string_view(std::prev( | ||
228 | end, 3), 3)); | ||
229 | 48 | end -= 3; | |
230 | 48 | return true; | |
231 | }; | ||
232 | |||
233 | 2136 | auto it0 = str.begin(); | |
234 | 2136 | auto end0 = str.end(); | |
235 | 2136 | auto it1 = suffix.begin(); | |
236 | 2136 | auto end1 = suffix.end(); | |
237 | 2136 | char c0 = 0; | |
238 | 2136 | char c1 = 0; | |
239 | 1112 | while( | |
240 |
2/2✓ Branch 0 taken 3006 times.
✓ Branch 1 taken 242 times.
|
3248 | it0 < end0 && |
241 |
2/2✓ Branch 0 taken 2932 times.
✓ Branch 1 taken 74 times.
|
3006 | it1 < end1) |
242 | { | ||
243 | 2932 | bool const is_encoded = consume_last(it0, end0, c0); | |
244 | // The suffix never contains an encoded slash (%2F), and a decoded | ||
245 | // slash is not equivalent to an encoded slash | ||
246 |
4/4✓ Branch 0 taken 48 times.
✓ Branch 1 taken 2884 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 32 times.
|
2932 | if (is_encoded && c0 == '/') |
247 | 16 | return 0; | |
248 | 2916 | consume_last(it1, end1, c1); | |
249 |
2/2✓ Branch 0 taken 1804 times.
✓ Branch 1 taken 1112 times.
|
2916 | if (c0 != c1) |
250 | 1804 | return 0; | |
251 | } | ||
252 | 316 | bool const consumed_suffix = it1 == end1; | |
253 |
2/2✓ Branch 0 taken 110 times.
✓ Branch 1 taken 206 times.
|
316 | if (consumed_suffix) |
254 | { | ||
255 | 110 | std::size_t const consumed_encoded = str.end() - end0; | |
256 | 110 | return consumed_encoded; | |
257 | } | ||
258 | 206 | return 0; | |
259 | } | ||
260 | |||
261 | std::size_t | ||
262 | 832 | remove_dot_segments( | |
263 | char* dest0, | ||
264 | char const* end, | ||
265 | core::string_view input) noexcept | ||
266 | { | ||
267 | // 1. The input buffer `s` is initialized with | ||
268 | // the now-appended path components and the | ||
269 | // output buffer `dest0` is initialized to | ||
270 | // the empty string. | ||
271 | 832 | char* dest = dest0; | |
272 | 832 | bool const is_absolute = input.starts_with('/'); | |
273 | |||
274 | // Step 2 is a loop through 5 production rules: | ||
275 | // https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4 | ||
276 | // | ||
277 | // There are no transitions between all rules, | ||
278 | // which enables some optimizations. | ||
279 | // | ||
280 | // Initial: | ||
281 | // - Rule A: handle initial dots | ||
282 | // If the input buffer begins with a | ||
283 | // prefix of "../" or "./", then remove | ||
284 | // that prefix from the input buffer. | ||
285 | // Rule A can only happen at the beginning. | ||
286 | // Errata 4547: Keep "../" in the beginning | ||
287 | // https://www.rfc-editor.org/errata/eid4547 | ||
288 | // | ||
289 | // Then: | ||
290 | // - Rule D: ignore a final ".." or "." | ||
291 | // if the input buffer consists only of "." | ||
292 | // or "..", then remove that from the input | ||
293 | // buffer. | ||
294 | // Rule D can only happen after Rule A because: | ||
295 | // - B and C write "/" to the input | ||
296 | // - E writes "/" to input or returns | ||
297 | // | ||
298 | // Then: | ||
299 | // - Rule B: ignore ".": write "/" to the input | ||
300 | // - Rule C: apply "..": remove seg and write "/" | ||
301 | // - Rule E: copy complete segment | ||
302 | auto append = | ||
303 | 1492 | [](char*& first, char const* last, core::string_view in) | |
304 | { | ||
305 | // append `in` to `dest` | ||
306 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1492 times.
|
1492 | BOOST_ASSERT(in.size() <= std::size_t(last - first)); |
307 | 1492 | std::memmove(first, in.data(), in.size()); | |
308 | 1492 | first += in.size(); | |
309 | ignore_unused(last); | ||
310 | 1492 | }; | |
311 | |||
312 | 9563 | auto dot_starts_with = []( | |
313 | core::string_view str, core::string_view dots, std::size_t& n) | ||
314 | { | ||
315 | // starts_with for encoded/decoded dots | ||
316 | // or decoded otherwise. return how many | ||
317 | // chars in str match the dots | ||
318 | 9563 | n = 0; | |
319 |
2/2✓ Branch 2 taken 16368 times.
✓ Branch 3 taken 550 times.
|
16918 | for (char c: dots) |
320 | { | ||
321 |
2/2✓ Branch 1 taken 7355 times.
✓ Branch 2 taken 9013 times.
|
16368 | if (str.starts_with(c)) |
322 | { | ||
323 | 7355 | str.remove_prefix(1); | |
324 | 7355 | ++n; | |
325 | 7355 | continue; | |
326 | } | ||
327 | |||
328 | // In the general case, we would need to | ||
329 | // check if the next char is an encoded | ||
330 | // dot. | ||
331 | // However, an encoded dot in `str` | ||
332 | // would have already been decoded in | ||
333 | // url_base::normalize_path(). | ||
334 | // This needs to be undone if | ||
335 | // `remove_dot_segments` is used in a | ||
336 | // different context. | ||
337 | // if (str.size() > 2 && | ||
338 | // c == '.' | ||
339 | // && | ||
340 | // str[0] == '%' && | ||
341 | // str[1] == '2' && | ||
342 | // (str[2] == 'e' || | ||
343 | // str[2] == 'E')) | ||
344 | // { | ||
345 | // str.remove_prefix(3); | ||
346 | // n += 3; | ||
347 | // continue; | ||
348 | // } | ||
349 | |||
350 | 9013 | n = 0; | |
351 | 9013 | return false; | |
352 | } | ||
353 | 550 | return true; | |
354 | }; | ||
355 | |||
356 | 4777 | auto dot_equal = [&dot_starts_with]( | |
357 | 4777 | core::string_view str, core::string_view dots) | |
358 | { | ||
359 | 4777 | std::size_t n = 0; | |
360 | 4777 | dot_starts_with(str, dots, n); | |
361 | 4777 | return n == str.size(); | |
362 | 832 | }; | |
363 | |||
364 | // Rule A | ||
365 | std::size_t n; | ||
366 |
2/2✓ Branch 1 taken 767 times.
✓ Branch 2 taken 81 times.
|
848 | while (!input.empty()) |
367 | { | ||
368 |
2/2✓ Branch 2 taken 4 times.
✓ Branch 3 taken 763 times.
|
767 | if (dot_starts_with(input, "../", n)) |
369 | { | ||
370 | // Errata 4547 | ||
371 | 4 | append(dest, end, "../"); | |
372 | 4 | input.remove_prefix(n); | |
373 | 4 | continue; | |
374 | } | ||
375 |
2/2✓ Branch 2 taken 751 times.
✓ Branch 3 taken 12 times.
|
763 | else if (!dot_starts_with(input, "./", n)) |
376 | { | ||
377 | 751 | break; | |
378 | } | ||
379 | 12 | input.remove_prefix(n); | |
380 | } | ||
381 | |||
382 | // Rule D | ||
383 |
2/2✓ Branch 2 taken 82 times.
✓ Branch 3 taken 750 times.
|
832 | if( dot_equal(input, ".")) |
384 | { | ||
385 | 82 | input = {}; | |
386 | } | ||
387 |
2/2✓ Branch 2 taken 3 times.
✓ Branch 3 taken 747 times.
|
750 | else if( dot_equal(input, "..") ) |
388 | { | ||
389 | // Errata 4547 | ||
390 | 3 | append(dest, end, ".."); | |
391 | 3 | input = {}; | |
392 | } | ||
393 | |||
394 | // 2. While the input buffer is not empty, | ||
395 | // loop as follows: | ||
396 |
2/2✓ Branch 1 taken 1648 times.
✓ Branch 2 taken 793 times.
|
2441 | while (!input.empty()) |
397 | { | ||
398 | // Rule B | ||
399 | 1648 | bool const is_dot_seg = dot_starts_with(input, "/./", n); | |
400 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 1616 times.
|
1648 | if (is_dot_seg) |
401 | { | ||
402 | 32 | input.remove_prefix(n - 1); | |
403 | 32 | continue; | |
404 | } | ||
405 | |||
406 | 1616 | bool const is_final_dot_seg = dot_equal(input, "/."); | |
407 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1608 times.
|
1616 | if (is_final_dot_seg) |
408 | { | ||
409 | // We can't remove "." from a core::string_view | ||
410 | // So what we do here is equivalent to | ||
411 | // replacing s with '/' as required | ||
412 | // in Rule B and executing the next | ||
413 | // iteration, which would append this | ||
414 | // '/' to the output, as required by | ||
415 | // Rule E | ||
416 | 8 | append(dest, end, input.substr(0, 1)); | |
417 | 8 | input = {}; | |
418 | 8 | break; | |
419 | } | ||
420 | |||
421 | // Rule C | ||
422 | 1608 | bool const is_dotdot_seg = dot_starts_with(input, "/../", n); | |
423 |
2/2✓ Branch 0 taken 193 times.
✓ Branch 1 taken 1415 times.
|
1608 | if (is_dotdot_seg) |
424 | { | ||
425 | 193 | core::string_view cur_out(dest0, dest - dest0); | |
426 | 193 | std::size_t p = cur_out.find_last_of('/'); | |
427 | 193 | bool const has_multiple_segs = p != core::string_view::npos; | |
428 |
2/2✓ Branch 0 taken 132 times.
✓ Branch 1 taken 61 times.
|
193 | if (has_multiple_segs) |
429 | { | ||
430 | // output has multiple segments | ||
431 | // "erase" [p, end] if not "/.." | ||
432 | 132 | core::string_view last_seg(dest0 + p, dest - (dest0 + p)); | |
433 | 132 | bool const prev_is_dotdot_seg = dot_equal(last_seg, "/.."); | |
434 |
2/2✓ Branch 0 taken 121 times.
✓ Branch 1 taken 11 times.
|
132 | if (!prev_is_dotdot_seg) |
435 | { | ||
436 | 121 | dest = dest0 + p; | |
437 | } | ||
438 | else | ||
439 | { | ||
440 | 11 | append(dest, end, "/.."); | |
441 | } | ||
442 | } | ||
443 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 50 times.
|
61 | else if (dest0 != dest) |
444 | { | ||
445 | // Only one segment in the output: remove it | ||
446 | 11 | core::string_view last_seg(dest0, dest - dest0); | |
447 | 11 | bool const prev_is_dotdot_seg = dot_equal(last_seg, ".."); | |
448 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 2 times.
|
11 | if (!prev_is_dotdot_seg) |
449 | { | ||
450 | 9 | dest = dest0; | |
451 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | if (!is_absolute) |
452 | { | ||
453 | 9 | input.remove_prefix(1); | |
454 | } | ||
455 | } | ||
456 | else | ||
457 | { | ||
458 | 2 | append(dest, end, "/.."); | |
459 | } | ||
460 | } | ||
461 | else | ||
462 | { | ||
463 | // Output is empty | ||
464 |
1/2✓ Branch 0 taken 50 times.
✗ Branch 1 not taken.
|
50 | if (is_absolute) |
465 | { | ||
466 | 50 | append(dest, end, "/.."); | |
467 | } | ||
468 | else | ||
469 | { | ||
470 | // AFREITAS: Although we have no formal proof | ||
471 | // for that, the output can't be relative | ||
472 | // and empty at this point because relative | ||
473 | // paths will fall in the `dest0 != dest` | ||
474 | // case above of this rule C and then the | ||
475 | // general case of rule E for "..". | ||
476 | ✗ | append(dest, end, ".."); | |
477 | } | ||
478 | } | ||
479 | 193 | input.remove_prefix(n - 1); | |
480 | 193 | continue; | |
481 | } | ||
482 | |||
483 | 1415 | bool const is_final_dotdot_seg = dot_equal(input, "/.."); | |
484 |
2/2✓ Branch 0 taken 31 times.
✓ Branch 1 taken 1384 times.
|
1415 | if (is_final_dotdot_seg) |
485 | { | ||
486 | 31 | core::string_view cur_out(dest0, dest - dest0); | |
487 | 31 | std::size_t p = cur_out.find_last_of('/'); | |
488 | 31 | bool const has_multiple_segs = p != core::string_view::npos; | |
489 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 13 times.
|
31 | if (has_multiple_segs) |
490 | { | ||
491 | // output has multiple segments | ||
492 | // "erase" [p, end] if not "/.." | ||
493 | 18 | core::string_view last_seg(dest0 + p, dest - (dest0 + p)); | |
494 | 18 | bool const prev_is_dotdot_seg = dot_equal(last_seg, "/.."); | |
495 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
|
18 | if (!prev_is_dotdot_seg) |
496 | { | ||
497 | 14 | dest = dest0 + p; | |
498 | 14 | append(dest, end, "/"); | |
499 | } | ||
500 | else | ||
501 | { | ||
502 | 4 | append(dest, end, "/.."); | |
503 | } | ||
504 | } | ||
505 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 10 times.
|
13 | else if (dest0 != dest) |
506 | { | ||
507 | // Only one segment in the output: remove it | ||
508 | 3 | core::string_view last_seg(dest0, dest - dest0); | |
509 | 3 | bool const prev_is_dotdot_seg = dot_equal(last_seg, ".."); | |
510 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | if (!prev_is_dotdot_seg) { |
511 | 1 | dest = dest0; | |
512 | } | ||
513 | else | ||
514 | { | ||
515 | 2 | append(dest, end, "/.."); | |
516 | } | ||
517 | } | ||
518 | else | ||
519 | { | ||
520 | // Output is empty: append dotdot | ||
521 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (is_absolute) |
522 | { | ||
523 | 10 | append(dest, end, "/.."); | |
524 | } | ||
525 | else | ||
526 | { | ||
527 | // AFREITAS: Although we have no formal proof | ||
528 | // for that, the output can't be relative | ||
529 | // and empty at this point because relative | ||
530 | // paths will fall in the `dest0 != dest` | ||
531 | // case above of this rule C and then the | ||
532 | // general case of rule E for "..". | ||
533 | ✗ | append(dest, end, ".."); | |
534 | } | ||
535 | } | ||
536 | 31 | input = {}; | |
537 | 31 | break; | |
538 | } | ||
539 | |||
540 | // Rule E | ||
541 | 1384 | std::size_t p = input.find_first_of('/', 1); | |
542 |
2/2✓ Branch 0 taken 676 times.
✓ Branch 1 taken 708 times.
|
1384 | if (p != core::string_view::npos) |
543 | { | ||
544 | 676 | append(dest, end, input.substr(0, p)); | |
545 | 676 | input.remove_prefix(p); | |
546 | } | ||
547 | else | ||
548 | { | ||
549 | 708 | append(dest, end, input); | |
550 | 708 | input = {}; | |
551 | } | ||
552 | } | ||
553 | |||
554 | // 3. Finally, the output buffer is set | ||
555 | // as the result of remove_dot_segments, | ||
556 | // and we return its size | ||
557 | 832 | return dest - dest0; | |
558 | } | ||
559 | |||
560 | char | ||
561 | 1154 | path_pop_back( core::string_view& s ) | |
562 | { | ||
563 |
4/4✓ Branch 1 taken 522 times.
✓ Branch 2 taken 632 times.
✓ Branch 3 taken 1102 times.
✓ Branch 4 taken 52 times.
|
1676 | if (s.size() < 3 || |
564 |
3/4✓ Branch 2 taken 522 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 470 times.
✓ Branch 5 taken 52 times.
|
522 | *std::prev(s.end(), 3) != '%') |
565 | { | ||
566 | 1102 | char c = s.back(); | |
567 | 1102 | s.remove_suffix(1); | |
568 | 1102 | return c; | |
569 | } | ||
570 | 52 | char c = 0; | |
571 |
1/2✓ Branch 2 taken 52 times.
✗ Branch 3 not taken.
|
104 | detail::decode_unsafe( |
572 | 104 | &c, &c + 1, s.substr(s.size() - 3)); | |
573 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 8 times.
|
52 | if (c != '/') |
574 | { | ||
575 | 44 | s.remove_suffix(3); | |
576 | 44 | return c; | |
577 | } | ||
578 | 8 | c = s.back(); | |
579 | 8 | s.remove_suffix(1); | |
580 | 8 | return c; | |
581 | }; | ||
582 | |||
583 | void | ||
584 | 538 | pop_last_segment( | |
585 | core::string_view& str, | ||
586 | core::string_view& seg, | ||
587 | std::size_t& level, | ||
588 | bool remove_unmatched) noexcept | ||
589 | { | ||
590 | 538 | seg = {}; | |
591 | 538 | std::size_t n = 0; | |
592 |
2/2✓ Branch 1 taken 558 times.
✓ Branch 2 taken 142 times.
|
700 | while (!str.empty()) |
593 | { | ||
594 | // B. if the input buffer begins with a | ||
595 | // prefix of "/./" or "/.", where "." is | ||
596 | // a complete path segment, then replace | ||
597 | // that prefix with "/" in the input | ||
598 | // buffer; otherwise, | ||
599 | 558 | n = detail::path_ends_with(str, "/./"); | |
600 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 548 times.
|
558 | if (n) |
601 | { | ||
602 | 10 | seg = str.substr(str.size() - n); | |
603 | 10 | str.remove_suffix(n); | |
604 | 10 | continue; | |
605 | } | ||
606 | 548 | n = detail::path_ends_with(str, "/."); | |
607 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 536 times.
|
548 | if (n) |
608 | { | ||
609 | 12 | seg = str.substr(str.size() - n, 1); | |
610 | 12 | str.remove_suffix(n); | |
611 | 12 | continue; | |
612 | } | ||
613 | |||
614 | // C. if the input buffer begins with a | ||
615 | // prefix of "/../" or "/..", where ".." | ||
616 | // is a complete path segment, then | ||
617 | // replace that prefix with "/" in the | ||
618 | // input buffer and remove the last | ||
619 | // segment and its preceding "/" | ||
620 | // (if any) from the output buffer | ||
621 | // otherwise, | ||
622 | 536 | n = detail::path_ends_with(str, "/../"); | |
623 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 494 times.
|
536 | if (n) |
624 | { | ||
625 | 42 | seg = str.substr(str.size() - n); | |
626 | 42 | str.remove_suffix(n); | |
627 | 42 | ++level; | |
628 | 42 | continue; | |
629 | } | ||
630 | 494 | n = detail::path_ends_with(str, "/.."); | |
631 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 448 times.
|
494 | if (n) |
632 | { | ||
633 | 46 | seg = str.substr(str.size() - n); | |
634 | 46 | str.remove_suffix(n); | |
635 | 46 | ++level; | |
636 | 46 | continue; | |
637 | } | ||
638 | |||
639 | // E. move the first path segment in the | ||
640 | // input buffer to the end of the output | ||
641 | // buffer, including the initial "/" | ||
642 | // character (if any) and any subsequent | ||
643 | // characters up to, but not including, | ||
644 | // the next "/" character or the end of | ||
645 | // the input buffer. | ||
646 | 448 | std::size_t p = str.size() > 1 | |
647 |
2/2✓ Branch 0 taken 374 times.
✓ Branch 1 taken 74 times.
|
448 | ? str.find_last_of('/', str.size() - 2) |
648 | 448 | : core::string_view::npos; | |
649 |
2/2✓ Branch 0 taken 276 times.
✓ Branch 1 taken 172 times.
|
448 | if (p != core::string_view::npos) |
650 | { | ||
651 | 276 | seg = str.substr(p + 1); | |
652 | 276 | str.remove_suffix(seg.size()); | |
653 | } | ||
654 | else | ||
655 | { | ||
656 | 172 | seg = str; | |
657 | 172 | str = {}; | |
658 | } | ||
659 | |||
660 |
2/2✓ Branch 0 taken 396 times.
✓ Branch 1 taken 52 times.
|
448 | if (level == 0) |
661 | 396 | return; | |
662 |
2/2✓ Branch 1 taken 42 times.
✓ Branch 2 taken 10 times.
|
52 | if (!str.empty()) |
663 | 42 | --level; | |
664 | } | ||
665 | // we still need to skip n_skip + 1 | ||
666 | // but the string is empty | ||
667 |
4/4✓ Branch 0 taken 42 times.
✓ Branch 1 taken 100 times.
✓ Branch 2 taken 34 times.
✓ Branch 3 taken 8 times.
|
142 | if (remove_unmatched && level) |
668 | { | ||
669 | 34 | seg = "/"; | |
670 | 34 | level = 0; | |
671 | 34 | return; | |
672 | } | ||
673 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 104 times.
|
108 | else if (level) |
674 | { | ||
675 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | if (!seg.empty()) |
676 | { | ||
677 | 4 | seg = "/../"; | |
678 | } | ||
679 | else | ||
680 | { | ||
681 | // AFREITAS: this condition | ||
682 | // is correct, but it might | ||
683 | // unreachable. | ||
684 | ✗ | seg = "/.."; | |
685 | } | ||
686 | 4 | --level; | |
687 | 4 | return; | |
688 | } | ||
689 | 104 | seg = {}; | |
690 | } | ||
691 | |||
692 | void | ||
693 | 304 | normalized_path_digest( | |
694 | core::string_view str, | ||
695 | bool remove_unmatched, | ||
696 | fnv_1a& hasher) noexcept | ||
697 | { | ||
698 | 304 | core::string_view seg; | |
699 | 304 | std::size_t level = 0; | |
700 | 234 | do | |
701 | { | ||
702 | 538 | pop_last_segment( | |
703 | str, seg, level, remove_unmatched); | ||
704 |
2/2✓ Branch 1 taken 1154 times.
✓ Branch 2 taken 538 times.
|
1692 | while (!seg.empty()) |
705 | { | ||
706 | 1154 | char c = path_pop_back(seg); | |
707 | 1154 | hasher.put(c); | |
708 | } | ||
709 | } | ||
710 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 304 times.
|
538 | while (!str.empty()); |
711 | 304 | } | |
712 | |||
713 | // compare segments as if there were a normalized | ||
714 | int | ||
715 | 173 | segments_compare( | |
716 | segments_encoded_view seg0, | ||
717 | segments_encoded_view seg1) noexcept | ||
718 | { | ||
719 | // calculate path size as if it were normalized | ||
720 | auto normalized_size = | ||
721 | 346 | [](segments_encoded_view seg) -> std::size_t | |
722 | { | ||
723 |
2/2✓ Branch 1 taken 108 times.
✓ Branch 2 taken 238 times.
|
346 | if (seg.empty()) |
724 | 108 | return seg.is_absolute(); | |
725 | |||
726 | 238 | std::size_t n = 0; | |
727 | 238 | std::size_t skip = 0; | |
728 | 238 | auto begin = seg.begin(); | |
729 | 238 | auto it = seg.end(); | |
730 |
2/2✓ Branch 1 taken 662 times.
✓ Branch 2 taken 238 times.
|
900 | while (it != begin) |
731 | { | ||
732 | 662 | --it; | |
733 | 662 | decode_view dseg = **it; | |
734 |
2/2✓ Branch 1 taken 167 times.
✓ Branch 2 taken 495 times.
|
662 | if (dseg == "..") |
735 | 167 | ++skip; | |
736 |
2/2✓ Branch 1 taken 457 times.
✓ Branch 2 taken 38 times.
|
495 | else if (dseg != ".") |
737 | { | ||
738 |
2/2✓ Branch 0 taken 85 times.
✓ Branch 1 taken 372 times.
|
457 | if (skip) |
739 | 85 | --skip; | |
740 | else | ||
741 | 372 | n += dseg.size() + 1; | |
742 | } | ||
743 | } | ||
744 | 238 | n += skip * 3; | |
745 | 238 | n -= !seg.is_absolute(); | |
746 | 238 | return n; | |
747 | }; | ||
748 | |||
749 | // find the normalized size for the comparison | ||
750 | 173 | std::size_t n0 = normalized_size(seg0); | |
751 | 173 | std::size_t n1 = normalized_size(seg1); | |
752 | 173 | std::size_t n00 = n0; | |
753 | 173 | std::size_t n10 = n1; | |
754 | |||
755 | // consume the last char from a segment range | ||
756 | auto consume_last = | ||
757 | 1640 | []( | |
758 | std::size_t& n, | ||
759 | decode_view& dseg, | ||
760 | segments_encoded_view::iterator& begin, | ||
761 | segments_encoded_view::iterator& it, | ||
762 | decode_view::iterator& cit, | ||
763 | std::size_t& skip, | ||
764 | bool& at_slash) -> char | ||
765 | { | ||
766 |
2/2✓ Branch 2 taken 1009 times.
✓ Branch 3 taken 631 times.
|
1640 | if (cit != dseg.begin()) |
767 | { | ||
768 | // return last char from current segment | ||
769 | 1009 | at_slash = false; | |
770 | 1009 | --cit; | |
771 | 1009 | --n; | |
772 | 1009 | return *cit; | |
773 | } | ||
774 | |||
775 |
2/2✓ Branch 0 taken 371 times.
✓ Branch 1 taken 260 times.
|
631 | if (!at_slash) |
776 | { | ||
777 | // current segment dseg is over and | ||
778 | // previous char was not a slash | ||
779 | // so we output one | ||
780 | 371 | at_slash = true; | |
781 | 371 | --n; | |
782 | 371 | return '/'; | |
783 | } | ||
784 | |||
785 | // current segment dseg is over and | ||
786 | // last char was already the slash | ||
787 | // between segments, so take the | ||
788 | // next final segment to consume | ||
789 | 260 | at_slash = false; | |
790 |
1/2✓ Branch 2 taken 498 times.
✗ Branch 3 not taken.
|
498 | while (cit == dseg.begin()) |
791 | { | ||
792 | // take next segment | ||
793 |
2/2✓ Branch 1 taken 376 times.
✓ Branch 2 taken 122 times.
|
498 | if (it != begin) |
794 | 376 | --it; | |
795 | else | ||
796 | 122 | break; | |
797 |
2/2✓ Branch 3 taken 140 times.
✓ Branch 4 taken 236 times.
|
376 | if (**it == "..") |
798 | { | ||
799 | // skip next if this is ".." | ||
800 | 140 | ++skip; | |
801 | } | ||
802 |
2/2✓ Branch 3 taken 208 times.
✓ Branch 4 taken 28 times.
|
236 | else if (**it != ".") |
803 | { | ||
804 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 138 times.
|
208 | if (skip) |
805 | { | ||
806 | // discount skips | ||
807 | 70 | --skip; | |
808 | } | ||
809 | else | ||
810 | { | ||
811 | // or update current seg | ||
812 | 138 | dseg = **it; | |
813 | 138 | cit = dseg.end(); | |
814 | 138 | break; | |
815 | } | ||
816 | } | ||
817 | } | ||
818 | // consume from the new current | ||
819 | // segment | ||
820 | 260 | --n; | |
821 |
2/2✓ Branch 2 taken 123 times.
✓ Branch 3 taken 137 times.
|
260 | if (cit != dseg.begin()) |
822 | { | ||
823 | // in the general case, we consume | ||
824 | // one more character from the end | ||
825 | 123 | --cit; | |
826 | 123 | return *cit; | |
827 | } | ||
828 | |||
829 | // nothing left to consume in the | ||
830 | // current and new segment | ||
831 |
2/2✓ Branch 1 taken 128 times.
✓ Branch 2 taken 9 times.
|
137 | if (it == begin) |
832 | { | ||
833 | // if this is the first | ||
834 | // segment, the segments are | ||
835 | // over and there can only | ||
836 | // be repetitions of "../" to | ||
837 | // output | ||
838 | 128 | return "/.."[n % 3]; | |
839 | } | ||
840 | // at other segments, we need | ||
841 | // a slash to transition to the | ||
842 | // next segment | ||
843 | 9 | at_slash = true; | |
844 | 9 | return '/'; | |
845 | }; | ||
846 | |||
847 | // consume final segments from seg0 that | ||
848 | // should not influence the comparison | ||
849 | 173 | auto begin0 = seg0.begin(); | |
850 | 173 | auto it0 = seg0.end(); | |
851 | 173 | decode_view dseg0; | |
852 |
2/2✓ Branch 2 taken 119 times.
✓ Branch 3 taken 54 times.
|
173 | if (it0 != seg0.begin()) |
853 | { | ||
854 | 119 | --it0; | |
855 | 119 | dseg0 = **it0; | |
856 | } | ||
857 | 173 | decode_view::iterator cit0 = dseg0.end(); | |
858 | 173 | std::size_t skip0 = 0; | |
859 | 173 | bool at_slash0 = true; | |
860 |
2/2✓ Branch 0 taken 134 times.
✓ Branch 1 taken 173 times.
|
307 | while (n0 > n1) |
861 | { | ||
862 | 134 | consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0); | |
863 | } | ||
864 | |||
865 | // consume final segments from seg1 that | ||
866 | // should not influence the comparison | ||
867 | 173 | auto begin1 = seg1.begin(); | |
868 | 173 | auto it1 = seg1.end(); | |
869 | 173 | decode_view dseg1; | |
870 |
2/2✓ Branch 2 taken 119 times.
✓ Branch 3 taken 54 times.
|
173 | if (it1 != seg1.begin()) |
871 | { | ||
872 | 119 | --it1; | |
873 | 119 | dseg1 = **it1; | |
874 | } | ||
875 | 173 | decode_view::iterator cit1 = dseg1.end(); | |
876 | 173 | std::size_t skip1 = 0; | |
877 | 173 | bool at_slash1 = true; | |
878 |
2/2✓ Branch 0 taken 34 times.
✓ Branch 1 taken 173 times.
|
207 | while (n1 > n0) |
879 | { | ||
880 | 34 | consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1); | |
881 | } | ||
882 | |||
883 | 173 | int cmp = 0; | |
884 |
2/2✓ Branch 0 taken 736 times.
✓ Branch 1 taken 173 times.
|
909 | while (n0) |
885 | { | ||
886 | 736 | char c0 = consume_last( | |
887 | n0, dseg0, begin0, it0, cit0, skip0, at_slash0); | ||
888 | 736 | char c1 = consume_last( | |
889 | n1, dseg1, begin1, it1, cit1, skip1, at_slash1); | ||
890 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 700 times.
|
736 | if (c0 < c1) |
891 | 36 | cmp = -1; | |
892 |
2/2✓ Branch 0 taken 41 times.
✓ Branch 1 taken 659 times.
|
700 | else if (c1 < c0) |
893 | 41 | cmp = +1; | |
894 | } | ||
895 | |||
896 |
2/2✓ Branch 0 taken 41 times.
✓ Branch 1 taken 132 times.
|
173 | if (cmp != 0) |
897 | 41 | return cmp; | |
898 |
2/2✓ Branch 0 taken 130 times.
✓ Branch 1 taken 2 times.
|
132 | if ( n00 == n10 ) |
899 | 130 | return 0; | |
900 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if ( n00 < n10 ) |
901 | 1 | return -1; | |
902 | 1 | return 1; | |
903 | } | ||
904 | |||
905 | } // detail | ||
906 | } // urls | ||
907 | } // boost | ||
908 | |||
909 | |||
910 |